diff --git a/9pfs-fix-crash-on-Treaddir-request.patch b/9pfs-fix-crash-on-Treaddir-request.patch new file mode 100644 index 0000000000000000000000000000000000000000..160943104c2fb976b5e81cdab5bb2202c74418a9 --- /dev/null +++ b/9pfs-fix-crash-on-Treaddir-request.patch @@ -0,0 +1,64 @@ +From 93e7987cb5a7b33c2d2e0a02b7f310955ca11851 Mon Sep 17 00:00:00 2001 +From: Christian Schoenebeck +Date: Tue, 5 Nov 2024 11:25:26 +0100 +Subject: [PATCH] 9pfs: fix crash on 'Treaddir' request + +A bad (broken or malicious) 9p client (guest) could cause QEMU host to +crash by sending a 9p 'Treaddir' request with a numeric file ID (FID) that +was previously opened for a file instead of an expected directory: + + #0 0x0000762aff8f4919 in __GI___rewinddir (dirp=0xf) at + ../sysdeps/unix/sysv/linux/rewinddir.c:29 + #1 0x0000557b7625fb40 in do_readdir_many (pdu=0x557bb67d2eb0, + fidp=0x557bb67955b0, entries=0x762afe9fff58, offset=0, maxsize=131072, + dostat=<optimized out>) at ../hw/9pfs/codir.c:101 + #2 v9fs_co_readdir_many (pdu=pdu@entry=0x557bb67d2eb0, + fidp=fidp@entry=0x557bb67955b0, entries=entries@entry=0x762afe9fff58, + offset=0, maxsize=131072, dostat=false) at ../hw/9pfs/codir.c:226 + #3 0x0000557b7625c1f9 in v9fs_do_readdir (pdu=0x557bb67d2eb0, + fidp=0x557bb67955b0, offset=<optimized out>, + max_count=<optimized out>) at ../hw/9pfs/9p.c:2488 + #4 v9fs_readdir (opaque=0x557bb67d2eb0) at ../hw/9pfs/9p.c:2602 + +That's because V9fsFidOpenState was declared as union type. So the +same memory region is used for either an open POSIX file handle (int), +or a POSIX DIR* pointer, etc., so 9p server incorrectly used the +previously opened (valid) POSIX file handle (0xf) as DIR* pointer, +eventually causing a crash in glibc's rewinddir() function. + +Root cause was therefore a missing check in 9p server's 'Treaddir' +request handler, which must ensure that the client supplied FID was +really opened as directory stream before trying to access the +aforementioned union and its DIR* member. 
+ +Cc: qemu-stable@nongnu.org +Fixes: d62dbb51f7 ("virtio-9p: Add fidtype so that we can do type ...") +Reported-by: Akihiro Suda +Tested-by: Akihiro Suda +Signed-off-by: Christian Schoenebeck +Reviewed-by: Greg Kurz +Message-Id: +Signed-off-by: Zhongrui Tang +--- + hw/9pfs/9p.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c +index af636cfb2d..9a291d1b51 100644 +--- a/hw/9pfs/9p.c ++++ b/hw/9pfs/9p.c +@@ -2587,6 +2587,11 @@ static void coroutine_fn v9fs_readdir(void *opaque) + retval = -EINVAL; + goto out_nofid; + } ++ if (fidp->fid_type != P9_FID_DIR) { ++ warn_report_once("9p: bad client: T_readdir on non-directory stream"); ++ retval = -ENOTDIR; ++ goto out; ++ } + if (!fidp->fs.dir.stream) { + retval = -EINVAL; + goto out; +-- +2.41.0.windows.1 + diff --git a/9pfs-local-Fix-possible-memory-leak-in-local_link.patch b/9pfs-local-Fix-possible-memory-leak-in-local_link.patch deleted file mode 100644 index 56b7acb14b023fe3d9f94e22ece7c995a73c3fa6..0000000000000000000000000000000000000000 --- a/9pfs-local-Fix-possible-memory-leak-in-local_link.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 841b8d099c462cd4282c4ced8c2a6512899fd8d9 Mon Sep 17 00:00:00 2001 -From: Jiajun Chen -Date: Mon, 20 Jan 2020 15:11:39 +0100 -Subject: [PATCH] 9pfs: local: Fix possible memory leak in local_link() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -There is a possible memory leak while local_link return -1 without free -odirpath and oname. 
- -Reported-by: Euler Robot -Signed-off-by: Jaijun Chen -Signed-off-by: Xiang Zheng -Reviewed-by: Christian Schoenebeck -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Greg Kurz -(cherry picked from commit 841b8d099c462cd4282c4ced8c2a6512899fd8d9) ---- - hw/9pfs/9p-local.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c -index ca64139..d0592c3 100644 ---- a/hw/9pfs/9p-local.c -+++ b/hw/9pfs/9p-local.c -@@ -947,7 +947,7 @@ static int local_link(FsContext *ctx, V9fsPath *oldpath, - if (ctx->export_flags & V9FS_SM_MAPPED_FILE && - local_is_mapped_file_metadata(ctx, name)) { - errno = EINVAL; -- return -1; -+ goto out; - } - - odirfd = local_opendir_nofollow(ctx, odirpath); --- -1.8.3.1 - diff --git a/ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch b/ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch deleted file mode 100644 index 3012512988402756527c95341aa921a362c9e43e..0000000000000000000000000000000000000000 --- a/ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch +++ /dev/null @@ -1,64 +0,0 @@ -From e3a7ec839fa4f823666d726989c375dcf73348a4 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 15 Apr 2020 16:14:50 +0800 -Subject: [PATCH] ARM: KVM: Check KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 for smp_cpus > - 256 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Host kernel within [4.18, 5.3] report an erroneous KVM_MAX_VCPUS=512 -for ARM. The actual capability to instantiate more than 256 vcpus -was fixed in 5.4 with the upgrade of the KVM_IRQ_LINE ABI to support -vcpu id encoded on 12 bits instead of 8 and a redistributor consuming -a single KVM IO device instead of 2. - -So let's check this capability when attempting to use more than 256 -vcpus within any ARM kvm accelerated machine. 
- -Signed-off-by: Eric Auger -Reviewed-by: Richard Henderson -Reviewed-by: Andrew Jones -Acked-by: Marc Zyngier -Message-id: 20191003154640.22451-4-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry-picked from commit fff9f5558d0e0813d4f80bfe1602acf225eca4fd) -[yu: Use the legacy smp_cpus instead of ms->smp.cpus, as we don't have - ¦struct CpuTopology in MachineState at that time. See commit - ¦edeeec911702 for details.] -Signed-off-by: Zenghui Yu ---- - target/arm/kvm.c | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 50e86f8b..cc7a46df 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -173,6 +173,8 @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms) - - int kvm_arch_init(MachineState *ms, KVMState *s) - { -+ int ret = 0; -+ unsigned int smp_cpus = ms->smp.cpus; - /* For ARM interrupt delivery is always asynchronous, - * whether we are using an in-kernel VGIC or not. - */ -@@ -186,7 +188,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - - cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); - -- return 0; -+ if (smp_cpus > 256 && -+ !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { -+ error_report("Using more than 256 vcpus requires a host kernel " -+ "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); -+ ret = -EINVAL; -+ } -+ -+ return ret; - } - - unsigned long kvm_arch_vcpu_id(CPUState *cpu) --- -2.23.0 diff --git a/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch b/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch deleted file mode 100644 index 4681e9f33f6877f199768b7a06a65df5356e6a7f..0000000000000000000000000000000000000000 --- a/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 4646a24045cf53f2cc5e0ef1974da88ef50ef676 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 27 May 2020 11:54:31 +0800 -Subject: [PATCH] ARM64: record vtimer tick when cpu is stopped - -The vtimer kick still increases even if the 
vcpu is stopped when VM has -save/restore or suspend/resume operation. This will cause guest watchdog -soft-lockup if the VM has lots of memory in use. - -Signed-off-by: Hao Hong -Signed-off-by: Haibin Wang -Signed-off-by: Ying Fang ---- - cpus.c | 58 ++++++++++++++++++++++++++++++++++++++++++++ - target/arm/cpu.h | 2 ++ - target/arm/machine.c | 1 + - 3 files changed, 61 insertions(+) - -diff --git a/cpus.c b/cpus.c -index 927a00aa..b9aa51f8 100644 ---- a/cpus.c -+++ b/cpus.c -@@ -1066,6 +1066,28 @@ void cpu_synchronize_all_pre_loadvm(void) - } - } - -+#ifdef __aarch64__ -+static void get_vcpu_timer_tick(CPUState *cs) -+{ -+ CPUARMState *env = &ARM_CPU(cs)->env; -+ int err; -+ struct kvm_one_reg reg; -+ uint64_t timer_tick; -+ -+ reg.id = KVM_REG_ARM_TIMER_CNT; -+ reg.addr = (uintptr_t) &timer_tick; -+ -+ err = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); -+ if (err < 0) { -+ error_report("get vcpu tick failed, ret = %d", err); -+ env->vtimer = 0; -+ return; -+ } -+ env->vtimer = timer_tick; -+ return; -+} -+#endif -+ - static int do_vm_stop(RunState state, bool send_stop) - { - int ret = 0; -@@ -1073,6 +1095,11 @@ static int do_vm_stop(RunState state, bool send_stop) - if (runstate_is_running()) { - cpu_disable_ticks(); - pause_all_vcpus(); -+#ifdef __aarch64__ -+ if (first_cpu) { -+ get_vcpu_timer_tick(first_cpu); -+ } -+#endif - runstate_set(state); - vm_state_notify(0, state); - if (send_stop) { -@@ -1918,11 +1945,42 @@ void cpu_resume(CPUState *cpu) - qemu_cpu_kick(cpu); - } - -+#ifdef __aarch64__ -+static void set_vcpu_timer_tick(CPUState *cs) -+{ -+ CPUARMState *env = &ARM_CPU(cs)->env; -+ -+ if (env->vtimer == 0) { -+ return; -+ } -+ -+ int err; -+ struct kvm_one_reg reg; -+ uint64_t timer_tick = env->vtimer; -+ env->vtimer = 0; -+ -+ reg.id = KVM_REG_ARM_TIMER_CNT; -+ reg.addr = (uintptr_t) &timer_tick; -+ -+ err = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); -+ if (err < 0) { -+ error_report("Set vcpu tick failed, ret = %d", err); -+ return; -+ } -+ return; -+} 
-+#endif -+ - void resume_all_vcpus(void) - { - CPUState *cpu; - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); -+#ifdef __aarch64__ -+ if (first_cpu) { -+ set_vcpu_timer_tick(first_cpu); -+ } -+#endif - CPU_FOREACH(cpu) { - cpu_resume(cpu); - } -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 86eb79cd..aec6a214 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -262,6 +262,8 @@ typedef struct CPUARMState { - uint64_t sp_el[4]; /* AArch64 banked stack pointers */ - - -+ uint64_t vtimer; /* Timer tick when vcpu stop */ -+ - /* System control coprocessor (cp15) */ - struct { - uint32_t c0_cpuid; -diff --git a/target/arm/machine.c b/target/arm/machine.c -index ee3c59a6..ec28b839 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -814,6 +814,7 @@ const VMStateDescription vmstate_arm_cpu = { - VMSTATE_UINT32(env.exception.syndrome, ARMCPU), - VMSTATE_UINT32(env.exception.fsr, ARMCPU), - VMSTATE_UINT64(env.exception.vaddress, ARMCPU), -+ VMSTATE_UINT64(env.vtimer, ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), - { --- -2.23.0 - diff --git a/Add-if-condition-to-avoid-assertion-failed-error-in-.patch b/Add-if-condition-to-avoid-assertion-failed-error-in-.patch new file mode 100644 index 0000000000000000000000000000000000000000..cded962b641ec382509107b5e222a8beb9c7d1b1 --- /dev/null +++ b/Add-if-condition-to-avoid-assertion-failed-error-in-.patch @@ -0,0 +1,26 @@ +From b78860242162ab5ef1e73973eeca36e0261bfeb5 Mon Sep 17 00:00:00 2001 +From: xiaoyuliang +Date: Wed, 21 Aug 2024 11:26:41 +0800 +Subject: [PATCH] Add if condition to avoid assertion failed error in + blockdev_init + +--- + blockdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index bc7a947dea..d2fe5c361c 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -588,7 +588,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + + read_only = 
qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false); + +- if (!file || !*file) { ++ if ((!file || !*file) && qdict_size(bs_opts) == 2) { + cache = qdict_get_try_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH); + if (cache && !strcmp(cache, "on")) { + bdrv_flags |= BDRV_O_NO_FLUSH; +-- +2.41.0.windows.1 + diff --git a/Add-stub-function-for-tmm_get_kae_num-if-CONFIG_KVM-.patch b/Add-stub-function-for-tmm_get_kae_num-if-CONFIG_KVM-.patch new file mode 100644 index 0000000000000000000000000000000000000000..95eda39d4ad7f3a4a688127afaabf6fbcc34ba4e --- /dev/null +++ b/Add-stub-function-for-tmm_get_kae_num-if-CONFIG_KVM-.patch @@ -0,0 +1,30 @@ +From 69f44f27b30970cf19c0d5507a11852facace775 Mon Sep 17 00:00:00 2001 +From: panhengchang +Date: Tue, 24 Jun 2025 09:38:35 +0800 +Subject: [PATCH 2/2] Add stub function for 'tmm_get_kae_num' if 'CONFIG_KVM' + is not set. + +Signed-off-by: panghengchang +--- + target/arm/kvm_arm.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 7613728..a29d454 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -507,6 +507,11 @@ static inline void tmm_set_hpre_addr(hwaddr base, int num) + { + g_assert_not_reached(); + } ++ ++static inline int tmm_get_kae_num(void) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +-- +2.28.0.windows.1 + diff --git a/Add-support-for-the-virtcca-cvm-feature.patch b/Add-support-for-the-virtcca-cvm-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..79d0fca914372d9d3f7c6f2de3b833e09f41504f --- /dev/null +++ b/Add-support-for-the-virtcca-cvm-feature.patch @@ -0,0 +1,1047 @@ +From 5db954cb188d3775aec053fad8a39bf4c26a2b92 Mon Sep 17 00:00:00 2001 +From: liupingwei +Date: Fri, 2 Aug 2024 11:55:43 +0800 +Subject: [PATCH] Add support for the virtcca cvm feature. + +With this commit,we can append new startup parameters :"cma=64M +cvm_guest=1" and "kvm_type=cvm" to use virtcca cvm feature. 
+Here is a full example of the append parameters for a cvm : +-M virt,gic-version=3,accel=kvm,kernel_irqchip=on,kvm_type=cvm \ +-append "swiotlb=force console=tty0 console=ttyAMA0 kaslr.disabled=1 +cma=64M cvm_guest=1 rodata=off rootfstype=ext4 root=/dev/vad rw" \ + +Additionally,the SVE and PMU are optional configurations for cvm,here is +an example: +-object tmm-guest,id=tmm0,sve-vector-length=128,num-pmu-counters=1 + +Signed-off-by: liupingwei +--- + accel/kvm/kvm-all.c | 36 ++++ + hw/arm/boot.c | 49 +++++ + hw/arm/virt.c | 61 +++++- + hw/virtio/virtio-bus.c | 6 + + include/hw/arm/boot.h | 1 + + include/hw/arm/virt.h | 1 + + include/sysemu/kvm.h | 9 + + linux-headers/asm-arm64/kvm.h | 62 ++++++ + linux-headers/linux/kvm.h | 32 +++- + qapi/qom.json | 29 ++- + target/arm/kvm-tmm.c | 344 ++++++++++++++++++++++++++++++++++ + target/arm/kvm.c | 6 +- + target/arm/kvm64.c | 5 + + target/arm/kvm_arm.h | 16 ++ + target/arm/meson.build | 1 + + 15 files changed, 651 insertions(+), 7 deletions(-) + create mode 100644 target/arm/kvm-tmm.c + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ade7841ca3..dc3605e648 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -52,6 +52,8 @@ + #include "hw/boards.h" + #include "sysemu/stats.h" + ++#include "sysemu/kvm.h" ++ + /* This check must be after config-host.h is included */ + #ifdef CONFIG_EVENTFD + #include +@@ -86,6 +88,9 @@ struct KVMParkedVcpu { + }; + + KVMState *kvm_state; ++ ++bool virtcca_cvm_allowed = false; ++ + bool kvm_kernel_irqchip; + bool kvm_split_irqchip; + bool kvm_async_interrupts_allowed; +@@ -2355,6 +2360,11 @@ uint32_t kvm_dirty_ring_size(void) + return kvm_state->kvm_dirty_ring_size; + } + ++static inline bool kvm_is_virtcca_cvm_type(int type) ++{ ++ return type & VIRTCCA_CVM_TYPE; ++} ++ + static int kvm_init(MachineState *ms) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -2447,6 +2457,10 @@ static int kvm_init(MachineState *ms) + goto err; + } + ++ if 
(kvm_is_virtcca_cvm_type(type)) { ++ virtcca_cvm_allowed = true; ++ } ++ + do { + ret = kvm_ioctl(s, KVM_CREATE_VM, type); + } while (ret == -EINTR); +@@ -3503,6 +3517,28 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) + return r; + } + ++int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, ++ struct kvm_numa_info *numa_info) ++{ ++ KVMState *state = kvm_state; ++ struct kvm_user_data data; ++ int ret; ++ ++ data.loader_start = loader_start; ++ data.image_end = image_end; ++ data.initrd_start = initrd_start; ++ data.dtb_end = dtb_end; ++ data.ram_size = ram_size; ++ memcpy(&data.numa_info, numa_info, sizeof(struct kvm_numa_info)); ++ ++ ret = kvm_vm_ioctl(state, KVM_LOAD_USER_DATA, &data); ++ if (ret < 0) { ++ error_report("%s: KVM_LOAD_USER_DATA failed!\n", __func__); ++ } ++ ++ return ret; ++} ++ + static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, + hwaddr start_addr, hwaddr size) + { +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 345c7cfa19..42110b0f18 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -27,6 +27,7 @@ + #include "qemu/config-file.h" + #include "qemu/option.h" + #include "qemu/units.h" ++#include "kvm_arm.h" + + /* Kernel boot protocol is specified in the kernel docs + * Documentation/arm/Booting and Documentation/arm64/booting.txt +@@ -1142,6 +1143,16 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) { + ARM_CPU(cs)->env.boot_info = info; + } ++ ++ if (kvm_enabled() && virtcca_cvm_enabled()) { ++ if (info->dtb_limit == 0) { ++ info->dtb_limit = info->dtb_start + 0x200000; ++ } ++ kvm_load_user_data(info->loader_start, image_high_addr, info->initrd_start, ++ info->dtb_limit, info->ram_size, (struct kvm_numa_info *)info->numa_info); ++ tmm_add_ram_region(info->loader_start, image_high_addr - info->loader_start, ++ info->initrd_start, info->dtb_limit - info->initrd_start, true); ++ } + } + + 
static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) +@@ -1235,6 +1246,39 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) + info->initrd_filename = ms->initrd_filename; + info->dtb_filename = ms->dtb; + info->dtb_limit = 0; ++ if (kvm_enabled() && virtcca_cvm_enabled()) { ++ info->ram_size = ms->ram_size; ++ info->numa_info = g_malloc0(sizeof(struct kvm_numa_info)); /* zeroed: cpu_id bitmaps are only OR-ed into below, and the whole struct is handed to KVM */ ++ struct kvm_numa_info *numa_info = (struct kvm_numa_info *) info->numa_info; ++ if (ms->numa_state != NULL && ms->numa_state->num_nodes > 0) { ++ numa_info->numa_cnt = ms->numa_state->num_nodes; ++ uint64_t mem_base = info->loader_start; ++ for (int64_t i = 0; i < ms->numa_state->num_nodes && i < MAX_NUMA_NODE; i++) { ++ uint64_t mem_len = ms->numa_state->nodes[i].node_mem; ++ numa_info->numa_nodes[i].numa_id = i; ++ numa_info->numa_nodes[i].ipa_start = mem_base; ++ numa_info->numa_nodes[i].ipa_size = mem_len; ++ memcpy(numa_info->numa_nodes[i].host_numa_nodes, ms->numa_state->nodes[i].node_memdev->host_nodes, ++ MAX_NODES / BITS_PER_LONG * sizeof(uint64_t)); ++ mem_base += mem_len; ++ } ++ } else { ++ numa_info->numa_cnt = 1; ++ numa_info->numa_nodes[0].numa_id = 0; ++ numa_info->numa_nodes[0].ipa_start = info->loader_start; ++ numa_info->numa_nodes[0].ipa_size = info->ram_size; ++ memset(numa_info->numa_nodes[0].host_numa_nodes, 0, MAX_NODES / BITS_PER_LONG * sizeof(uint64_t)); ++ } ++ ++ for (int cpu_idx = ms->smp.cpus - 1; cpu_idx >= 0; cpu_idx--) { ++ ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu_idx)); ++ CPUState *local_cs = CPU(armcpu); ++ uint64_t node_id = 0; ++ if (ms->possible_cpus->cpus[local_cs->cpu_index].props.has_node_id) ++ node_id = ms->possible_cpus->cpus[local_cs->cpu_index].props.node_id; ++ bitmap_set((unsigned long *)numa_info->numa_nodes[node_id].cpu_id, cpu_idx, 1); ++ } ++ } + + /* Load the kernel. 
*/ + if (!info->kernel_filename || info->firmware_loaded) { +@@ -1243,6 +1287,11 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) + arm_setup_direct_kernel_boot(cpu, info); + } + ++ if (kvm_enabled() && virtcca_cvm_enabled()) { ++ g_free(info->numa_info); ++ info->numa_info = NULL; ++ } ++ + /* + * Disable the PSCI conduit if it is set up to target the same + * or a lower EL than the one we're going to start the guest code in. +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a6e324c6f8..e73a795d3d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -285,8 +285,16 @@ static void create_fdt(VirtMachineState *vms) + + /* /chosen must exist for load_dtb to fill in necessary properties later */ + qemu_fdt_add_subnode(fdt, "/chosen"); ++ ++ g_autofree char *kvm_type = NULL; ++ if (object_property_find(OBJECT(current_machine), "kvm-type")) { ++ kvm_type = object_property_get_str(OBJECT(current_machine), ++ "kvm-type", &error_abort); ++ } + if (vms->dtb_randomness) { +- create_randomness(ms, "/chosen"); ++ if (!(kvm_type && !strcmp(kvm_type, "cvm"))) { ++ create_randomness(ms, "/chosen"); ++ } + } + + if (vms->secure) { +@@ -1953,6 +1961,19 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + vms->memmap[i] = base_memmap[i]; + } + ++ /* fix VIRT_MEM range */ ++ if (object_property_find(OBJECT(current_machine), "kvm-type")) { ++ g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine), ++ "kvm-type", &error_abort); ++ ++ if (!strcmp(kvm_type, "cvm")) { ++ vms->memmap[VIRT_MEM].base = 3 * GiB; ++ vms->memmap[VIRT_MEM].size = ms->ram_size; ++ info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base), ++ (unsigned long long)(vms->memmap[VIRT_MEM].base + ms->ram_size)); ++ } ++ } ++ + if (ms->ram_slots > ACPI_MAX_RAM_SLOTS) { + error_report("unsupported number of memory slots: %"PRIu64, + ms->ram_slots); +@@ -2440,7 +2461,7 @@ static void 
machvirt_init(MachineState *machine) + */ + if (vms->secure && firmware_loaded) { + vms->psci_conduit = QEMU_PSCI_CONDUIT_DISABLED; +- } else if (vms->virt) { ++ } else if (vms->virt || virtcca_cvm_enabled()) { + vms->psci_conduit = QEMU_PSCI_CONDUIT_SMC; + } else { + vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC; +@@ -2509,6 +2530,13 @@ static void machvirt_init(MachineState *machine) + } + } + ++ if (virtcca_cvm_enabled()) { ++ int ret = kvm_arm_tmm_init(machine->cgs, &error_fatal); ++ if (ret != 0) { ++ error_report("fail to initialize TMM"); ++ exit(1); ++ } ++ } + create_fdt(vms); + qemu_log("cpu init start\n"); + +@@ -3592,6 +3620,15 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, + static int virt_kvm_type(MachineState *ms, const char *type_str) + { + VirtMachineState *vms = VIRT_MACHINE(ms); ++ int virtcca_cvm_type = 0; ++ if (object_property_find(OBJECT(current_machine), "kvm-type")) { ++ g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine), ++ "kvm-type", &error_abort); ++ ++ if (!strcmp(kvm_type, "cvm")) { ++ virtcca_cvm_type = VIRTCCA_CVM_TYPE; ++ } ++ } + int max_vm_pa_size, requested_pa_size; + bool fixed_ipa; + +@@ -3621,7 +3658,9 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + * the implicit legacy 40b IPA setting, in which case the kvm_type + * must be 0. + */ +- return fixed_ipa ? 0 : requested_pa_size; ++ return strcmp(type_str, "cvm") == 0 ? ++ ((fixed_ipa ? 0 : requested_pa_size) | virtcca_cvm_type) : ++ (fixed_ipa ? 
0 : requested_pa_size); + } + + static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3793,6 +3832,19 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + + } + ++static char *virt_get_kvm_type(Object *obj, Error **errp G_GNUC_UNUSED) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ return g_strdup(vms->kvm_type); ++} ++ ++static void virt_set_kvm_type(Object *obj, const char *value, Error **errp G_GNUC_UNUSED) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ g_free(vms->kvm_type); ++ vms->kvm_type = g_strdup(value); ++} ++ + static void virt_instance_init(Object *obj) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -3853,6 +3905,9 @@ static void virt_instance_init(Object *obj) + + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ ++ object_property_add_str(obj, "kvm-type", virt_get_kvm_type, virt_set_kvm_type); ++ object_property_set_description(obj, "kvm-type", "CVM or Normal VM"); + } + + static const TypeInfo virt_machine_info = { +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index 896feb37a1..7e750d073d 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/kvm.h" + #include "qapi/error.h" + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio.h" +@@ -81,6 +82,11 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + vdev->dma_as = &address_space_memory; + if (has_iommu) { + vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ ++ if (virtcca_cvm_enabled() && (strcmp(vdev->name, "vhost-user-fs") == 0)) { ++ vdev_has_iommu = true; ++ } ++ + /* + * Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and + * device operational. 
If the driver does not accept IOMMU_PLATFORM +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index f81326a1dc..4491b1f85b 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -39,6 +39,7 @@ void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, + /* arm_boot.c */ + struct arm_boot_info { + uint64_t ram_size; ++ void *numa_info; + const char *kernel_filename; + const char *kernel_cmdline; + const char *initrd_filename; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 7a734f07f7..27f5333772 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -182,6 +182,7 @@ struct VirtMachineState { + PCIBus *bus; + char *oem_id; + char *oem_table_id; ++ char *kvm_type; + NotifierList cpuhp_notifiers; + }; + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index cfa77cc15b..31af5f0e24 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -19,6 +19,7 @@ + #include "exec/memattrs.h" + #include "qemu/accel.h" + #include "qom/object.h" ++#include "linux-headers/linux/kvm.h" + + #ifdef NEED_CPU_H + # ifdef CONFIG_KVM +@@ -32,6 +33,7 @@ + #ifdef CONFIG_KVM_IS_POSSIBLE + + extern bool kvm_allowed; ++extern bool virtcca_cvm_allowed; + extern bool kvm_kernel_irqchip; + extern bool kvm_split_irqchip; + extern bool kvm_async_interrupts_allowed; +@@ -44,6 +46,8 @@ extern bool kvm_readonly_mem_allowed; + extern bool kvm_msi_use_devid; + + #define kvm_enabled() (kvm_allowed) ++#define virtcca_cvm_enabled() (virtcca_cvm_allowed) ++#define VIRTCCA_CVM_TYPE (1UL << 8) + /** + * kvm_irqchip_in_kernel: + * +@@ -146,6 +150,8 @@ extern bool kvm_msi_use_devid; + #else + + #define kvm_enabled() (0) ++#define virtcca_cvm_enabled() (0) ++#define VIRTCCA_CVM_TYPE (0) + #define kvm_irqchip_in_kernel() (false) + #define kvm_irqchip_is_split() (false) + #define kvm_async_interrupts_enabled() (false) +@@ -543,6 +549,9 @@ bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); + ++int 
kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, ++ struct kvm_numa_info *numa_info); ++ + #ifdef __aarch64__ + int kvm_create_shadow_device(PCIDevice *dev); + int kvm_delete_shadow_device(PCIDevice *dev); +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index c59ea55cd8..2b040b5d60 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -110,6 +110,7 @@ struct kvm_regs { + #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ + #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ + #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ ++#define KVM_ARM_VCPU_TEC 8 /* VCPU TEC state as part of cvm */ + + struct kvm_vcpu_init { + __u32 target; +@@ -523,6 +524,67 @@ struct reg_mask_range { + __u32 reserved[13]; + }; + ++/* KVM_CAP_ARM_TMM on VM fd */ ++#define KVM_CAP_ARM_TMM_CONFIG_CVM 0 ++#define KVM_CAP_ARM_TMM_CREATE_RD 1 ++#define KVM_CAP_ARM_TMM_POPULATE_CVM 2 ++#define KVM_CAP_ARM_TMM_ACTIVATE_CVM 3 ++ ++#define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA256 0 ++#define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA512 1 ++ ++#define KVM_CAP_ARM_TMM_RPV_SIZE 64 ++ ++/* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ ++#define KVM_CAP_ARM_TMM_CFG_RPV 0 ++#define KVM_CAP_ARM_TMM_CFG_HASH_ALGO 1 ++#define KVM_CAP_ARM_TMM_CFG_SVE 2 ++#define KVM_CAP_ARM_TMM_CFG_DBG 3 ++#define KVM_CAP_ARM_TMM_CFG_PMU 4 ++ ++struct kvm_cap_arm_tmm_config_item { ++ __u32 cfg; ++ union { ++ /* cfg == KVM_CAP_ARM_TMM_CFG_RPV */ ++ struct { ++ __u8 rpv[KVM_CAP_ARM_TMM_RPV_SIZE]; ++ }; ++ ++ /* cfg == KVM_CAP_ARM_TMM_CFG_HASH_ALGO */ ++ struct { ++ __u32 hash_algo; ++ }; ++ ++ /* cfg == KVM_CAP_ARM_TMM_CFG_SVE */ ++ struct { ++ __u32 sve_vq; ++ }; ++ ++ /* cfg == KVM_CAP_ARM_TMM_CFG_DBG */ ++ struct { ++ __u32 num_brps; ++ __u32 num_wrps; ++ }; ++ ++ /* cfg == KVM_CAP_ARM_TMM_CFG_PMU */ ++ struct { ++ 
__u32 num_pmu_cntrs; ++ }; ++ /* Fix the size of the union */ ++ __u8 reserved[256]; ++ }; ++}; ++ ++#define KVM_ARM_TMM_POPULATE_FLAGS_MEASURE (1U << 0) ++struct kvm_cap_arm_tmm_populate_region_args { ++ __u64 populate_ipa_base1; ++ __u64 populate_ipa_size1; ++ __u64 populate_ipa_base2; ++ __u64 populate_ipa_size2; ++ __u32 flags; ++ __u32 reserved[3]; ++}; ++ + #endif + + #endif /* __ARM_KVM_H__ */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 56f6b2583f..8d12435e41 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -14,6 +14,8 @@ + #include + #include + ++#include "sysemu/numa.h" ++ + #define KVM_API_VERSION 12 + + /* *** Deprecated interfaces *** */ +@@ -1198,6 +1200,8 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 + #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + ++#define KVM_CAP_ARM_TMM 300 ++ + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 + + #ifdef KVM_CAP_IRQ_ROUTING +@@ -1469,6 +1473,32 @@ struct kvm_vfio_spapr_tce { + __s32 tablefd; + }; + ++#define MAX_NUMA_NODE 8 ++#define MAX_CPU_BIT_MAP 4 ++#define MAX_NODE_BIT_MAP (MAX_NODES / BITS_PER_LONG) ++ ++struct kvm_numa_node { ++ __u64 numa_id; ++ __u64 ipa_start; ++ __u64 ipa_size; ++ __u64 host_numa_nodes[MAX_NODE_BIT_MAP]; ++ __u64 cpu_id[MAX_CPU_BIT_MAP]; ++}; ++ ++struct kvm_numa_info { ++ __u64 numa_cnt; ++ struct kvm_numa_node numa_nodes[MAX_NUMA_NODE]; ++}; ++ ++struct kvm_user_data { ++ __u64 loader_start; ++ __u64 image_end; ++ __u64 initrd_start; ++ __u64 dtb_end; ++ __u64 ram_size; ++ struct kvm_numa_info numa_info; ++}; ++ + /* + * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns + * a vcpu fd. 
+@@ -1481,7 +1511,7 @@ struct kvm_vfio_spapr_tce { + struct kvm_userspace_memory_region) + #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) + #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +- ++#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) + /* enable ucontrol for s390 */ + struct kvm_s390_ucas_mapping { + __u64 user_addr; +diff --git a/qapi/qom.json b/qapi/qom.json +index c53ef978ff..213edd8db2 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -899,6 +899,29 @@ + 'data': { '*cpu-affinity': ['uint16'], + '*node-affinity': ['uint16'] } } + ++## ++# @TmmGuestMeasurementAlgo: ++# ++# Algorithm to use for cvm measurements ++# ++# Since: FIXME ++## ++{ 'enum': 'TmmGuestMeasurementAlgo', ++'data': ['default', 'sha256', 'sha512'] } ++ ++## ++# @TmmGuestProperties: ++# ++# Properties for tmm-guest objects. ++# ++# @sve-vector-length: SVE vector length (default: 0, SVE disabled) ++# ++# Since: FIXME ++## ++{ 'struct': 'TmmGuestProperties', ++ 'data': { '*sve-vector-length': 'uint32', ++ '*num-pmu-counters': 'uint32', ++ '*measurement-algo': 'TmmGuestMeasurementAlgo' } } + + ## + # @ObjectType: +@@ -962,7 +985,8 @@ + 'tls-creds-x509', + 'tls-cipher-suites', + { 'name': 'x-remote-object', 'features': [ 'unstable' ] }, +- { 'name': 'x-vfio-user-server', 'features': [ 'unstable' ] } ++ { 'name': 'x-vfio-user-server', 'features': [ 'unstable' ] }, ++ 'tmm-guest' + ] } + + ## +@@ -1029,7 +1053,8 @@ + 'tls-creds-x509': 'TlsCredsX509Properties', + 'tls-cipher-suites': 'TlsCredsProperties', + 'x-remote-object': 'RemoteObjectProperties', +- 'x-vfio-user-server': 'VfioUserServerProperties' ++ 'x-vfio-user-server': 'VfioUserServerProperties', ++ 'tmm-guest': 'TmmGuestProperties' + } } + + ## +diff --git a/target/arm/kvm-tmm.c b/target/arm/kvm-tmm.c +new file mode 100644 +index 0000000000..efe2ca0006 +--- /dev/null ++++ b/target/arm/kvm-tmm.c +@@ -0,0 +1,344 @@ ++/* ++ * QEMU add virtcca cvm feature. ++ * ++ * Copyright (c) Huawei Technologies Co., Ltd. 
2023-2024. All rights reserved. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "exec/confidential-guest-support.h" ++#include "hw/boards.h" ++#include "hw/core/cpu.h" ++#include "kvm_arm.h" ++#include "migration/blocker.h" ++#include "qapi/error.h" ++#include "qom/object_interfaces.h" ++#include "sysemu/kvm.h" ++#include "sysemu/runstate.h" ++#include "hw/loader.h" ++ ++#define TYPE_TMM_GUEST "tmm-guest" ++OBJECT_DECLARE_SIMPLE_TYPE(TmmGuest, TMM_GUEST) ++ ++#define TMM_PAGE_SIZE qemu_real_host_page_size() ++#define TMM_MAX_PMU_CTRS 0x20 ++#define TMM_MAX_CFG 5 ++ ++struct TmmGuest { ++ ConfidentialGuestSupport parent_obj; ++ GSList *ram_regions; ++ TmmGuestMeasurementAlgo measurement_algo; ++ uint32_t sve_vl; ++ uint32_t num_pmu_cntrs; ++}; ++ ++typedef struct { ++ hwaddr base1; ++ hwaddr len1; ++ hwaddr base2; ++ hwaddr len2; ++ bool populate; ++} TmmRamRegion; ++ ++static TmmGuest *tmm_guest; ++ ++bool kvm_arm_tmm_enabled(void) ++{ ++ return !!tmm_guest; ++} ++ ++static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp) ++{ ++ int ret = 1; ++ const char *cfg_str; ++ struct kvm_cap_arm_tmm_config_item args = { ++ .cfg = cfg, ++ }; ++ ++ switch (cfg) { ++ case KVM_CAP_ARM_TMM_CFG_RPV: ++ return 0; ++ case KVM_CAP_ARM_TMM_CFG_HASH_ALGO: ++ switch (guest->measurement_algo) { ++ case TMM_GUEST_MEASUREMENT_ALGO_DEFAULT: ++ return 0; ++ case TMM_GUEST_MEASUREMENT_ALGO_SHA256: ++ args.hash_algo = KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA256; ++ break; ++ case TMM_GUEST_MEASUREMENT_ALGO_SHA512: ++ args.hash_algo = KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA512; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ cfg_str = "hash algorithm"; ++ break; ++ case KVM_CAP_ARM_TMM_CFG_SVE: ++ if (!guest->sve_vl) { ++ return 0; ++ } ++ args.sve_vq = guest->sve_vl / 128; ++ cfg_str = "SVE"; ++ break; ++ case KVM_CAP_ARM_TMM_CFG_DBG: 
++ return 0; ++ case KVM_CAP_ARM_TMM_CFG_PMU: ++ if (!guest->num_pmu_cntrs) { ++ return 0; ++ } ++ args.num_pmu_cntrs = guest->num_pmu_cntrs; ++ cfg_str = "PMU"; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ KVM_CAP_ARM_TMM_CONFIG_CVM, (intptr_t)&args); ++ if (ret) { ++ error_setg_errno(errp, -ret, "TMM: failed to configure %s", cfg_str); ++ } ++ ++ return ret; ++} ++ ++static gint tmm_compare_ram_regions(gconstpointer a, gconstpointer b) ++{ ++ const TmmRamRegion *ra = a; ++ const TmmRamRegion *rb = b; ++ ++ g_assert(ra->base1 != rb->base1); ++ return ra->base1 < rb->base1 ? -1 : 1; ++} ++ ++void tmm_add_ram_region(hwaddr base1, hwaddr len1, hwaddr base2, hwaddr len2, bool populate) ++{ ++ TmmRamRegion *region; ++ ++ region = g_new0(TmmRamRegion, 1); ++ region->base1 = QEMU_ALIGN_DOWN(base1, TMM_PAGE_SIZE); ++ region->len1 = QEMU_ALIGN_UP(len1, TMM_PAGE_SIZE); ++ region->base2 = QEMU_ALIGN_DOWN(base2, TMM_PAGE_SIZE); ++ region->len2 = QEMU_ALIGN_UP(len2, TMM_PAGE_SIZE); ++ region->populate = populate; ++ ++ tmm_guest->ram_regions = g_slist_insert_sorted(tmm_guest->ram_regions, ++ region, tmm_compare_ram_regions); ++} ++ ++static void tmm_populate_region(gpointer data, gpointer unused) ++{ ++ int ret; ++ const TmmRamRegion *region = data; ++ struct kvm_cap_arm_tmm_populate_region_args populate_args = { ++ .populate_ipa_base1 = region->base1, ++ .populate_ipa_size1 = region->len1, ++ .populate_ipa_base2 = region->base2, ++ .populate_ipa_size2 = region->len2, ++ .flags = KVM_ARM_TMM_POPULATE_FLAGS_MEASURE, ++ }; ++ ++ if (!region->populate) { ++ return; ++ } ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ KVM_CAP_ARM_TMM_POPULATE_CVM, ++ (intptr_t)&populate_args); ++ if (ret) { ++ error_report("TMM: failed to populate cvm region (0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx"): %s", ++ region->base1, region->len1, region->base2, region->len2, 
strerror(-ret)); ++ exit(1); ++ } ++} ++ ++static int tmm_create_rd(Error **errp) ++{ ++ int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ KVM_CAP_ARM_TMM_CREATE_RD); ++ if (ret) { ++ error_setg_errno(errp, -ret, "TMM: failed to create tmm Descriptor"); ++ } ++ return ret; ++} ++ ++static void tmm_vm_state_change(void *opaque, bool running, RunState state) ++{ ++ int ret; ++ CPUState *cs; ++ ++ if (!running) { ++ return; ++ } ++ ++ g_slist_foreach(tmm_guest->ram_regions, tmm_populate_region, NULL); ++ g_slist_free_full(g_steal_pointer(&tmm_guest->ram_regions), g_free); ++ ++ CPU_FOREACH(cs) { ++ ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_TEC); ++ if (ret) { ++ error_report("TMM: failed to finalize vCPU: %s", strerror(-ret)); ++ exit(1); ++ } ++ } ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ KVM_CAP_ARM_TMM_ACTIVATE_CVM); ++ if (ret) { ++ error_report("TMM: failed to activate cvm: %s", strerror(-ret)); ++ exit(1); ++ } ++} ++ ++int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ int ret; ++ int cfg; ++ ++ if (!tmm_guest) { ++ return -ENODEV; ++ } ++ ++ if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_TMM)) { ++ error_setg(errp, "KVM does not support TMM"); ++ return -ENODEV; ++ } ++ ++ for (cfg = 0; cfg < TMM_MAX_CFG; cfg++) { ++ ret = tmm_configure_one(tmm_guest, cfg, &error_abort); ++ if (ret) { ++ return ret; ++ } ++ } ++ ++ ret = tmm_create_rd(&error_abort); ++ if (ret) { ++ return ret; ++ } ++ ++ qemu_add_vm_change_state_handler(tmm_vm_state_change, NULL); ++ return 0; ++} ++ ++static void tmm_get_sve_vl(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ ++ visit_type_uint32(v, name, &guest->sve_vl, errp); ++} ++ ++static void tmm_set_sve_vl(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ uint32_t value; ++ ++ if (!visit_type_uint32(v, name, &value, errp)) { ++ return; ++ } 
++ ++ if (value & 0x7f || value >= ARM_MAX_VQ * 128) { ++ error_setg(errp, "invalid SVE vector length"); ++ return; ++ } ++ ++ guest->sve_vl = value; ++} ++ ++static void tmm_get_num_pmu_cntrs(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ ++ visit_type_uint32(v, name, &guest->num_pmu_cntrs, errp); ++} ++ ++static void tmm_set_num_pmu_cntrs(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ uint32_t value; ++ ++ if (!visit_type_uint32(v, name, &value, errp)) { ++ return; ++ } ++ ++ if (value >= TMM_MAX_PMU_CTRS) { ++ error_setg(errp, "invalid number of PMU counters"); ++ return; ++ } ++ ++ guest->num_pmu_cntrs = value; ++} ++ ++static int tmm_get_measurement_algo(Object *obj, Error **errp G_GNUC_UNUSED) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ ++ return guest->measurement_algo; ++} ++ ++static void tmm_set_measurement_algo(Object *obj, int algo, Error **errp G_GNUC_UNUSED) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ ++ guest->measurement_algo = algo; ++} ++ ++static void tmm_guest_class_init(ObjectClass *oc, void *data) ++{ ++ object_class_property_add_enum(oc, "measurement-algo", ++ "TmmGuestMeasurementAlgo", ++ &TmmGuestMeasurementAlgo_lookup, ++ tmm_get_measurement_algo, ++ tmm_set_measurement_algo); ++ object_class_property_set_description(oc, "measurement-algo", ++ "cvm measurement algorithm ('sha256', 'sha512')"); ++ /* ++ * This is not ideal. Normally SVE parameters are given to -cpu, but the ++ * cvm parameters are needed much earlier than CPU initialization. We also ++ * don't have a way to discover what is supported at the moment, the idea is ++ * that the user knows exactly what hardware it is running on because these ++ * parameters are part of the measurement and play in the attestation. 
++ */ ++ object_class_property_add(oc, "sve-vector-length", "uint32", tmm_get_sve_vl, ++ tmm_set_sve_vl, NULL, NULL); ++ object_class_property_set_description(oc, "sve-vector-length", ++ "SVE vector length. 0 disables SVE (the default)"); ++ object_class_property_add(oc, "num-pmu-counters", "uint32", ++ tmm_get_num_pmu_cntrs, tmm_set_num_pmu_cntrs, ++ NULL, NULL); ++ object_class_property_set_description(oc, "num-pmu-counters", ++ "Number of PMU counters"); ++} ++ ++static void tmm_guest_instance_init(Object *obj) ++{ ++ if (tmm_guest) { ++ error_report("a single instance of TmmGuest is supported"); ++ exit(1); ++ } ++ tmm_guest = TMM_GUEST(obj); ++} ++ ++static const TypeInfo tmm_guest_info = { ++ .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .name = TYPE_TMM_GUEST, ++ .instance_size = sizeof(struct TmmGuest), ++ .instance_init = tmm_guest_instance_init, ++ .class_init = tmm_guest_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static void tmm_register_types(void) ++{ ++ type_register_static(&tmm_guest_info); ++} ++type_init(tmm_register_types); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 1ceb72a1c1..ee5ba68305 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -613,6 +613,10 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) + continue; + } + ++ if (virtcca_cvm_enabled() && regidx == KVM_REG_ARM_TIMER_CNT) { ++ continue; ++ } ++ + switch (regidx & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U32: + v32 = cpu->cpreg_values[i]; +@@ -1212,7 +1216,7 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) + + bool kvm_arch_cpu_check_are_resettable(void) + { +- return true; ++ return !virtcca_cvm_enabled(); + } + + static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 8f01d485b0..b099287ed0 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -584,6 +584,11 @@ static int kvm_arm_sve_set_vls(CPUState *cs) + + 
assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); + ++ if (virtcca_cvm_enabled()) { ++ /* Already set through tmm config */ ++ return 0; ++ } ++ + return kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_VLS, &vls[0]); + } + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index bf4df54c96..d6c7139f4a 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -388,6 +388,11 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa); + + int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + ++void tmm_add_ram_region(hwaddr base1, hwaddr len1, hwaddr base2, hwaddr len2, bool populate); ++ ++int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp); ++bool kvm_arm_tmm_enabled(void); ++ + /** + * kvm_arm_set_smccc_filter + * @func: funcion +@@ -475,6 +480,17 @@ static inline int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction) + { + g_assert_not_reached(); + } ++ ++static inline int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp G_GNUC_UNUSED) ++{ ++ g_assert_not_reached(); ++} ++ ++static inline void tmm_add_ram_region(hwaddr base1, hwaddr len1, hwaddr base2, ++ hwaddr len2, bool populate) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +diff --git a/target/arm/meson.build b/target/arm/meson.build +index d1dd4932ed..389ee54658 100644 +--- a/target/arm/meson.build ++++ b/target/arm/meson.build +@@ -10,6 +10,7 @@ arm_ss.add(zlib) + + arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c'), if_false: files('kvm-stub.c')) + arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) ++arm_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c', 'kvm64.c', 'kvm-tmm.c'), if_false: files('kvm-stub.c')) + + arm_ss.add(when: 'TARGET_AARCH64', if_true: files( + 'cpu64.c', +-- +2.41.0.windows.1 + diff --git a/Add-virtCCA-Coda-annotation.patch b/Add-virtCCA-Coda-annotation.patch new file mode 100644 index 0000000000000000000000000000000000000000..607c81d89c5b02fc04641902efeb79054e850b0b --- /dev/null +++ 
b/Add-virtCCA-Coda-annotation.patch @@ -0,0 +1,39 @@ +From 0cf5a4c56d34542bcc2f646446bf54828a51a014 Mon Sep 17 00:00:00 2001 +From: yangxiangkai +Date: Tue, 12 Nov 2024 09:03:51 +0800 +Subject: [PATCH] Add virtCCA Coda annotation Adjust the position of the + security device Signed-off-by: yangxiangkai + +--- + hw/arm/virt.c | 1 + + linux-headers/linux/vfio.h | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e73a795d3d..a744393f6e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -162,6 +162,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, + [VIRT_CPUHP_ACPI] = { 0x090c0000, ACPI_CPU_HOTPLUG_REG_LEN}, ++ /* In the virtCCA scenario, this space is used for MSI interrupt mapping */ + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index c27a43d74b..5b1e2871af 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -225,7 +225,7 @@ struct vfio_device_info { + #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ + #define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ + #define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */ +-#define VFIO_DEVICE_FLAGS_SECURE (1 << 9) /* secure pci device */ ++#define VFIO_DEVICE_FLAGS_SECURE (1 << 15) /* secure pci device */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ +-- +2.41.0.windows.1 + diff --git a/Added-CoDA-feature-support-in-the-context-of-CVM.-Wh.patch b/Added-CoDA-feature-support-in-the-context-of-CVM.-Wh.patch new file mode 100644 index 0000000000000000000000000000000000000000..57b712c803937b3b79b0ab3d8f25490216da4a3a --- 
/dev/null +++ b/Added-CoDA-feature-support-in-the-context-of-CVM.-Wh.patch @@ -0,0 +1,107 @@ +From 1f0c212191d0f63744ef61e0725ab4c859b1d189 Mon Sep 17 00:00:00 2001 +From: yangxiangkai +Date: Mon, 23 Sep 2024 19:23:37 +0800 +Subject: [PATCH] Added CoDA feature support in the context of CVM. When + virtcca cvm is enabled, the iommu is tagged as secure. + +--- + hw/vfio/container.c | 15 +++++++++++++++ + hw/virtio/virtio-bus.c | 7 +++++++ + linux-headers/linux/vfio.h | 2 ++ + 3 files changed, 24 insertions(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index d8b9117f4f..422235a221 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -33,6 +33,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "migration/migration.h" ++#include "sysemu/kvm.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -399,6 +400,14 @@ static int vfio_get_iommu_type(VFIOContainer *container, + VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU }; + int i; + ++ if (virtcca_cvm_enabled()) { ++ if (ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_S_IOMMU)) { ++ return VFIO_TYPE1v2_S_IOMMU; ++ } else { ++ return -errno; ++ } ++ } ++ + for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { + if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { + return iommu_types[i]; +@@ -625,6 +634,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: ++ case VFIO_TYPE1v2_S_IOMMU: + { + struct vfio_iommu_type1_info *info; + +@@ -857,6 +867,11 @@ static int vfio_get_device(VFIOGroup *group, const char *name, + return -1; + } + ++ if (!virtcca_cvm_enabled() && (info->flags & VFIO_DEVICE_FLAGS_SECURE)) { ++ error_setg(errp, "Normal vm cannot use confidential device."); ++ return -1; ++ } ++ + /* + * Set discarding of RAM as not broken for this group if the driver knows + * the device operates compatibly with discarding. 
Setting must be +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index 4f16e7ef77..749df6478e 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -30,6 +30,7 @@ + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio.h" + #include "exec/address-spaces.h" ++#include "sysemu/kvm.h" + + /* #define DEBUG_VIRTIO_BUS */ + +@@ -71,6 +72,12 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + ++ if (virtcca_cvm_enabled() && (strcmp(vdev->name, "vhost-user-fs") == 0)) { ++ /* VIRTIO_F_IOMMU_PLATFORM should be enabled for vhost-user-fs using swiotlb */ ++ error_setg(errp, "iommu_platform is not supported by this device"); ++ return; ++ } ++ + if (klass->device_plugged != NULL) { + klass->device_plugged(qbus->parent, &local_err); + } +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 956154e509..c27a43d74b 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -25,6 +25,7 @@ + #define VFIO_TYPE1_IOMMU 1 + #define VFIO_SPAPR_TCE_IOMMU 2 + #define VFIO_TYPE1v2_IOMMU 3 ++#define VFIO_TYPE1v2_S_IOMMU 12 + /* + * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping). This + * capability is subject to change as groups are added or removed. 
+@@ -224,6 +225,7 @@ struct vfio_device_info { + #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ + #define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ + #define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */ ++#define VFIO_DEVICE_FLAGS_SECURE (1 << 9) /* secure pci device */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ +-- +2.41.0.windows.1 + diff --git a/Avoid-taking-address-of-out-of-bounds-array-index.patch b/Avoid-taking-address-of-out-of-bounds-array-index.patch new file mode 100644 index 0000000000000000000000000000000000000000..54b034ae62416394b610b1a23508c0912f98a2b8 --- /dev/null +++ b/Avoid-taking-address-of-out-of-bounds-array-index.patch @@ -0,0 +1,39 @@ +From 8ac5c38a54d407b363d6633eb01806b0e9aaa15e Mon Sep 17 00:00:00 2001 +From: yinxiuxiu +Date: Fri, 22 Nov 2024 14:45:09 +0800 +Subject: [PATCH] Avoid taking address of out-of-bounds array index + +Signed-off-by: yinxiuxiu +--- + hw/intc/openpic.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c +index 0f99b77a17..d74ec11af4 100644 +--- a/hw/intc/openpic.c ++++ b/hw/intc/openpic.c +@@ -1031,13 +1031,14 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, + s_IRQ = IRQ_get_next(opp, &dst->servicing); + /* Check queued interrupts. 
*/ + n_IRQ = IRQ_get_next(opp, &dst->raised); +- src = &opp->src[n_IRQ]; +- if (n_IRQ != -1 && +- (s_IRQ == -1 || +- IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { +- DPRINTF("Raise OpenPIC INT output cpu %d irq %d", +- idx, n_IRQ); +- qemu_irq_raise(opp->dst[idx].irqs[OPENPIC_OUTPUT_INT]); ++ if (n_IRQ != -1) { ++ src = &opp->src[n_IRQ]; ++ if (s_IRQ == -1 || ++ IVPR_PRIORITY(src->ivpr) > dst->servicing.priority) { ++ DPRINTF("Raise OpenPIC INT output cpu %d irq %d", ++ idx, n_IRQ); ++ qemu_irq_raise(opp->dst[idx].irqs[OPENPIC_OUTPUT_INT]); ++ } + } + break; + default: +-- +2.41.0.windows.1 + diff --git a/Avoid-unaligned-fetch-in-ladr_match.patch b/Avoid-unaligned-fetch-in-ladr_match.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f553e9da8fc5115d5d57110147bccaf132e29ea --- /dev/null +++ b/Avoid-unaligned-fetch-in-ladr_match.patch @@ -0,0 +1,37 @@ +From d2ee29691b6d6b48ba8da179e97572f5a6684a9d Mon Sep 17 00:00:00 2001 +From: gubin +Date: Mon, 18 Nov 2024 14:47:25 +0800 +Subject: [PATCH] Avoid unaligned fetch in ladr_match() + +cherry-pick from 6a5287ce80470bb8df95901d73ee779a64e70c3a + +There is no guarantee that the PCNetState is allocated such that +csr[8] is allocated on an 8-byte boundary. Since not all hosts are +capable of unaligned fetches the 16-bit elements need to be fetched +individually to avoid a potential fault. 
Closes issue #2143 + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2143 +Signed-off-by: Nick Briggs +Reviewed-by: Peter Maydell +Signed-off-by: Jason Wang +Signed-off-by: gubin +--- + hw/net/pcnet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c +index a7e123e60d..7d574f487b 100644 +--- a/hw/net/pcnet.c ++++ b/hw/net/pcnet.c +@@ -632,7 +632,7 @@ static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size) + { + struct qemu_ether_header *hdr = (void *)buf; + if ((*(hdr->ether_dhost)&0x01) && +- ((uint64_t *)&s->csr[8])[0] != 0LL) { ++ (s->csr[8] | s->csr[9] | s->csr[10] | s->csr[11]) != 0) { + uint8_t ladr[8] = { + s->csr[8] & 0xff, s->csr[8] >> 8, + s->csr[9] & 0xff, s->csr[9] >> 8, +-- +2.41.0.windows.1 + diff --git a/BUGFIX-Enforce-isolation-for-virtcca_shared_hugepage.patch b/BUGFIX-Enforce-isolation-for-virtcca_shared_hugepage.patch new file mode 100644 index 0000000000000000000000000000000000000000..148b47869f436ccf3ade5d19c8a65086d9780215 --- /dev/null +++ b/BUGFIX-Enforce-isolation-for-virtcca_shared_hugepage.patch @@ -0,0 +1,43 @@ +From 458d90e226d5833661f9257f6af57c14f9b9bdfe Mon Sep 17 00:00:00 2001 +From: gongchangsui +Date: Mon, 17 Mar 2025 02:52:21 -0400 +Subject: [PATCH] BUGFIX: Enforce isolation for virtcca_shared_hugepage + +Add memory isolation enforcement when virtcca hugepage is disabled. 
+ +Signed-off-by: gongchangsui +--- + hw/core/numa.c | 3 ++- + hw/virtio/vhost.c | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index e7c48dab61..c691578ef5 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -728,7 +728,8 @@ void numa_complete_configuration(MachineState *ms) + memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id, + ms->ram_size); + numa_init_memdev_container(ms, ms->ram); +- if (virtcca_cvm_enabled() && virtcca_shared_hugepage->ram_block) { ++ if (virtcca_cvm_enabled() && virtcca_shared_hugepage && ++ virtcca_shared_hugepage->ram_block) { + virtcca_shared_memory_configuration(ms); + } + } +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 8b95558013..4bf0b03977 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -1617,7 +1617,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + hdev->log_size = 0; + hdev->log_enabled = false; + hdev->started = false; +- if (virtcca_cvm_enabled()) { ++ if (virtcca_cvm_enabled() && virtcca_shared_hugepage && virtcca_shared_hugepage->ram_block) { + memory_listener_register(&hdev->memory_listener, + &address_space_virtcca_shared_memory); + } else { +-- +2.41.0.windows.1 + diff --git a/BinDir.tar.gz b/BinDir.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee03657d69f4a2b78d8ab5e6e0ba8998bef9e6a4 Binary files /dev/null and b/BinDir.tar.gz differ diff --git a/Bugfix-Correctly-set-vms-bootinfo.confidential-in-vi.patch b/Bugfix-Correctly-set-vms-bootinfo.confidential-in-vi.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a2af28ea1507c665b963d533124195309d28dde --- /dev/null +++ b/Bugfix-Correctly-set-vms-bootinfo.confidential-in-vi.patch @@ -0,0 +1,33 @@ +From 07e397a40e7f33ca980b29ba6c8b6de0c7419991 Mon Sep 17 00:00:00 2001 +From: yxk +Date: Wed, 20 Aug 2025 03:22:03 +0800 +Subject: [PATCH] Bugfix: Correctly set vms->bootinfo.confidential in virtCCA + senarios. 
+ +Both CCA and virtCCA scenarios can set vms->bootinfo.confidential in +hw/arm/virt.c. + +Signed-off-by: yxk +--- + hw/arm/virt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f12bc645d2..cf4156ed49 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2930,9 +2930,9 @@ static void machvirt_init(MachineState *machine) + vms->bootinfo.firmware_loaded = firmware_loaded; + vms->bootinfo.firmware_base = vms->memmap[VIRT_FLASH].base; + vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size; +- vms->bootinfo.confidential = virtcca_cvm_enabled(); + vms->bootinfo.psci_conduit = vms->psci_conduit; +- vms->bootinfo.confidential = virt_machine_is_confidential(vms); ++ vms->bootinfo.confidential = virt_machine_is_confidential(vms) || ++ virtcca_cvm_enabled(); + vms->bootinfo.skip_bootloader = vms->bootinfo.confidential; + arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo); + +-- +2.33.0 + diff --git a/Bugfix-Fix-compile-error-in-aarch32.patch b/Bugfix-Fix-compile-error-in-aarch32.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d49dfd17f8e2fae13b16b9d8ce7bcfba47f8b18 --- /dev/null +++ b/Bugfix-Fix-compile-error-in-aarch32.patch @@ -0,0 +1,28 @@ +From 15bdaa95d03419ba00c34fe3249c8ab50faee9de Mon Sep 17 00:00:00 2001 +From: yxk +Date: Fri, 22 Aug 2025 02:34:27 +0800 +Subject: [PATCH] Bugfix: Fix compile error in aarch32. + +Add definition of virtcca_cvm_allowed in include/sysemu/kvm.h when +CONFIG_KVM_IS_POSSIBLE is disabled. 
+ +Signed-off-by: yxk +--- + include/sysemu/kvm.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 5f3f779de4..7602cd4429 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -180,6 +180,7 @@ extern bool kvm_csv3_allowed; + #define kvm_msi_devid_required() (false) + #define kvm_csv3_enabled() (false) + #define kvm_csv3_should_set_priv_mem() (false) ++extern bool virtcca_cvm_allowed; + + #endif /* CONFIG_KVM_IS_POSSIBLE */ + +-- +2.33.0 + diff --git a/Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch b/Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch deleted file mode 100644 index 8d492058d77ba70d51c88bffcbef7a1ae7b28adb..0000000000000000000000000000000000000000 --- a/Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 38734e26ce3840d459da13607a9d46de24a15388 Mon Sep 17 00:00:00 2001 -From: kevinZhu -Date: Thu, 29 Oct 2020 19:24:48 +0800 -Subject: [PATCH] Bugfix: hw/acpi: Use max_cpus instead of cpus when build PPTT - table - -The field "cpus" is the initial number of CPU for guest, and the field "max_cpus" -is the max number of CPU after CPU hotplug. When building PPTT for guest, we -should take all CPUs into account, otherwise the "smp_sockets" is wrong. 
- -Fixes: 7cfcd8c8a2fe ("build smt processor structure to support smt topology") -Signed-off-by: Keqian Zhu ---- - hw/acpi/aml-build.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 8a3b51c835..f01669df57 100644 ---- a/hw/acpi/aml-build.c -+++ b/hw/acpi/aml-build.c -@@ -167,7 +167,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) - struct offset_status offset; - const MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cores = ms->smp.cores; -- unsigned int smp_sockets = ms->smp.cpus / (smp_cores * ms->smp.threads); -+ unsigned int smp_sockets = ms->smp.max_cpus / (smp_cores * ms->smp.threads); - - acpi_data_push(table_data, sizeof(AcpiTableHeader)); - --- -2.27.0 - diff --git a/COLO-compare-Fix-incorrect-if-logic.patch b/COLO-compare-Fix-incorrect-if-logic.patch deleted file mode 100644 index 8deb1b31bb0f1f801ec552f6695503809f148acb..0000000000000000000000000000000000000000 --- a/COLO-compare-Fix-incorrect-if-logic.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 124032e79e354f5e7cc28958f2ca6b9f898da719 Mon Sep 17 00:00:00 2001 -From: Fan Yang -Date: Tue, 24 Sep 2019 22:08:29 +0800 -Subject: [PATCH] COLO-compare: Fix incorrect `if` logic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -'colo_mark_tcp_pkt' should return 'true' when packets are the same, and -'false' otherwise. However, it returns 'true' when -'colo_compare_packet_payload' returns non-zero while -'colo_compare_packet_payload' is just a 'memcmp'. The result is that -COLO-compare reports inconsistent TCP packets when they are actually -the same. 
- -Fixes: f449c9e549c ("colo: compare the packet based on the tcp sequence number") -Cc: qemu-stable@nongnu.org -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Fan Yang -Signed-off-by: Jason Wang -(cherry picked from commit 1e907a32b77e5d418538453df5945242e43224fa) -Signed-off-by: Michael Roth ---- - net/colo-compare.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/colo-compare.c b/net/colo-compare.c -index bf10526..9827c0e 100644 ---- a/net/colo-compare.c -+++ b/net/colo-compare.c -@@ -287,7 +287,7 @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt, - *mark = 0; - - if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) { -- if (colo_compare_packet_payload(ppkt, spkt, -+ if (!colo_compare_packet_payload(ppkt, spkt, - ppkt->header_size, spkt->header_size, - ppkt->payload_size)) { - *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY; -@@ -297,7 +297,7 @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt, - - /* one part of secondary packet payload still need to be compared */ - if (!after(ppkt->seq_end, spkt->seq_end)) { -- if (colo_compare_packet_payload(ppkt, spkt, -+ if (!colo_compare_packet_payload(ppkt, spkt, - ppkt->header_size + ppkt->offset, - spkt->header_size + spkt->offset, - ppkt->payload_size - ppkt->offset)) { -@@ -316,7 +316,7 @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt, - /* primary packet is longer than secondary packet, compare - * the same part and mark the primary packet offset - */ -- if (colo_compare_packet_payload(ppkt, spkt, -+ if (!colo_compare_packet_payload(ppkt, spkt, - ppkt->header_size + ppkt->offset, - spkt->header_size + spkt->offset, - spkt->payload_size - spkt->offset)) { --- -1.8.3.1 - diff --git a/Change-vmstate_cpuhp_sts-vmstateDescription-version_.patch b/Change-vmstate_cpuhp_sts-vmstateDescription-version_.patch new file mode 100644 index 0000000000000000000000000000000000000000..164bf17500220aad740ed0f1100e01d224b13947 --- 
/dev/null +++ b/Change-vmstate_cpuhp_sts-vmstateDescription-version_.patch @@ -0,0 +1,30 @@ +From 0fc0686798aba89c4d4d94f7e0c8e513cfc473b1 Mon Sep 17 00:00:00 2001 +From: lijunwei +Date: Fri, 22 Nov 2024 17:09:17 +0800 +Subject: [PATCH] Change vmstate_cpuhp_sts vmstateDescription version_id + + fix live migration failed error message: + "qemu-kvm: Missing section footer for 0000:00:01.3/piix4_pm" + change vmstate_cpuhp_sts vmstateDescription version_id + + Signed-off-by: lijunwei +--- + hw/acpi/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 292e1daca2..4ab27ac66e 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -316,7 +316,7 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + + static const VMStateDescription vmstate_cpuhp_sts = { + .name = "CPU hotplug device state", +- .version_id = 1, ++ .version_id = 2, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_inserting, AcpiCpuStatus), +-- +2.41.0.windows.1 + diff --git a/Consider-discard-option-when-writing-zeros.patch b/Consider-discard-option-when-writing-zeros.patch new file mode 100644 index 0000000000000000000000000000000000000000..7649a823f86c10def1ebb8fef72479f2dcd79cb2 --- /dev/null +++ b/Consider-discard-option-when-writing-zeros.patch @@ -0,0 +1,287 @@ +From 60b9463e35fe801e49db14539ccb8c9a6057e5c3 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 12 Oct 2024 14:12:17 +0800 +Subject: [PATCH] Consider discard option when writing zeros + +When opening an image with discard=off, we punch hole in the image when +writing zeroes, making the image sparse. This breaks users that want to +ensure that writes cannot fail with ENOSPACE by using fully allocated +images[1]. + +bdrv_co_pwrite_zeroes() correctly disables BDRV_REQ_MAY_UNMAP if we +opened the child without discard=unmap or discard=on. But we don't go +through this function when accessing the top node. 
Move the check down +to bdrv_co_do_pwrite_zeroes() which seems to be used in all code paths. + +This change implements the documented behavior, punching holes only when +opening the image with discard=on or discard=unmap. This may not be the +best default but can improve it later. + +The test depends on a file system supporting discard, deallocating the +entire file when punching hole with the length of the entire file. +Tested with xfs, ext4, and tmpfs. + +[1] https://lists.nongnu.org/archive/html/qemu-discuss/2024-06/msg00003.html + +Signed-off-by: Nir Soffer +Message-id: 20240628202058.1964986-3-nsoffer@redhat.com +Signed-off-by: Stefan Hajnoczi +Signed-off-by: dinglimin +--- + block/io.c | 9 +- + tests/qemu-iotests/tests/write-zeroes-unmap | 127 ++++++++++++++++++ + .../qemu-iotests/tests/write-zeroes-unmap.out | 81 +++++++++++ + 3 files changed, 213 insertions(+), 4 deletions(-) + create mode 100644 tests/qemu-iotests/tests/write-zeroes-unmap + create mode 100644 tests/qemu-iotests/tests/write-zeroes-unmap.out + +diff --git a/block/io.c b/block/io.c +index 7e62fabbf5..a280a5a4c9 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1885,6 +1885,11 @@ bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + return -EINVAL; + } + ++ /* If opened with discard=off we should never unmap. 
*/ ++ if (!(bs->open_flags & BDRV_O_UNMAP)) { ++ flags &= ~BDRV_REQ_MAY_UNMAP; ++ } ++ + /* Invalidate the cached block-status data range if this write overlaps */ + bdrv_bsc_invalidate_range(bs, offset, bytes); + +@@ -2338,10 +2343,6 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, + trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags); + assert_bdrv_graph_readable(); + +- if (!(child->bs->open_flags & BDRV_O_UNMAP)) { +- flags &= ~BDRV_REQ_MAY_UNMAP; +- } +- + return bdrv_co_pwritev(child, offset, bytes, NULL, + BDRV_REQ_ZERO_WRITE | flags); + } +diff --git a/tests/qemu-iotests/tests/write-zeroes-unmap b/tests/qemu-iotests/tests/write-zeroes-unmap +new file mode 100644 +index 0000000000..7cfeeaf839 +--- /dev/null ++++ b/tests/qemu-iotests/tests/write-zeroes-unmap +@@ -0,0 +1,127 @@ ++#!/usr/bin/env bash ++# group: quick ++# ++# Test write zeros unmap. ++# ++# Copyright (C) Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++trap _cleanup_test_img exit ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. 
./common.filter ++ ++_supported_fmt raw ++_supported_proto file ++_supported_os Linux ++ ++create_test_image() { ++ _make_test_img -f $IMGFMT 1m ++} ++ ++filter_command() { ++ _filter_testdir | _filter_qemu_io | _filter_qemu | _filter_hmp ++} ++ ++print_disk_usage() { ++ du -sh $TEST_IMG | _filter_testdir ++} ++ ++echo ++echo "=== defaults - write zeros ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io none0 "write -z 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT \ ++ | filter_command ++print_disk_usage ++ ++echo ++echo "=== defaults - write zeros unmap ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io none0 "write -zu 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT \ ++ | filter_command ++print_disk_usage ++ ++ ++echo ++echo "=== defaults - write actual zeros ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io none0 "write -P 0 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT \ ++ | filter_command ++print_disk_usage ++ ++echo ++echo "=== discard=off - write zeroes unmap ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io none0 "write -zu 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT,discard=off \ ++ | filter_command ++print_disk_usage ++ ++echo ++echo "=== detect-zeroes=on - write actual zeros ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io none0 "write -P 0 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT,detect-zeroes=on \ ++ | filter_command ++print_disk_usage ++ ++echo ++echo "=== detect-zeroes=on,discard=on - write actual zeros ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io none0 "write -P 0 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT,detect-zeroes=on,discard=on \ ++ | filter_command ++print_disk_usage ++ ++echo ++echo "=== discard=on - write zeroes ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io 
none0 "write -z 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT,discard=on \ ++ | filter_command ++print_disk_usage ++ ++echo ++echo "=== discard=on - write zeroes unmap ===" ++echo ++ ++create_test_image ++echo -e 'qemu-io none0 "write -zu 0 1m"\nquit' \ ++ | $QEMU -monitor stdio -drive if=none,file=$TEST_IMG,format=$IMGFMT,discard=on \ ++ | filter_command ++print_disk_usage +diff --git a/tests/qemu-iotests/tests/write-zeroes-unmap.out b/tests/qemu-iotests/tests/write-zeroes-unmap.out +new file mode 100644 +index 0000000000..c931994897 +--- /dev/null ++++ b/tests/qemu-iotests/tests/write-zeroes-unmap.out +@@ -0,0 +1,81 @@ ++QA output created by write-zeroes-unmap ++ ++=== defaults - write zeros === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -z 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++(qemu) quit ++1.0M TEST_DIR/t.raw ++ ++=== defaults - write zeros unmap === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -zu 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++(qemu) quit ++1.0M TEST_DIR/t.raw ++ ++=== defaults - write actual zeros === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -P 0 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++(qemu) quit ++1.0M TEST_DIR/t.raw ++ ++=== discard=off - write zeroes unmap === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -zu 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and 
XXX ops/sec) ++(qemu) quit ++1.0M TEST_DIR/t.raw ++ ++=== detect-zeroes=on - write actual zeros === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -P 0 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++(qemu) quit ++1.0M TEST_DIR/t.raw ++ ++=== detect-zeroes=on,discard=on - write actual zeros === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -P 0 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++(qemu) quit ++1.0M TEST_DIR/t.raw ++ ++=== discard=on - write zeroes === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -z 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++(qemu) quit ++1.0M TEST_DIR/t.raw ++ ++=== discard=on - write zeroes unmap === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++QEMU X.Y.Z monitor - type 'help' for more information ++(qemu) qemu-io none0 "write -zu 0 1m" ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++(qemu) quit ++0 TEST_DIR/t.raw +-- +2.41.0.windows.1 + diff --git a/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch b/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b662075670f5b73ff590d750f671bb41313c1cc --- /dev/null +++ b/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch @@ -0,0 +1,27 @@ +From 59f038d21c1901245ba0be417f6285cec465d6c1 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 11:24:32 +0800 +Subject: [PATCH] Currently, while kvm and qemu can not handle some kvm exit, + qemu will do 
vm_stop, which will make vm in pause state. This action make vm + unrecoverable, so send guest panic to libvirt instead. + +--- + accel/kvm/kvm-all.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index e39a810a4e..33f4c6d547 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2993,7 +2993,7 @@ int kvm_cpu_exec(CPUState *cpu) + + if (ret < 0) { + cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); +- vm_stop(RUN_STATE_INTERNAL_ERROR); ++ qemu_system_guest_panicked(cpu_get_crash_info(cpu)); + } + + qatomic_set(&cpu->exit_request, 0); +-- +2.27.0 + diff --git a/Drop-bogus-IPv6-messages.patch b/Drop-bogus-IPv6-messages.patch deleted file mode 100644 index 2fc1e0e780e34b1570fbcfcc4581138c79e7fa46..0000000000000000000000000000000000000000 --- a/Drop-bogus-IPv6-messages.patch +++ /dev/null @@ -1,30 +0,0 @@ -From e8b555c08061ad78920611a5e98ee14fcd967692 Mon Sep 17 00:00:00 2001 -From: Ralf Haferkamp -Date: Fri, 11 Sep 2020 10:55:49 +0800 -Subject: [PATCH] Drop bogus IPv6 messages - -Drop IPv6 message shorter than what's mentioned in the playload -length header (+the size of IPv6 header). They're invalid and could -lead to data leakage in icmp6_send_echoreply(). - -diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c -index d9d2b7e..c2dce52 100644 ---- a/slirp/src/ip6_input.c -+++ b/slirp/src/ip6_input.c -@@ -49,6 +49,13 @@ void ip6_input(struct mbuf *m) - goto bad; - } - -+ // Check if the message size is big enough to hold what's -+ // set in the payload length header. 
If not this is an invalid -+ // packet -+ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { -+ goto bad; -+ } -+ - /* check ip_ttl for a correct ICMP reply */ - if (ip6->ip_hl == 0) { - icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); --- -1.8.3.1 - diff --git a/Fix-calculation-of-minimum-in-colo_compare_tcp.patch b/Fix-calculation-of-minimum-in-colo_compare_tcp.patch new file mode 100644 index 0000000000000000000000000000000000000000..b0178f50575ba3a132e647332a531a40e95bcb46 --- /dev/null +++ b/Fix-calculation-of-minimum-in-colo_compare_tcp.patch @@ -0,0 +1,35 @@ +From f6ad72a5b215bc5b2d8df86cd537bf1c0f468108 Mon Sep 17 00:00:00 2001 +From: zhangchujun +Date: Wed, 30 Oct 2024 13:33:58 +0800 +Subject: [PATCH] Fix calculation of minimum in colo_compare_tcp + +GitHub's CodeQL reports a critical error which is fixed by using the MIN macro: + + Unsigned difference expression compared to zero + +Signed-off-by: Stefan Weil +Cc: qemu-stable@nongnu.org +Reviewed-by: Zhang Chen +Signed-off-by: Jason Wang +Signed-off-by: zhangchujun +--- + net/colo-compare.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/net/colo-compare.c b/net/colo-compare.c +index 7f9e6f89ce..d4e51cb306 100644 +--- a/net/colo-compare.c ++++ b/net/colo-compare.c +@@ -413,8 +413,7 @@ static void colo_compare_tcp(CompareState *s, Connection *conn) + * can ensure that the packet's payload is acknowledged by + * primary and secondary. + */ +- uint32_t min_ack = conn->pack - conn->sack > 0 ? 
+- conn->sack : conn->pack; ++ uint32_t min_ack = MIN(conn->pack, conn->sack); + + pri: + if (g_queue_is_empty(&conn->primary_list)) { +-- +2.41.0.windows.1 + diff --git a/Fix-error-in-virtCCA-CoDA-scenario.patch b/Fix-error-in-virtCCA-CoDA-scenario.patch new file mode 100644 index 0000000000000000000000000000000000000000..d9da628c551e6e53014eee87d1a534e22618fd51 --- /dev/null +++ b/Fix-error-in-virtCCA-CoDA-scenario.patch @@ -0,0 +1,28 @@ +From f80776f3dfd1d05ef3328d5be9fe42df095f4bc1 Mon Sep 17 00:00:00 2001 +From: yxk +Date: Mon, 21 Apr 2025 04:00:46 -0400 +Subject: [PATCH] Fix error in virtCCA CoDA scenario. + +Add 'iommu_type' VFIO_TYPE1v2_S_IOMMU in vfio_get_iommu_class +to avoid error happens in virtCCA CoDA scenario. + +Signed-off-by: yxk +--- + hw/vfio/container.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 64eacfd912..539cf34b20 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -439,6 +439,7 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) + switch (iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: ++ case VFIO_TYPE1v2_S_IOMMU: + klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); + break; + case VFIO_SPAPR_TCE_v2_IOMMU: +-- +2.33.0 + diff --git a/Fix-use-after-free-in-vfio_migration_probe.patch b/Fix-use-after-free-in-vfio_migration_probe.patch deleted file mode 100644 index f0a94e60054da414102dbda43f9d111c4bc2e6d9..0000000000000000000000000000000000000000 --- a/Fix-use-after-free-in-vfio_migration_probe.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 126fc13ebe9c5e58a5b1daeb4e102e6fa5845779 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Fri, 6 Nov 2020 23:32:24 +0530 -Subject: [PATCH] Fix use after free in vfio_migration_probe -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fixes Coverity issue: -CID 1436126: Memory - illegal accesses (USE_AFTER_FREE) - -Fixes: a9e271ec9b36 
("vfio: Add migration region initialization and finalize function") -Signed-off-by: Kirti Wankhede -Reviewed-by: David Edmondson -Reviewed-by: Alex Bennée -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Alex Williamson -Signed-off-by: Kunkun Jiang ---- - hw/vfio/migration.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 1a97784486..8546075706 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -903,8 +903,8 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) - goto add_blocker; - } - -- g_free(info); - trace_vfio_migration_probe(vbasedev->name, info->index); -+ g_free(info); - return 0; - - add_blocker: --- -2.27.0 - diff --git a/Fixed-integer-overflow-in-e1000e.patch b/Fixed-integer-overflow-in-e1000e.patch deleted file mode 100644 index 004390fc5a3d60d2aaf4912d0679c4fa471d28a2..0000000000000000000000000000000000000000 --- a/Fixed-integer-overflow-in-e1000e.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 41077af2c4283c15c0a822017ea51612d15b68f8 Mon Sep 17 00:00:00 2001 -From: Andrew Melnychenko -Date: Wed, 4 Mar 2020 16:20:58 +0200 -Subject: [PATCH 1/5] Fixed integer overflow in e1000e -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1737400 -Fixed setting max_queue_num if there are no peers in -NICConf. qemu_new_nic() creates NICState with 1 NetClientState(index -0) without peers, set max_queue_num to 0 - It prevents undefined -behavior and possible crashes, especially during pcie hotplug. 
- -Fixes: 6f3fbe4ed06 ("net: Introduce e1000e device emulation") -Signed-off-by: Andrew Melnychenko -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Dmitry Fleytman -Signed-off-by: Jason Wang -Signed-off-by: Zhenyu Ye ---- - hw/net/e1000e.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 581f7d03..1e827c4f 100644 ---- a/hw/net/e1000e.c -+++ b/hw/net/e1000e.c -@@ -325,7 +325,7 @@ e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr) - s->nic = qemu_new_nic(&net_e1000e_info, &s->conf, - object_get_typename(OBJECT(s)), dev->id, s); - -- s->core.max_queue_num = s->conf.peers.queues - 1; -+ s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0; - - trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); - memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); --- -2.22.0.windows.1 - diff --git a/HostIOMMUDevice-Introduce-realize_late-callback.patch b/HostIOMMUDevice-Introduce-realize_late-callback.patch new file mode 100644 index 0000000000000000000000000000000000000000..f60aa776270db27cf3efc78b48f0aecaba0ff30b --- /dev/null +++ b/HostIOMMUDevice-Introduce-realize_late-callback.patch @@ -0,0 +1,93 @@ +From 53a82c6a5a22bb41e9bd3f754479baf4ce0845bf Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 5 Aug 2024 09:29:00 +0800 +Subject: [PATCH] HostIOMMUDevice: Introduce realize_late callback + +Previously we have a realize() callback which is called before attachment. +But there are still some elements e.g., ioas not ready before attachment. +So we need a realize_late() callback to further initialize them. + +Currently, this callback is only useful for iommufd backend. For legacy +backend nothing needs to be initialized after attachment. 
+ +Signed-off-by: Zhenzhong Duan +--- + hw/vfio/common.c | 18 +++++++++++++++--- + include/sysemu/host_iommu_device.h | 17 +++++++++++++++++ + 2 files changed, 32 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a8bc1c6055..0be63c5fbc 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1654,6 +1654,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + const VFIOIOMMUClass *ops = + VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + HostIOMMUDevice *hiod = NULL; ++ HostIOMMUDeviceClass *hiod_ops = NULL; + int ret; + + if (vbasedev->iommufd) { +@@ -1664,17 +1665,28 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + + if (!vbasedev->mdev) { + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); ++ hiod_ops = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + vbasedev->hiod = hiod; + } + + ret = ops->attach_device(name, vbasedev, as, errp); + if (ret) { +- object_unref(hiod); +- vbasedev->hiod = NULL; +- return ret; ++ goto err_attach; ++ } ++ ++ if (hiod_ops && hiod_ops->realize_late && ++ !hiod_ops->realize_late(hiod, vbasedev, errp)) { ++ ops->detach_device(vbasedev); ++ ret = -EINVAL; ++ goto err_attach; + } + + return 0; ++ ++err_attach: ++ object_unref(hiod); ++ vbasedev->hiod = NULL; ++ return ret; + } + + void vfio_detach_device(VFIODevice *vbasedev) +diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h +index e4d8300350..84131f5495 100644 +--- a/include/sysemu/host_iommu_device.h ++++ b/include/sysemu/host_iommu_device.h +@@ -64,6 +64,23 @@ struct HostIOMMUDeviceClass { + * Returns: true on success, false on failure. + */ + bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); ++ /** ++ * @realize_late: initialize host IOMMU device instance after attachment, ++ * some elements e.g., ioas are ready only after attachment. ++ * This callback initialize them. ++ * ++ * Optional callback. ++ * ++ * @hiod: pointer to a host IOMMU device instance. 
++ * ++ * @opaque: pointer to agent device of this host IOMMU device, ++ * e.g., VFIO base device or VDPA device. ++ * ++ * @errp: pass an Error out when realize fails. ++ * ++ * Returns: true on success, false on failure. ++ */ ++ bool (*realize_late)(HostIOMMUDevice *hiod, void *opaque, Error **errp); + /** + * @get_cap: check if a host IOMMU device capability is supported. + * +-- +2.41.0.windows.1 + diff --git a/HostIOMMUDevice-Store-the-VFIO-VDPA-agent.patch b/HostIOMMUDevice-Store-the-VFIO-VDPA-agent.patch new file mode 100644 index 0000000000000000000000000000000000000000..34ee17e76c07991a3833957481776df0719a9ecb --- /dev/null +++ b/HostIOMMUDevice-Store-the-VFIO-VDPA-agent.patch @@ -0,0 +1,57 @@ +From 35f33bf18826286c9e9fc739a893b9915c71f43c Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 14 Jun 2024 11:52:51 +0200 +Subject: [PATCH] HostIOMMUDevice: Store the VFIO/VDPA agent + +Store the agent device (VFIO or VDPA) in the host IOMMU device. +This will allow easy access to some of its resources. + +Signed-off-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Reviewed-by: Michael S. 
Tsirkin +--- + hw/vfio/container.c | 1 + + hw/vfio/iommufd.c | 2 ++ + include/sysemu/host_iommu_device.h | 1 + + 3 files changed, 4 insertions(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 10f7635425..8a5a112b6b 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1259,6 +1259,7 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + + hiod->name = g_strdup(vdev->name); + hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); ++ hiod->agent = opaque; + + return true; + } +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 3b75cba26c..7a069ca576 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -735,6 +735,8 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + } data; + uint64_t hw_caps; + ++ hiod->agent = opaque; ++ + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, + &type, &data, sizeof(data), + &hw_caps, errp)) { +diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h +index a57873958b..3e5f058e7b 100644 +--- a/include/sysemu/host_iommu_device.h ++++ b/include/sysemu/host_iommu_device.h +@@ -34,6 +34,7 @@ struct HostIOMMUDevice { + Object parent_obj; + + char *name; ++ void *agent; /* pointer to agent device, ie. 
VFIO or VDPA device */ + HostIOMMUDeviceCaps caps; + }; + +-- +2.41.0.windows.1 + diff --git a/KVM-track-whether-guest-state-is-encrypted.patch b/KVM-track-whether-guest-state-is-encrypted.patch new file mode 100644 index 0000000000000000000000000000000000000000..32a63355987ece920b7affd222f8625cb429a2b2 --- /dev/null +++ b/KVM-track-whether-guest-state-is-encrypted.patch @@ -0,0 +1,122 @@ +From 98c7d031289a52028656a64bd393a5b959209e19 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:10 -0400 +Subject: [PATCH] KVM: track whether guest state is encrypted +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://gitlab.com/qemu-project/qemu/-/commit/5c3131c392f84c660033d511ec39872d8beb4b1e + +So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the +guest state is encrypted, in which case they do nothing. For the new +API using VM types, instead, the ioctls will fail which is a safer and +more robust approach. + +The new API will be the only one available for SEV-SNP and TDX, but it +is also usable for SEV and SEV-ES. In preparation for that, require +architecture-specific KVM code to communicate the point at which guest +state is protected (which must be after kvm_cpu_synchronize_post_init(), +though that might change in the future in order to support migration). +From that point, skip reading registers so that cpu->vcpu_dirty is +never true: if it ever becomes true, kvm_arch_put_registers() will +fail miserably. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +Conflicts: + include/sysemu/kvm.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + accel/kvm/kvm-all.c | 17 ++++++++++++++--- + include/sysemu/kvm.h | 3 +++ + include/sysemu/kvm_int.h | 1 + + target/i386/sev.c | 1 + + 4 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 2cdd615025..50047b9b71 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2782,7 +2782,7 @@ bool kvm_cpu_check_are_resettable(void) + + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + int ret = kvm_arch_get_registers(cpu); + if (ret) { + error_report("Failed to get registers: %s", strerror(-ret)); +@@ -2796,7 +2796,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_state(CPUState *cpu) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL); + } + } +@@ -2831,7 +2831,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_post_init(CPUState *cpu) + { +- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ if (!kvm_state->guest_state_protected) { ++ /* ++ * This runs before the machine_init_done notifiers, and is the last ++ * opportunity to synchronize the state of confidential guests. 
++ */ ++ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ } + } + + static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) +@@ -4223,3 +4229,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) + query_stats_schema_vcpu(first_cpu, &stats_args); + } + } ++ ++void kvm_mark_guest_state_protected(void) ++{ ++ kvm_state->guest_state_protected = true; ++} +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 098257e72f..5f3f779de4 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -604,4 +604,7 @@ int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_star + int kvm_create_shadow_device(PCIDevice *dev); + int kvm_delete_shadow_device(PCIDevice *dev); + #endif ++ ++void kvm_mark_guest_state_protected(void); ++ + #endif +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index b2d2c59477..9a7bc1a4b8 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -87,6 +87,7 @@ struct KVMState + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; ++ bool guest_state_protected; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. 
Linux, glibc and *BSD all treat ioctl numbers as +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b4b42fd716..8c1f4d653e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -936,6 +936,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + if (ret) { + exit(1); + } ++ kvm_mark_guest_state_protected(); + } + + /* query the measurement blob length */ +-- +2.33.0 + diff --git a/Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch b/Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch new file mode 100644 index 0000000000000000000000000000000000000000..97726fce67283fe9b7b5c4d9f069592da0081130 --- /dev/null +++ b/Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch @@ -0,0 +1,38 @@ +From 08a4aa240587fed26c17271bf9af87f0a5997f4a Mon Sep 17 00:00:00 2001 +From: libai +Date: Wed, 26 Mar 2025 18:59:33 +0800 +Subject: [PATCH] Kconfig/iommufd/VDPA: Update IOMMUFD module configuration + dependencies The vDPA module can also use IOMMUFD like the VFIO module. 
+ Therefore, adjust Kconfig to remove the dependency of IOMMUFD on VFIO and add + a reverse dependency on IOMMUFD for vDPA + +Signed-off-by: libai +--- + Kconfig.host | 1 + + backends/Kconfig | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Kconfig.host b/Kconfig.host +index f496475f8e..faf58d9af5 100644 +--- a/Kconfig.host ++++ b/Kconfig.host +@@ -28,6 +28,7 @@ config VHOST_USER + + config VHOST_VDPA + bool ++ select IOMMUFD + + config VHOST_KERNEL + bool +diff --git a/backends/Kconfig b/backends/Kconfig +index 2cb23f62fa..8d0be5a263 100644 +--- a/backends/Kconfig ++++ b/backends/Kconfig +@@ -2,4 +2,3 @@ source tpm/Kconfig + + config IOMMUFD + bool +- depends on VFIO +-- +2.41.0.windows.1 + diff --git a/On-the-Adaptation-of-CCA-and-virtCCA.patch b/On-the-Adaptation-of-CCA-and-virtCCA.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7102a97e9f7e946e3081a78803cba3101b57060 --- /dev/null +++ b/On-the-Adaptation-of-CCA-and-virtCCA.patch @@ -0,0 +1,134 @@ +From 7916c32580dd8e887466fe597ba64dc6e212685f Mon Sep 17 00:00:00 2001 +From: yxk +Date: Wed, 16 Jul 2025 18:47:39 +0800 +Subject: [PATCH] On the Adaptation of CCA and virtCCA. + +We modified virtCCA to use the same Macros as CCA, but did not +change the values of these Macros to keep it compact. 
+ +Signed-off-by: yxk +--- + accel/kvm/kvm-all.c | 4 ---- + hw/arm/virt.c | 1 + + linux-headers/asm-arm64/kvm.h | 3 +-- + linux-headers/linux/kvm.h | 4 +--- + target/arm/kvm-tmm.c | 12 ++++++------ + 5 files changed, 9 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 50047b9b71..f472fc4f69 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2491,10 +2491,6 @@ static int kvm_init(MachineState *ms) + goto err; + } + +- if (kvm_is_virtcca_cvm_type(type)) { +- virtcca_cvm_allowed = true; +- } +- + do { + ret = kvm_ioctl(s, KVM_CREATE_VM, type); + } while (ret == -EINTR); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 52789a3782..f12bc645d2 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3876,6 +3876,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + + if (!strcmp(kvm_type, "cvm")) { + virtcca_cvm_type = VIRTCCA_CVM_TYPE; ++ virtcca_cvm_allowed = true; + } + } + int rme_vm_type = kvm_arm_rme_vm_type(ms), type; +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index aed56ef371..777b668851 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -110,9 +110,8 @@ struct kvm_regs { + #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ + #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ + #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +-#define KVM_ARM_VCPU_TEC 8 /* VCPU TEC state as part of cvm */ ++#define KVM_ARM_VCPU_REC 8 /* VCPU REC state as part of Realm */ + #define KVM_ARM_VCPU_HAS_EL2_E2H0 9 /* Limit NV support to E2H RES0 */ +-#define KVM_ARM_VCPU_REC 10 /* VCPU REC state as part of Realm */ + + struct kvm_vcpu_init { + __u32 target; +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index beb41f7433..96bc60475e 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1218,9 +1218,7 @@ struct 
kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 + #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +-#define KVM_CAP_ARM_RME 240 +- +-#define KVM_CAP_ARM_TMM 300 ++#define KVM_CAP_ARM_RME 300 + + #define KVM_CAP_SEV_ES_GHCB 500 + #define KVM_CAP_HYGON_COCO_EXT 501 +diff --git a/target/arm/kvm-tmm.c b/target/arm/kvm-tmm.c +index d18ac10896..d6dc8342c4 100644 +--- a/target/arm/kvm-tmm.c ++++ b/target/arm/kvm-tmm.c +@@ -118,7 +118,7 @@ static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp) + g_assert_not_reached(); + } + +- ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_TMM_CONFIG_CVM, (intptr_t)&args); + if (ret) { + error_setg_errno(errp, -ret, "TMM: failed to configure %s", cfg_str); +@@ -167,7 +167,7 @@ static void tmm_populate_region(gpointer data, gpointer unused) + return; + } + +- ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_TMM_POPULATE_CVM, + (intptr_t)&populate_args); + if (ret) { +@@ -179,7 +179,7 @@ static void tmm_populate_region(gpointer data, gpointer unused) + + static int tmm_create_rd(Error **errp) + { +- int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_TMM_CREATE_RD); + if (ret) { + error_setg_errno(errp, -ret, "TMM: failed to create tmm Descriptor"); +@@ -200,14 +200,14 @@ static void tmm_vm_state_change(void *opaque, bool running, RunState state) + g_slist_free_full(g_steal_pointer(&tmm_guest->ram_regions), g_free); + + CPU_FOREACH(cs) { +- ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_TEC); ++ ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC); + if (ret) { + error_report("TMM: failed to finalize vCPU: %s", strerror(-ret)); + exit(1); + } + } + +- ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, ++ ret = 
kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_TMM_ACTIVATE_CVM); + if (ret) { + error_report("TMM: failed to activate cvm: %s", strerror(-ret)); +@@ -224,7 +224,7 @@ int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -ENODEV; + } + +- if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_TMM)) { ++ if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) { + error_setg(errp, "KVM does not support TMM"); + return -ENODEV; + } +-- +2.33.0 + diff --git a/Reserve-address-for-MSI-mapping-in-the-CVM-scenario.patch b/Reserve-address-for-MSI-mapping-in-the-CVM-scenario.patch new file mode 100644 index 0000000000000000000000000000000000000000..234f45902a7e2ce525cd58581a83331b44955270 --- /dev/null +++ b/Reserve-address-for-MSI-mapping-in-the-CVM-scenario.patch @@ -0,0 +1,41 @@ +From e698238a5fa6e78fdffc8269d59884df69da3434 Mon Sep 17 00:00:00 2001 +From: chenzheng +Date: Thu, 5 Dec 2024 11:06:57 +0000 +Subject: [PATCH] Reserve address for MSI mapping in the CVM scenario. 
+ +Signed-off-by: yangxiangkai@huawei.com +--- + hw/arm/virt.c | 3 ++- + include/hw/arm/virt.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a9efcec85e..8823f2ed1c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -162,8 +162,9 @@ static const MemMapEntry base_memmap[] = { + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, + [VIRT_CPUHP_ACPI] = { 0x090c0000, ACPI_CPU_HOTPLUG_REG_LEN}, +- /* In the virtCCA scenario, this space is used for MSI interrupt mapping */ + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, ++ /* In the virtCCA scenario, this space is used for MSI interrupt mapping */ ++ [VIRT_CVM_MSI] = { 0x0a001000, 0x00fff000 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ + [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 4b7dc61c24..345b2d5594 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -121,6 +121,7 @@ enum { + VIRT_UART, + VIRT_CPUFREQ, + VIRT_MMIO, ++ VIRT_CVM_MSI, + VIRT_RTC, + VIRT_FW_CFG, + VIRT_PCIE, +-- +2.41.0.windows.1 + diff --git a/Revert-backends-iommufd-Make-iommufd_backend_-return.patch b/Revert-backends-iommufd-Make-iommufd_backend_-return.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d952d3ad3cd8560e830d9171977200500b5f7c8 --- /dev/null +++ b/Revert-backends-iommufd-Make-iommufd_backend_-return.patch @@ -0,0 +1,153 @@ +From 25c0fad8f9a2ac10f184d346f87da03506314ed6 Mon Sep 17 00:00:00 2001 +From: Zhou Wang +Date: Fri, 13 Jun 2025 11:26:54 +0800 +Subject: [PATCH] Revert "backends/iommufd: Make iommufd_backend_*() return + bool" + +Revert "backends/iommufd: Make iommufd_backend_*() return bool" and +fix the way the vdpa code uses the related iommufd APIs. 
+ +Signed-off-by: Zhou Wang +Signed-off-by: Jian Cai +--- + backends/iommufd.c | 29 ++++++++++++++++------------- + backends/trace-events | 4 ++-- + hw/virtio/vdpa-dev-iommufd.c | 6 +++--- + include/sysemu/iommufd.h | 6 +++--- + 4 files changed, 24 insertions(+), 21 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 62df6e41f0..4446efaa32 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -74,21 +74,23 @@ static void iommufd_backend_class_init(ObjectClass *oc, void *data) + object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); + } + +-bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + { +- int fd; ++ int fd, ret = 0; + + if (be->owned && !be->users) { + fd = qemu_open("/dev/iommu", O_RDWR, errp); + if (fd < 0) { +- return false; ++ ret = fd; ++ goto out; + } + be->fd = fd; + } + be->users++; +- +- trace_iommufd_backend_connect(be->fd, be->owned, be->users); +- return true; ++out: ++ trace_iommufd_backend_connect(be->fd, be->owned, ++ be->users, ret); ++ return ret; + } + + void iommufd_backend_disconnect(IOMMUFDBackend *be) +@@ -105,24 +107,25 @@ out: + trace_iommufd_backend_disconnect(be->fd, be->users); + } + +-bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +- Error **errp) ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp) + { +- int fd = be->fd; ++ int ret, fd = be->fd; + struct iommu_ioas_alloc alloc_data = { + .size = sizeof(alloc_data), + .flags = 0, + }; + +- if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) { ++ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); ++ if (ret) { + error_setg_errno(errp, errno, "Failed to allocate ioas"); +- return false; ++ return ret; + } + + *ioas_id = alloc_data.out_ioas_id; +- trace_iommufd_backend_alloc_ioas(fd, *ioas_id); ++ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); + +- return true; ++ return ret; + } + + void 
iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) +diff --git a/backends/trace-events b/backends/trace-events +index 8fe77149b2..f8592a2711 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -7,13 +7,13 @@ dbus_vmstate_loading(const char *id) "id: %s" + dbus_vmstate_saving(const char *id) "id: %s" + + # iommufd.c +-iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d" ++iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" + iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" + iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" + iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" + iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" + iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" +-iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d" ++iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" + iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)" + iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" + iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" +diff --git a/hw/virtio/vdpa-dev-iommufd.c b/hw/virtio/vdpa-dev-iommufd.c +index 2b0498f9dc..f5718bae99 100644 +--- 
a/hw/virtio/vdpa-dev-iommufd.c ++++ b/hw/virtio/vdpa-dev-iommufd.c +@@ -186,12 +186,12 @@ static int vhost_vdpa_container_connect_iommufd(VDPAIOMMUFDContainer *container) + return -1; + } + +- if (!iommufd_backend_connect(iommufd, &err)) { ++ if (iommufd_backend_connect(iommufd, &err)) { + error_report_err(err); + return -1; + } + +- if (!iommufd_backend_alloc_ioas(iommufd, &ioas_id, &err)) { ++ if (iommufd_backend_alloc_ioas(iommufd, &ioas_id, &err)) { + error_report_err(err); + iommufd_backend_disconnect(iommufd); + return -1; +@@ -480,4 +480,4 @@ void vhost_vdpa_detach_container(VhostVdpaDevice *vdev) + vhost_vdpa_container_disconnect_iommufd(container); + + vhost_vdpa_destroy_container(container); +-} +\ No newline at end of file ++} +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 0531a4ad98..908c94d811 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -43,11 +43,11 @@ typedef struct IOMMUFDViommu { + uint32_t viommu_id; + } IOMMUFDViommu; + +-bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); + void iommufd_backend_disconnect(IOMMUFDBackend *be); + +-bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +- Error **errp); ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp); + void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); + int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +-- +2.33.0 + diff --git a/Revert-file-posix-Remove-unused-s-discard_zeroes.patch b/Revert-file-posix-Remove-unused-s-discard_zeroes.patch new file mode 100644 index 0000000000000000000000000000000000000000..bad52a205a9db5a1555133fd6e81a3c67841105f --- /dev/null +++ b/Revert-file-posix-Remove-unused-s-discard_zeroes.patch @@ -0,0 +1,53 @@ +From db37bc0d85e141a666dd287cdc562a47f29b4343 Mon Sep 17 00:00:00 2001 +From: Jinhua 
Cao +Date: Mon, 18 Mar 2024 10:01:28 +0800 +Subject: [PATCH] Revert "file-posix: Remove unused s->discard_zeroes" + +This reverts commit a7ca2eb488ff149c898f43abe103f8bd8e3ca3c4. +--- + block/file-posix.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index b862406c71..01ae5fd88c 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -158,6 +158,7 @@ typedef struct BDRVRawState { + + bool has_discard:1; + bool has_write_zeroes:1; ++ bool discard_zeroes:1; + bool use_linux_aio:1; + bool use_linux_io_uring:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ +@@ -765,6 +766,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + ret = -EINVAL; + goto fail; + } else { ++ s->discard_zeroes = true; + s->has_fallocate = true; + } + } else { +@@ -790,12 +792,19 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + #endif + + if (S_ISBLK(st.st_mode)) { ++#ifdef BLKDISCARDZEROES ++ unsigned int arg; ++ if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) { ++ s->discard_zeroes = true; ++ } ++#endif + #ifdef __linux__ + /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do + * not rely on the contents of discarded blocks unless using O_DIRECT. + * Same for BLKZEROOUT. 
+ */ + if (!(bs->open_flags & BDRV_O_NOCACHE)) { ++ s->discard_zeroes = false; + s->has_write_zeroes = false; + } + #endif +-- +2.27.0 + diff --git a/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch b/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch deleted file mode 100644 index 200e0b2df02607b11e3117863afd00a346419e27..0000000000000000000000000000000000000000 --- a/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 73a5bf472921068e6db10e7e325b7ac46f111834 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 18:36:05 -0400 -Subject: [PATCH] Revert "ide/ahci: Check for -ECANCELED in aio callbacks" - -This reverts commit 0d910cfeaf2076b116b4517166d5deb0fea76394. - -It's not correct to just ignore an error code in a callback; we need to -handle that error and possible report failure to the guest so that they -don't wait indefinitely for an operation that will now never finish. - -This ought to help cases reported by Nutanix where iSCSI returns a -legitimate -ECANCELED for certain operations which should be propagated -normally. 
- -Reported-by: Shaju Abraham -Signed-off-by: John Snow -Message-id: 20190729223605.7163-1-jsnow@redhat.com -Signed-off-by: John Snow -(cherry picked from commit 8ec41c4265714255d5a138f8b538faf3583dcff6) -Signed-off-by: Michael Roth ---- - hw/ide/ahci.c | 3 --- - hw/ide/core.c | 14 -------------- - 2 files changed, 17 deletions(-) - -diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c -index 00ba422a48..6aaf66534a 100644 ---- a/hw/ide/ahci.c -+++ b/hw/ide/ahci.c -@@ -1023,9 +1023,6 @@ static void ncq_cb(void *opaque, int ret) - IDEState *ide_state = &ncq_tfs->drive->port.ifs[0]; - - ncq_tfs->aiocb = NULL; -- if (ret == -ECANCELED) { -- return; -- } - - if (ret < 0) { - bool is_read = ncq_tfs->cmd == READ_FPDMA_QUEUED; -diff --git a/hw/ide/core.c b/hw/ide/core.c -index 6afadf894f..8e1624f7ce 100644 ---- a/hw/ide/core.c -+++ b/hw/ide/core.c -@@ -722,9 +722,6 @@ static void ide_sector_read_cb(void *opaque, int ret) - s->pio_aiocb = NULL; - s->status &= ~BUSY_STAT; - -- if (ret == -ECANCELED) { -- return; -- } - if (ret != 0) { - if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO | - IDE_RETRY_READ)) { -@@ -840,10 +837,6 @@ static void ide_dma_cb(void *opaque, int ret) - uint64_t offset; - bool stay_active = false; - -- if (ret == -ECANCELED) { -- return; -- } -- - if (ret == -EINVAL) { - ide_dma_error(s); - return; -@@ -975,10 +968,6 @@ static void ide_sector_write_cb(void *opaque, int ret) - IDEState *s = opaque; - int n; - -- if (ret == -ECANCELED) { -- return; -- } -- - s->pio_aiocb = NULL; - s->status &= ~BUSY_STAT; - -@@ -1058,9 +1047,6 @@ static void ide_flush_cb(void *opaque, int ret) - - s->pio_aiocb = NULL; - -- if (ret == -ECANCELED) { -- return; -- } - if (ret < 0) { - /* XXX: What sector number to set here? 
*/ - if (ide_handle_rw_error(s, -ret, IDE_RETRY_FLUSH)) { --- -2.23.0 diff --git a/Revert-linux-user-Print-tid-not-pid-with-strace.patch b/Revert-linux-user-Print-tid-not-pid-with-strace.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec949f7a8d54daf0d40dd772f312e5a42cb22e64 --- /dev/null +++ b/Revert-linux-user-Print-tid-not-pid-with-strace.patch @@ -0,0 +1,32 @@ +From c0717e82e34f96af456309b3786a6808e8e324e4 Mon Sep 17 00:00:00 2001 +From: huangyan +Date: Wed, 16 Apr 2025 00:43:27 +0800 +Subject: [PATCH] Revert "linux-user: Print tid not pid with strace" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This reverts commit 2f37362de1d971cc90c35405705bfa22a33f6cd8. + +* this change is incomplete, "get_task_state" lacks the implementation. +* Moreover, it requires all calls to the "getpid" function to be changed to use "get_task_state", it would cause too much disruption,and it has not been applied in the upstream 8.2.0. 
+--- + linux-user/strace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/linux-user/strace.c b/linux-user/strace.c +index ac9177ebe4..cf26e55264 100644 +--- a/linux-user/strace.c ++++ b/linux-user/strace.c +@@ -4176,7 +4176,7 @@ print_syscall(CPUArchState *cpu_env, int num, + if (!f) { + return; + } +- fprintf(f, "%d ", get_task_state(env_cpu(cpu_env))->ts_tid); ++ fprintf(f, "%d ", getpid()); + + for (i = 0; i < nsyscalls; i++) { + if (scnames[i].nr == num) { +-- +2.41.0.windows.1 + diff --git a/Revert-target-arm-Change-arm_cpu_mp_affinity-when-en.patch b/Revert-target-arm-Change-arm_cpu_mp_affinity-when-en.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e6929d6e06c84ef9f01cffb7d1ff879ea664711 --- /dev/null +++ b/Revert-target-arm-Change-arm_cpu_mp_affinity-when-en.patch @@ -0,0 +1,72 @@ +From 23a7d46a9bf6b0d692155eca9be0b7607db5d861 Mon Sep 17 00:00:00 2001 +From: Jinqian Yang +Date: Fri, 16 May 2025 18:20:10 +0800 +Subject: [PATCH] Revert "target/arm: Change arm_cpu_mp_affinity when enabled + IPIV feature" + +virt inclusion +category: feature +bugzilla: https://gitee.com/openeuler/qemu/issues/IC1EV7 + +------------------------------------------------------------------------ + +This reverts commit 33aa02dc05bed8316b1c64131e8269f404287598. +OpenEuler kernel OLK-6.6 adds the SMCCC interface so that the guest OS can +control the enabling of IPIV. When IPIV is enabled, the guest OS uses multiple +unicasts to implement multicast, so the MPIDR does not need to be modified. 
+ +Signed-off-by: Jinqian Yang +--- + linux-headers/linux/kvm.h | 2 -- + target/arm/cpu.c | 22 +++------------------- + 2 files changed, 3 insertions(+), 21 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index a19683f1e9..b711c04506 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1205,8 +1205,6 @@ struct kvm_ppc_resize_hpt { + + #define KVM_CAP_SEV_ES_GHCB 500 + #define KVM_CAP_HYGON_COCO_EXT 501 +- +-#define KVM_CAP_ARM_IPIV_MODE 503 + /* support userspace to request firmware to build CSV3 guest's memory space */ + #define KVM_CAP_HYGON_COCO_EXT_CSV3_SET_PRIV_MEM (1 << 0) + /* support request to update CSV3 guest's memory region multiple times */ +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index b0f70de018..09d391bd34 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1324,25 +1324,9 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) + + uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) + { +- uint64_t Aff0 = 0, Aff1 = 0, Aff2 = 0, Aff3 = 0; +- int mode; +- +- if (!kvm_enabled()) { +- Aff1 = idx / clustersz; +- Aff0 = idx % clustersz; +- return (Aff1 << ARM_AFF1_SHIFT) | Aff0; +- } +- +- mode = kvm_check_extension(kvm_state, KVM_CAP_ARM_IPIV_MODE); +- if (mode) { +- Aff1 = idx % 16; +- Aff2 = idx / 16; +- } else { +- Aff1 = idx / clustersz; +- Aff0 = idx % clustersz; +- } +- return (Aff3 << ARM_AFF3_SHIFT) | (Aff2 << ARM_AFF2_SHIFT) | +- (Aff1 << ARM_AFF1_SHIFT) | Aff0; ++ uint32_t Aff1 = idx / clustersz; ++ uint32_t Aff0 = idx % clustersz; ++ return (Aff1 << ARM_AFF1_SHIFT) | Aff0; + } + + static void arm_cpu_initfn(Object *obj) +-- +2.41.0.windows.1 + diff --git a/Revert-vdpa-add-vhost_vdpa-suspended-parameter.patch b/Revert-vdpa-add-vhost_vdpa-suspended-parameter.patch new file mode 100644 index 0000000000000000000000000000000000000000..789223bede4e2e945430898b2a0a45392058c1a7 --- /dev/null +++ b/Revert-vdpa-add-vhost_vdpa-suspended-parameter.patch @@ -0,0 
+1,48 @@ +From e1f733fcbc4eb39333ad9527865c1590d74092ed Mon Sep 17 00:00:00 2001 +From: fangyi +Date: Tue, 29 Oct 2024 19:53:27 +0800 +Subject: [PATCH 4/6] Revert "vdpa: add vhost_vdpa->suspended parameter" + +Use a new scheme instead for kernel vdpa, So revert it. + +This reverts commit b6662cb7e5376659c7abb56efe27dcf3898d4fe6. +--- + hw/virtio/vhost-vdpa.c | 8 -------- + include/hw/virtio/vhost-vdpa.h | 2 -- + 2 files changed, 10 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 130afb06dc..bb3320946d 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1406,14 +1406,6 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, + return 0; + } + +- if (!v->suspended) { +- /* +- * Cannot trust in value returned by device, let vhost recover used +- * idx from guest. +- */ +- return -1; +- } +- + ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); + trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num); + return ret; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 5407d54fd7..ee255bc1bd 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -42,8 +42,6 @@ typedef struct vhost_vdpa { + bool shadow_vqs_enabled; + /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ + bool shadow_data; +- /* Device suspended successfully */ +- bool suspended; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; + GPtrArray *shadow_vqs; +-- +2.43.0 + diff --git a/Revert-vdpa-add-vhost_vdpa_suspend.patch b/Revert-vdpa-add-vhost_vdpa_suspend.patch new file mode 100644 index 0000000000000000000000000000000000000000..1ec9f55740ca0c3f45a92bc16cbe5c248266e225 --- /dev/null +++ b/Revert-vdpa-add-vhost_vdpa_suspend.patch @@ -0,0 +1,84 @@ +From 05ee3017d156005e3d8d8fb19514d593858abd44 Mon Sep 17 00:00:00 2001 +From: fangyi +Date: Tue, 29 Oct 2024 19:51:41 +0800 +Subject: [PATCH 3/6] Revert "vdpa: add 
vhost_vdpa_suspend" + +Use a new scheme instead for kernel vdpa, So revert it. + +This reverts commit 0bb302a9960a186fc488068d268dc373e6b70876. +--- + hw/virtio/trace-events | 1 - + hw/virtio/vhost-vdpa.c | 26 -------------------------- + 2 files changed, 27 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 637cac4edf..de02bdc1d0 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -52,7 +52,6 @@ vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: % + vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" + vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 + vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 +-vhost_vdpa_suspend(void *dev) "dev: %p" + vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d" + vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p" + vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64 +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index d49826845f..130afb06dc 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -865,13 +865,11 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev, + + static int vhost_vdpa_reset_device(struct vhost_dev *dev) + { +- struct vhost_vdpa *v = dev->opaque; + int ret; + uint8_t status = 0; + + ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); + trace_vhost_vdpa_reset_device(dev); +- v->suspended = false; + return ret; + } + +@@ 
-1274,29 +1272,6 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + } + } + +-static void vhost_vdpa_suspend(struct vhost_dev *dev) +-{ +- struct vhost_vdpa *v = dev->opaque; +- int r; +- +- if (!vhost_vdpa_first_dev(dev)) { +- return; +- } +- +- if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) { +- trace_vhost_vdpa_suspend(dev); +- r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND); +- if (unlikely(r)) { +- error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno); +- } else { +- v->suspended = true; +- return; +- } +- } +- +- vhost_vdpa_reset_device(dev); +-} +- + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + { + struct vhost_vdpa *v = dev->opaque; +@@ -1310,7 +1285,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + return -1; + } + } else { +- vhost_vdpa_suspend(dev); + vhost_vdpa_svqs_stop(dev); + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); + } +-- +2.43.0 + diff --git a/Revert-vdpa-block-migration-if-SVQ-does-not-admit-a-.patch b/Revert-vdpa-block-migration-if-SVQ-does-not-admit-a-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f21d7ed922eb33cadd7e8ec1470471db53808e79 --- /dev/null +++ b/Revert-vdpa-block-migration-if-SVQ-does-not-admit-a-.patch @@ -0,0 +1,42 @@ +From 4a79b3c07dca4f1e21e4dbb1e59bf437b2a814fa Mon Sep 17 00:00:00 2001 +From: fangyi +Date: Tue, 29 Oct 2024 19:58:14 +0800 +Subject: [PATCH 5/6] Revert "vdpa: block migration if SVQ does not admit a + feature" + +Use a new scheme instead for kernel vdpa, So revert it. + +This reverts commit 57ac831865e370012496fb581a38d261cb72c5d0. 
+--- + hw/virtio/vhost-vdpa.c | 15 --------------- + 1 file changed, 15 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bb3320946d..69cf3b76e9 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -596,21 +596,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + return 0; + } + +- /* +- * If dev->shadow_vqs_enabled at initialization that means the device has +- * been started with x-svq=on, so don't block migration +- */ +- if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) { +- /* We don't have dev->features yet */ +- uint64_t features; +- ret = vhost_vdpa_get_dev_features(dev, &features); +- if (unlikely(ret)) { +- error_setg_errno(errp, -ret, "Could not get device features"); +- return ret; +- } +- vhost_svq_valid_features(features, &dev->migration_blocker); +- } +- + /* + * Similar to VFIO, we end up pinning all guest memory and have to + * disable discarding of RAM. +-- +2.43.0 + diff --git a/Revert-vtimer-compat-cross-version-migration-from-v4.patch b/Revert-vtimer-compat-cross-version-migration-from-v4.patch deleted file mode 100644 index 082f1763f9b445ba3816e2051c47129a6c64528a..0000000000000000000000000000000000000000 --- a/Revert-vtimer-compat-cross-version-migration-from-v4.patch +++ /dev/null @@ -1,37 +0,0 @@ -From ced290d644a00e18e70046194d042bcaa2703b65 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 27 May 2020 11:16:53 +0800 -Subject: [PATCH] Revert: "vtimer: compat cross version migration from v4.0.1" - -This reverts commit patch: -vtimer-compat-cross-version-migration-from-v4.0.1.patch - -Signed-off-by: Ying Fang - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 2609113d..86eb79cd 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -261,7 +261,6 @@ typedef struct CPUARMState { - uint64_t elr_el[4]; /* AArch64 exception link regs */ - uint64_t sp_el[4]; /* AArch64 banked stack pointers */ - -- uint64_t vtimer; /* Timer tick when 
vcpu is stopped */ - - /* System control coprocessor (cp15) */ - struct { -diff --git a/target/arm/machine.c b/target/arm/machine.c -index ec28b839..ee3c59a6 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -814,7 +814,6 @@ const VMStateDescription vmstate_arm_cpu = { - VMSTATE_UINT32(env.exception.syndrome, ARMCPU), - VMSTATE_UINT32(env.exception.fsr, ARMCPU), - VMSTATE_UINT64(env.exception.vaddress, ARMCPU), -- VMSTATE_UINT64(env.vtimer, ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), - { --- -2.23.0 - diff --git a/Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch b/Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch deleted file mode 100644 index bc1fd44d163beb57f08c3b58918d305772b0c362..0000000000000000000000000000000000000000 --- a/Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 843f593280b93e03bb7b0d0001da7488d61f13f6 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Mon, 6 Apr 2020 08:55:17 +0800 -Subject: [PATCH] Typo: Correct the name of CPU hotplug memory region - -Replace "acpi-mem-hotplug" with "acpi-cpu-hotplug" - -Signed-off-by: Keqian Zhu ---- - hw/acpi/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c -index 7a90c8f82d..0c0bfe479a 100644 ---- a/hw/acpi/cpu.c -+++ b/hw/acpi/cpu.c -@@ -203,7 +203,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, - state->devs[i].arch_id = id_list->cpus[i].arch_id; - } - memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, -- "acpi-mem-hotplug", ACPI_CPU_HOTPLUG_REG_LEN); -+ "acpi-cpu-hotplug", ACPI_CPU_HOTPLUG_REG_LEN); - memory_region_add_subregion(as, base_addr, &state->ctrl_reg); - } - --- -2.19.1 diff --git a/Update-iommufd.h-header-for-vSVA.patch b/Update-iommufd.h-header-for-vSVA.patch new file mode 100644 index 
0000000000000000000000000000000000000000..6c75416a8f5d9cf35fe83818a48e171aca881a34 --- /dev/null +++ b/Update-iommufd.h-header-for-vSVA.patch @@ -0,0 +1,514 @@ +From ac715e361fdb6d92169b3b3f5964405c816a13ac Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Tue, 14 Jan 2025 10:29:24 +0000 +Subject: [PATCH] Update iommufd.h header for vSVA + +This is based on Linaro UADK branch: +https://github.com/Linaro/linux-kernel-uadk/tree/6.12-wip-10.26 + +Signed-off-by: Shameer Kolothum +--- + linux-headers/linux/iommufd.h | 394 ++++++++++++++++++++++++++++++++-- + 1 file changed, 371 insertions(+), 23 deletions(-) + +diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h +index 806d98d09c..41559c6064 100644 +--- a/linux-headers/linux/iommufd.h ++++ b/linux-headers/linux/iommufd.h +@@ -37,18 +37,22 @@ + enum { + IOMMUFD_CMD_BASE = 0x80, + IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, +- IOMMUFD_CMD_IOAS_ALLOC, +- IOMMUFD_CMD_IOAS_ALLOW_IOVAS, +- IOMMUFD_CMD_IOAS_COPY, +- IOMMUFD_CMD_IOAS_IOVA_RANGES, +- IOMMUFD_CMD_IOAS_MAP, +- IOMMUFD_CMD_IOAS_UNMAP, +- IOMMUFD_CMD_OPTION, +- IOMMUFD_CMD_VFIO_IOAS, +- IOMMUFD_CMD_HWPT_ALLOC, +- IOMMUFD_CMD_GET_HW_INFO, +- IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, +- IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, ++ IOMMUFD_CMD_IOAS_ALLOC = 0x81, ++ IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82, ++ IOMMUFD_CMD_IOAS_COPY = 0x83, ++ IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84, ++ IOMMUFD_CMD_IOAS_MAP = 0x85, ++ IOMMUFD_CMD_IOAS_UNMAP = 0x86, ++ IOMMUFD_CMD_OPTION = 0x87, ++ IOMMUFD_CMD_VFIO_IOAS = 0x88, ++ IOMMUFD_CMD_HWPT_ALLOC = 0x89, ++ IOMMUFD_CMD_GET_HW_INFO = 0x8a, ++ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b, ++ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, ++ IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, ++ IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, ++ IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f, ++ IOMMUFD_CMD_VDEVICE_ALLOC = 0x90, + }; + + /** +@@ -355,10 +359,13 @@ struct iommu_vfio_ioas { + * the parent HWPT in a nesting configuration. 
+ * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is + * enforced on device attachment ++ * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is ++ * valid. + */ + enum iommufd_hwpt_alloc_flags { + IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, ++ IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, + }; + + /** +@@ -389,14 +396,34 @@ struct iommu_hwpt_vtd_s1 { + __u32 __reserved; + }; + ++/** ++ * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 Context Descriptor Table info ++ * (IOMMU_HWPT_DATA_ARM_SMMUV3) ++ * ++ * @ste: The first two double words of the user space Stream Table Entry for ++ * a user stage-1 Context Descriptor Table. Must be little-endian. ++ * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) ++ * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax ++ * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD ++ * ++ * -EIO will be returned if @ste is not legal or contains any non-allowed field. ++ * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass ++ * nested domain will translate the same as the nesting parent. 
++ */ ++struct iommu_hwpt_arm_smmuv3 { ++ __aligned_le64 ste[2]; ++}; ++ + /** + * enum iommu_hwpt_data_type - IOMMU HWPT Data Type + * @IOMMU_HWPT_DATA_NONE: no data + * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table ++ * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table + */ + enum iommu_hwpt_data_type { +- IOMMU_HWPT_DATA_NONE, +- IOMMU_HWPT_DATA_VTD_S1, ++ IOMMU_HWPT_DATA_NONE = 0, ++ IOMMU_HWPT_DATA_VTD_S1 = 1, ++ IOMMU_HWPT_DATA_ARM_SMMUV3 = 2, + }; + + /** +@@ -404,12 +431,15 @@ enum iommu_hwpt_data_type { + * @size: sizeof(struct iommu_hwpt_alloc) + * @flags: Combination of enum iommufd_hwpt_alloc_flags + * @dev_id: The device to allocate this HWPT for +- * @pt_id: The IOAS or HWPT to connect this HWPT to ++ * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to + * @out_hwpt_id: The ID of the new HWPT + * @__reserved: Must be 0 + * @data_type: One of enum iommu_hwpt_data_type + * @data_len: Length of the type specific data + * @data_uptr: User pointer to the type specific data ++ * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of ++ * IOMMU_HWPT_FAULT_ID_VALID is set. ++ * @__reserved2: Padding to 64-bit alignment. Must be 0. + * + * Explicitly allocate a hardware page table object. This is the same object + * type that is returned by iommufd_device_attach() and represents the +@@ -420,11 +450,13 @@ enum iommu_hwpt_data_type { + * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a + * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. + * +- * A user-managed nested HWPT will be created from a given parent HWPT via +- * @pt_id, in which the parent HWPT must be allocated previously via the +- * same ioctl from a given IOAS (@pt_id). In this case, the @data_type +- * must be set to a pre-defined type corresponding to an I/O page table +- * type supported by the underlying IOMMU hardware. 
++ * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a ++ * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be ++ * allocated previously via the same ioctl from a given IOAS (@pt_id). In this ++ * case, the @data_type must be set to a pre-defined type corresponding to an ++ * I/O page table type supported by the underlying IOMMU hardware. The device ++ * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU ++ * instance. + * + * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and + * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr +@@ -440,6 +472,8 @@ struct iommu_hwpt_alloc { + __u32 data_type; + __u32 data_len; + __aligned_u64 data_uptr; ++ __u32 fault_id; ++ __u32 __reserved2; + }; + #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) + +@@ -474,15 +508,50 @@ struct iommu_hw_info_vtd { + __aligned_u64 ecap_reg; + }; + ++/** ++ * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information ++ * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3) ++ * ++ * @flags: Must be set to 0 ++ * @__reserved: Must be 0 ++ * @idr: Implemented features for ARM SMMU Non-secure programming interface ++ * @iidr: Information about the implementation and implementer of ARM SMMU, ++ * and architecture version supported ++ * @aidr: ARM SMMU architecture version ++ * ++ * For the details of @idr, @iidr and @aidr, please refer to the chapters ++ * from 6.3.1 to 6.3.6 in the SMMUv3 Spec. ++ * ++ * User space should read the underlying ARM SMMUv3 hardware information for ++ * the list of supported features. ++ * ++ * Note that these values reflect the raw HW capability, without any insight if ++ * any required kernel driver support is present. Bits may be set indicating the ++ * HW has functionality that is lacking kernel software support, such as BTM. 
If ++ * a VMM is using this information to construct emulated copies of these ++ * registers it should only forward bits that it knows it can support. ++ * ++ * In future, presence of required kernel support will be indicated in flags. ++ */ ++struct iommu_hw_info_arm_smmuv3 { ++ __u32 flags; ++ __u32 __reserved; ++ __u32 idr[6]; ++ __u32 iidr; ++ __u32 aidr; ++}; ++ + /** + * enum iommu_hw_info_type - IOMMU Hardware Info Types + * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware + * info + * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type ++ * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type + */ + enum iommu_hw_info_type { +- IOMMU_HW_INFO_TYPE_NONE, +- IOMMU_HW_INFO_TYPE_INTEL_VTD, ++ IOMMU_HW_INFO_TYPE_NONE = 0, ++ IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, ++ IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, + }; + + /** +@@ -494,9 +563,17 @@ enum iommu_hw_info_type { + * IOMMU_HWPT_GET_DIRTY_BITMAP + * IOMMU_HWPT_SET_DIRTY_TRACKING + * ++ * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it ++ * when the struct iommu_hw_info::out_max_pasid_log2 ++ * is zero. ++ * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it ++ * when the struct iommu_hw_info::out_max_pasid_log2 ++ * is zero. + */ + enum iommufd_hw_capabilities { + IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, ++ IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1, ++ IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, + }; + + /** +@@ -512,6 +589,9 @@ enum iommufd_hw_capabilities { + * iommu_hw_info_type. + * @out_capabilities: Output the generic iommu capability info type as defined + * in the enum iommu_hw_capabilities. ++ * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support. ++ * PCI devices turn to out_capabilities to check if the ++ * specific capabilities is supported or not.
+ * @__reserved: Must be 0 + * + * Query an iommu type specific hardware information data from an iommu behind +@@ -535,7 +615,8 @@ struct iommu_hw_info { + __u32 data_len; + __aligned_u64 data_uptr; + __u32 out_data_type; +- __u32 __reserved; ++ __u8 out_max_pasid_log2; ++ __u8 __reserved[3]; + __aligned_u64 out_capabilities; + }; + #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) +@@ -613,4 +694,271 @@ struct iommu_hwpt_get_dirty_bitmap { + #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) + ++/** ++ * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation ++ * Data Type ++ * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 ++ * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3 ++ */ ++enum iommu_hwpt_invalidate_data_type { ++ IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, ++ IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1, ++}; ++ ++/** ++ * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d ++ * stage-1 cache invalidation ++ * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies ++ * to all-levels page structure cache or just ++ * the leaf PTE cache. ++ */ ++enum iommu_hwpt_vtd_s1_invalidate_flags { ++ IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0, ++}; ++ ++/** ++ * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation ++ * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) ++ * @addr: The start address of the range to be invalidated. It needs to ++ * be 4KB aligned. ++ * @npages: Number of contiguous 4K pages to be invalidated. ++ * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags ++ * @__reserved: Must be 0 ++ * ++ * The Intel VT-d specific invalidation data for user-managed stage-1 cache ++ * invalidation in nested translation. Userspace uses this structure to ++ * tell the impacted cache scope after modifying the stage-1 page table. 
++ * ++ * Invalidating all the caches related to the page table by setting @addr ++ * to be 0 and @npages to be U64_MAX. ++ * ++ * The device TLB will be invalidated automatically if ATS is enabled. ++ */ ++struct iommu_hwpt_vtd_s1_invalidate { ++ __aligned_u64 addr; ++ __aligned_u64 npages; ++ __u32 flags; ++ __u32 __reserved; ++}; ++ ++/** ++ * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation ++ * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3) ++ * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ. ++ * Must be little-endian. ++ * ++ * Supported command list only when passing in a vIOMMU via @hwpt_id: ++ * CMDQ_OP_TLBI_NSNH_ALL ++ * CMDQ_OP_TLBI_NH_VA ++ * CMDQ_OP_TLBI_NH_VAA ++ * CMDQ_OP_TLBI_NH_ALL ++ * CMDQ_OP_TLBI_NH_ASID ++ * CMDQ_OP_ATC_INV ++ * CMDQ_OP_CFGI_CD ++ * CMDQ_OP_CFGI_CD_ALL ++ * ++ * -EIO will be returned if the command is not supported. ++ */ ++struct iommu_viommu_arm_smmuv3_invalidate { ++ __aligned_le64 cmd[2]; ++}; ++ ++/** ++ * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) ++ * @size: sizeof(struct iommu_hwpt_invalidate) ++ * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation ++ * @data_uptr: User pointer to an array of driver-specific cache invalidation ++ * data. ++ * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data ++ * type of all the entries in the invalidation request array. It ++ * should be a type supported by the hwpt pointed by @hwpt_id. ++ * @entry_len: Length (in bytes) of a request entry in the request array ++ * @entry_num: Input the number of cache invalidation requests in the array. ++ * Output the number of requests successfully handled by kernel. ++ * @__reserved: Must be 0. ++ * ++ * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications ++ * on a user-managed page table should be followed by this operation, if a HWPT ++ * is passed in via @hwpt_id.
Other caches, such as device cache or descriptor ++ * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field. ++ * ++ * Each ioctl can support one or more cache invalidation requests in the array ++ * that has a total size of @entry_len * @entry_num. ++ * ++ * An empty invalidation request array by setting @entry_num==0 is allowed, and ++ * @entry_len and @data_uptr would be ignored in this case. This can be used to ++ * check if the given @data_type is supported or not by kernel. ++ */ ++struct iommu_hwpt_invalidate { ++ __u32 size; ++ __u32 hwpt_id; ++ __aligned_u64 data_uptr; ++ __u32 data_type; ++ __u32 entry_len; ++ __u32 entry_num; ++ __u32 __reserved; ++}; ++#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) ++ ++/** ++ * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault ++ * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is ++ * valid. ++ * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group. ++ */ ++enum iommu_hwpt_pgfault_flags { ++ IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0), ++ IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1), ++}; ++ ++/** ++ * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault ++ * @IOMMU_PGFAULT_PERM_READ: request for read permission ++ * @IOMMU_PGFAULT_PERM_WRITE: request for write permission ++ * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the ++ * Execute Requested bit set in PASID TLP Prefix. ++ * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the ++ * Privileged Mode Requested bit set in PASID TLP ++ * Prefix. 
++ */ ++enum iommu_hwpt_pgfault_perm { ++ IOMMU_PGFAULT_PERM_READ = (1 << 0), ++ IOMMU_PGFAULT_PERM_WRITE = (1 << 1), ++ IOMMU_PGFAULT_PERM_EXEC = (1 << 2), ++ IOMMU_PGFAULT_PERM_PRIV = (1 << 3), ++}; ++ ++/** ++ * struct iommu_hwpt_pgfault - iommu page fault data ++ * @flags: Combination of enum iommu_hwpt_pgfault_flags ++ * @dev_id: id of the originated device ++ * @pasid: Process Address Space ID ++ * @grpid: Page Request Group Index ++ * @perm: Combination of enum iommu_hwpt_pgfault_perm ++ * @addr: Fault address ++ * @length: a hint of how much data the requestor is expecting to fetch. For ++ * example, if the PRI initiator knows it is going to do a 10MB ++ * transfer, it could fill in 10MB and the OS could pre-fault in ++ * 10MB of IOVA. It's default to 0 if there's no such hint. ++ * @cookie: kernel-managed cookie identifying a group of fault messages. The ++ * cookie number encoded in the last page fault of the group should ++ * be echoed back in the response message. ++ */ ++struct iommu_hwpt_pgfault { ++ __u32 flags; ++ __u32 dev_id; ++ __u32 pasid; ++ __u32 grpid; ++ __u32 perm; ++ __u64 addr; ++ __u32 length; ++ __u32 cookie; ++}; ++ ++/** ++ * enum iommufd_page_response_code - Return status of fault handlers ++ * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables ++ * populated, retry the access. This is the ++ * "Success" defined in PCI 10.4.2.1. ++ * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the ++ * access. This is the "Invalid Request" in PCI ++ * 10.4.2.1. ++ */ ++enum iommufd_page_response_code { ++ IOMMUFD_PAGE_RESP_SUCCESS = 0, ++ IOMMUFD_PAGE_RESP_INVALID = 1, ++}; ++ ++/** ++ * struct iommu_hwpt_page_response - IOMMU page fault response ++ * @cookie: The kernel-managed cookie reported in the fault message. ++ * @code: One of response code in enum iommufd_page_response_code. 
++ */ ++struct iommu_hwpt_page_response { ++ __u32 cookie; ++ __u32 code; ++}; ++ ++/** ++ * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) ++ * @size: sizeof(struct iommu_fault_alloc) ++ * @flags: Must be 0 ++ * @out_fault_id: The ID of the new FAULT ++ * @out_fault_fd: The fd of the new FAULT ++ * ++ * Explicitly allocate a fault handling object. ++ */ ++struct iommu_fault_alloc { ++ __u32 size; ++ __u32 flags; ++ __u32 out_fault_id; ++ __u32 out_fault_fd; ++}; ++#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) ++ ++/** ++ * enum iommu_viommu_type - Virtual IOMMU Type ++ * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use ++ * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type ++ */ ++enum iommu_viommu_type { ++ IOMMU_VIOMMU_TYPE_DEFAULT = 0, ++ IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, ++}; ++ ++/** ++ * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) ++ * @size: sizeof(struct iommu_viommu_alloc) ++ * @flags: Must be 0 ++ * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type ++ * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU ++ * @hwpt_id: ID of a nesting parent HWPT to associate to ++ * @out_viommu_id: Output virtual IOMMU ID for the allocated object ++ * ++ * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's ++ * virtualization support that is a security-isolated slice of the real IOMMU HW ++ * that is unique to a specific VM. Operations global to the IOMMU are connected ++ * to the vIOMMU, such as: ++ * - Security namespace for guest owned ID, e.g. guest-controlled cache tags ++ * - Access to a sharable nesting parent pagetable across physical IOMMUs ++ * - Non-affiliated event reporting (e.g. an invalidation queue error) ++ * - Virtualization of various platforms IDs, e.g. 
RIDs and others ++ * - Delivery of paravirtualized invalidation ++ * - Direct assigned invalidation queues ++ * - Direct assigned interrupts ++ */ ++struct iommu_viommu_alloc { ++ __u32 size; ++ __u32 flags; ++ __u32 type; ++ __u32 dev_id; ++ __u32 hwpt_id; ++ __u32 out_viommu_id; ++}; ++#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) ++ ++/** ++ * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) ++ * @size: sizeof(struct iommu_vdevice_alloc) ++ * @viommu_id: vIOMMU ID to associate with the virtual device ++ * @dev_id: The physical device to allocate a virtual instance on the vIOMMU ++ * @__reserved: Must be 0 ++ * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID ++ * of AMD IOMMU, and vID of a nested Intel VT-d to a Context Table. ++ * @out_vdevice_id: Output virtual instance ID for the allocated object ++ * @__reserved2: Must be 0 ++ * ++ * Allocate a virtual device instance (for a physical device) against a vIOMMU. ++ * This instance holds the device's information (related to its vIOMMU) in a VM. ++ */ ++struct iommu_vdevice_alloc { ++ __u32 size; ++ __u32 viommu_id; ++ __u32 dev_id; ++ __u32 __reserved; ++ __aligned_u64 virt_id; ++ __u32 out_vdevice_id; ++ __u32 __reserved2; ++}; ++#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) + #endif +-- +2.41.0.windows.1 + diff --git a/accel-kvm-Add-pre-park-vCPU-support.patch b/accel-kvm-Add-pre-park-vCPU-support.patch deleted file mode 100644 index 9bc81178581c30504ca1b32d1f47ba8cbc1a5b85..0000000000000000000000000000000000000000 --- a/accel-kvm-Add-pre-park-vCPU-support.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 135119d2e82e99adc67346572c761fbe54d73e4a Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 13:04:40 +0800 -Subject: [PATCH] accel/kvm: Add pre-park vCPU support - -For that KVM do not support dynamic adjustment of vCPU count, -we must pre-park all possible vCPU at start.
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - accel/kvm/kvm-all.c | 23 +++++++++++++++++++++++ - include/sysemu/kvm.h | 1 + - 2 files changed, 24 insertions(+) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index f450f25295..84edbe8bb1 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -339,6 +339,29 @@ err: - return ret; - } - -+int kvm_create_parked_vcpu(unsigned long vcpu_id) -+{ -+ KVMState *s = kvm_state; -+ struct KVMParkedVcpu *vcpu = NULL; -+ int ret; -+ -+ DPRINTF("kvm_create_parked_vcpu\n"); -+ -+ ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); -+ if (ret < 0) { -+ DPRINTF("kvm_create_vcpu failed\n"); -+ goto err; -+ } -+ -+ vcpu = g_malloc0(sizeof(*vcpu)); -+ vcpu->vcpu_id = vcpu_id; -+ vcpu->kvm_fd = ret; -+ QLIST_INSERT_HEAD(&s->kvm_parked_vcpus, vcpu, node); -+ -+err: -+ return ret; -+} -+ - static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) - { - struct KVMParkedVcpu *cpu; -diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h -index acd90aebb6..565adb4e2c 100644 ---- a/include/sysemu/kvm.h -+++ b/include/sysemu/kvm.h -@@ -216,6 +216,7 @@ int kvm_has_many_ioeventfds(void); - int kvm_has_gsi_routing(void); - int kvm_has_intx_set_mask(void); - -+int kvm_create_parked_vcpu(unsigned long vcpu_id); - int kvm_init_vcpu(CPUState *cpu); - int kvm_cpu_exec(CPUState *cpu); - int kvm_destroy_vcpu(CPUState *cpu); --- -2.19.1 diff --git a/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c-sync-upstream.patch b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c-sync-upstream.patch new file mode 100644 index 0000000000000000000000000000000000000000..43c53ab4029646ebc22ab7e7d511a263d101c25c --- /dev/null +++ b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c-sync-upstream.patch @@ -0,0 +1,236 @@ +From 2464d0d6115e1794468ff455e3acdb98e0d71a31 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 16 Jul 2024 12:14:56 +0100 +Subject: [PATCH 62/78] accel/kvm: Extract common KVM vCPU 
{creation,parking} + code + +KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread +is spawned. This is common to all the architectures as of now. + +Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the +corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't +support vCPU removal. Therefore, its representative KVM vCPU object/context in +Qemu is parked. + +Refactor architecture common logic so that some APIs could be reused by vCPU +Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs +with trace events. New APIs qemu_{create,park,unpark}_vcpu() can be externally +called. No functional change is intended here. + +Signed-off-by: Salil Mehta +Reviewed-by: Gavin Shan +Tested-by: Vishnu Pajjuri +Reviewed-by: Jonathan Cameron +Tested-by: Xianglai Li +Tested-by: Miguel Luis +Reviewed-by: Shaoqin Huang +Reviewed-by: Vishnu Pajjuri +Reviewed-by: Nicholas Piggin +Tested-by: Zhao Liu +Reviewed-by: Zhao Liu +Reviewed-by: Harsh Prateek Bora +Reviewed-by: Igor Mammedov +Message-Id: <20240716111502.202344-2-salil.mehta@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Xianglai Li +--- + accel/kvm/kvm-all.c | 71 +++++++++++++++++++++--------------------- + accel/kvm/trace-events | 11 +++++++ + include/sysemu/kvm.h | 27 ++++++++++++++-- + 3 files changed, 71 insertions(+), 38 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 8077630825..8dea8f98bb 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -141,7 +141,6 @@ static QemuMutex kml_slots_lock; + #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) + + static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); +-static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); + + static inline void kvm_resample_fd_remove(int gsi) + { +@@ -334,39 +333,57 @@ void kvm_park_vcpu(CPUState *cpu) + { + struct KVMParkedVcpu *vcpu; + ++ trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); ++ + vcpu = g_malloc0(sizeof(*vcpu)); + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); + vcpu->kvm_fd = cpu->kvm_fd; + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); + } + ++int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) ++{ ++ struct KVMParkedVcpu *cpu; ++ int kvm_fd = -ENOENT; ++ ++ QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { ++ if (cpu->vcpu_id == vcpu_id) { ++ QLIST_REMOVE(cpu, node); ++ kvm_fd = cpu->kvm_fd; ++ g_free(cpu); ++ } ++ } ++ ++ trace_kvm_unpark_vcpu(vcpu_id, kvm_fd > 0 ? 
"unparked" : "!found parked"); ++ ++ return kvm_fd; ++} ++ + int kvm_create_vcpu(CPUState *cpu) + { +- unsigned long vcpu_id = cpu->cpu_index; ++ unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); + KVMState *s = kvm_state; +- int ret; +- +- DPRINTF("kvm_create_vcpu\n"); ++ int kvm_fd; + + /* check if the KVM vCPU already exist but is parked */ +- ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); +- if (ret > 0) { +- goto found; +- } +- +- /* create a new KVM vcpu */ +- ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); +- if (ret < 0) { +- return ret; ++ kvm_fd = kvm_unpark_vcpu(s, vcpu_id); ++ if (kvm_fd < 0) { ++ /* vCPU not parked: create a new KVM vCPU */ ++ kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); ++ if (kvm_fd < 0) { ++ error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); ++ return kvm_fd; ++ } + } + +-found: +- cpu->vcpu_dirty = true; +- cpu->kvm_fd = ret; ++ cpu->kvm_fd = kvm_fd; + cpu->kvm_state = s; ++ cpu->vcpu_dirty = true; + cpu->dirty_pages = 0; + cpu->throttle_us_per_full = 0; + ++ trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd); ++ + return 0; + } + +@@ -376,7 +393,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) + long mmap_size; + int ret = 0; + +- DPRINTF("kvm_destroy_vcpu\n"); ++ trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + + ret = kvm_arch_destroy_vcpu(cpu); + if (ret < 0) { +@@ -415,24 +432,6 @@ void kvm_destroy_vcpu(CPUState *cpu) + } + } + +-static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) +-{ +- struct KVMParkedVcpu *cpu; +- +- QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { +- if (cpu->vcpu_id == vcpu_id) { +- int kvm_fd; +- +- QLIST_REMOVE(cpu, node); +- kvm_fd = cpu->kvm_fd; +- g_free(cpu); +- return kvm_fd; +- } +- } +- +- return -1; +-} +- + int kvm_init_vcpu(CPUState *cpu, Error **errp) + { + KVMState *s = kvm_state; +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index 399aaeb0ec..9c880fdcf4 100644 +--- a/accel/kvm/trace-events ++++ 
b/accel/kvm/trace-events +@@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" + kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" + kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" + kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" ++kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id, int kvm_fd) "index: %d, id: %lu, kvm fd: %d" ++kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" ++kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" ++kvm_unpark_vcpu(unsigned long arch_cpu_id, const char *msg) "id: %lu %s" + kvm_irqchip_commit_routes(void) "" + kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" + kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" +@@ -26,3 +30,10 @@ kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"P + kvm_dirty_ring_reaper_kick(const char *reason) "%s" + kvm_dirty_ring_flush(int finished) "%d" + ++kvm_failed_get_vcpu_mmap_size(void) "" ++kvm_cpu_exec(void) "" ++kvm_interrupt_exit_request(void) "" ++kvm_io_window_exit(void) "" ++kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 ++kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" ++kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 31af5f0e24..7ffb5e4992 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -319,6 +319,31 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test); + */ + bool kvm_device_supported(int vmfd, uint64_t type); + ++/** ++ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU ++ 
* @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. ++ * ++ * @returns: 0 when success, errno (<0) when failed. ++ */ ++int kvm_create_vcpu(CPUState *cpu); ++ ++/** ++ * kvm_park_vcpu - Park QEMU KVM vCPU context ++ * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. ++ * ++ * @returns: none ++ */ ++void kvm_park_vcpu(CPUState *cpu); ++ ++/** ++ * kvm_unpark_vcpu - unpark QEMU KVM vCPU context ++ * @s: KVM State ++ * @vcpu_id: Architecture vCPU ID of the parked vCPU ++ * ++ * @returns: KVM fd ++ */ ++int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id); ++ + /* Arch specific hooks */ + + extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; +@@ -440,8 +465,6 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + + int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, + hwaddr *phys_addr); +-int kvm_create_vcpu(CPUState *cpu); +-void kvm_park_vcpu(CPUState *cpu); + + #endif /* NEED_CPU_H */ + +-- +2.39.1 + diff --git a/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..d68e7f54f43f58b3be56a880de48e15b7ebd5a2f --- /dev/null +++ b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch @@ -0,0 +1,147 @@ +From 6999ced63ca3bb05a1cbc4a667bd9fd27eeaeaee Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 Sep 2023 00:04:04 +0000 +Subject: [PATCH] accel/kvm: Extract common KVM vCPU {creation,parking} code + +KVM vCPU creation is done once during the initialization of the VM when Qemu +threads are spawned. This is common to all the architectures. If the architecture +supports vCPU hot-{un}plug then this KVM vCPU creation could be deferred to +later point as well. Some architectures might in any case create KVM vCPUs for +the yet-to-be plugged vCPUs (i.e. QoM Object & thread does not exists) during VM +init time and park them. 
+ +Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but +the KVM vCPU objects in the Host KVM are not destroyed and their representative +KVM vCPU objects in Qemu are parked. + +Signed-off-by: Salil Mehta +--- + accel/kvm/kvm-all.c | 61 ++++++++++++++++++++++++++++++++++---------- + include/sysemu/kvm.h | 2 ++ + 2 files changed, 49 insertions(+), 14 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index d900df93a4..6d503aa614 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -136,6 +136,7 @@ static QemuMutex kml_slots_lock; + #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) + + static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); ++static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); + + static inline void kvm_resample_fd_remove(int gsi) + { +@@ -324,11 +325,51 @@ err: + return ret; + } + ++void kvm_park_vcpu(CPUState *cpu) ++{ ++ unsigned long vcpu_id = cpu->cpu_index; ++ struct KVMParkedVcpu *vcpu; ++ ++ vcpu = g_malloc0(sizeof(*vcpu)); ++ vcpu->vcpu_id = vcpu_id; ++ vcpu->kvm_fd = cpu->kvm_fd; ++ QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); ++} ++ ++int kvm_create_vcpu(CPUState *cpu) ++{ ++ unsigned long vcpu_id = cpu->cpu_index; ++ KVMState *s = kvm_state; ++ int ret; ++ ++ DPRINTF("kvm_create_vcpu\n"); ++ ++ /* check if the KVM vCPU already exist but is parked */ ++ ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); ++ if (ret > 0) { ++ goto found; ++ } ++ ++ /* create a new KVM vcpu */ ++ ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); ++ if (ret < 0) { ++ return ret; ++ } ++ ++found: ++ cpu->vcpu_dirty = true; ++ cpu->kvm_fd = ret; ++ cpu->kvm_state = s; ++ cpu->dirty_pages = 0; ++ cpu->throttle_us_per_full = 0; ++ ++ return 0; ++} ++ + static int do_kvm_destroy_vcpu(CPUState *cpu) + { + KVMState *s = kvm_state; + long mmap_size; +- struct KVMParkedVcpu *vcpu = NULL; + int ret = 0; + + DPRINTF("kvm_destroy_vcpu\n"); +@@ -357,10 +398,7 @@ static int 
do_kvm_destroy_vcpu(CPUState *cpu) + } + } + +- vcpu = g_malloc0(sizeof(*vcpu)); +- vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); +- vcpu->kvm_fd = cpu->kvm_fd; +- QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); ++ kvm_park_vcpu(cpu); + err: + return ret; + } +@@ -388,7 +426,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) + } + } + +- return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); ++ return -1; + } + + int kvm_init_vcpu(CPUState *cpu, Error **errp) +@@ -399,19 +437,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) + + trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + +- ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); ++ ret = kvm_create_vcpu(cpu); + if (ret < 0) { +- error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", ++ error_setg_errno(errp, -ret, ++ "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", + kvm_arch_vcpu_id(cpu)); + goto err; + } + +- cpu->kvm_fd = ret; +- cpu->kvm_state = s; +- cpu->vcpu_dirty = true; +- cpu->dirty_pages = 0; +- cpu->throttle_us_per_full = 0; +- + mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size < 0) { + ret = mmap_size; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index b46d6203b4..e534411ddc 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -434,6 +434,8 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + + int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, + hwaddr *phys_addr); ++int kvm_create_vcpu(CPUState *cpu); ++void kvm_park_vcpu(CPUState *cpu); + + #endif /* NEED_CPU_H */ + +-- +2.27.0 + diff --git a/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch b/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch deleted file mode 100644 index daff52e901686e17c1c492e899165b773db96258..0000000000000000000000000000000000000000 --- a/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch +++ /dev/null @@ -1,45 +0,0 @@ -From b50b9a0e2e5e8262c830df5994f3abbe0a37655a Mon Sep 
17 00:00:00 2001 -From: Keqian Zhu -Date: Thu, 17 Dec 2020 09:49:40 +0800 -Subject: [PATCH] accel: kvm: Fix memory waste under mismatch page size - -When handle dirty log, we face qemu_real_host_page_size and -TARGET_PAGE_SIZE. The first one is the granule of KVM dirty -bitmap, and the second one is the granule of QEMU dirty bitmap. - -As qemu_real_host_page_size >= TARGET_PAGE_SIZE (kvm_init() -enforced it), misuse TARGET_PAGE_SIZE to init kvmslot dirty_bmap -may waste memory. For example, when qemu_real_host_page_size is -64K and TARGET_PAGE_SIZE is 4K, it wastes 93.75% (15/16) memory. - -Signed-off-by: Keqian Zhu -Reviewed-by: Andrew Jones -Reviewed-by: Peter Xu -Message-Id: <20201217014941.22872-2-zhukeqian1@huawei.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Kunkun Jiang ---- - accel/kvm/kvm-all.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 5a6b89cc2a..4daff563a0 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -551,8 +551,12 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) - * too, in most cases). - * So for now, let's align to 64 instead of HOST_LONG_BITS here, in - * a hope that sizeof(long) won't become >8 any time soon. -+ * -+ * Note: the granule of kvm dirty log is qemu_real_host_page_size. -+ * And mem->memory_size is aligned to it (otherwise this mem can't -+ * be registered to KVM). 
- */ -- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -+ hwaddr bitmap_size = ALIGN(mem->memory_size / qemu_real_host_page_size, - /*HOST_LONG_BITS*/ 64) / 8; - mem->dirty_bmap = g_malloc0(bitmap_size); - } --- -2.27.0 - diff --git a/accel-kvm-Use-correct-id-for-parked-vcpu.patch b/accel-kvm-Use-correct-id-for-parked-vcpu.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec759c7697e70a45b61b8c7ba2e27a2be4c35b22 --- /dev/null +++ b/accel-kvm-Use-correct-id-for-parked-vcpu.patch @@ -0,0 +1,32 @@ +From 9de26d69c52db67f48619ad20b8cb9d8ee71e42c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:42:57 +0800 +Subject: [PATCH] accel/kvm: Use correct id for parked vcpu + +kvm_arch_vcpu_id is correct for all platform. + +Signed-off-by: Keqian Zhu +--- + accel/kvm/kvm-all.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6d503aa614..75a3075c14 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -327,11 +327,10 @@ err: + + void kvm_park_vcpu(CPUState *cpu) + { +- unsigned long vcpu_id = cpu->cpu_index; + struct KVMParkedVcpu *vcpu; + + vcpu = g_malloc0(sizeof(*vcpu)); +- vcpu->vcpu_id = vcpu_id; ++ vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); + vcpu->kvm_fd = cpu->kvm_fd; + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); + } +-- +2.27.0 + diff --git a/accel-kvm-kvm-all-Fixes-the-missing-break-in-vCPU-un.patch b/accel-kvm-kvm-all-Fixes-the-missing-break-in-vCPU-un.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c138ff4e199aa774f4dbf83548874606165d6cc --- /dev/null +++ b/accel-kvm-kvm-all-Fixes-the-missing-break-in-vCPU-un.patch @@ -0,0 +1,46 @@ +From 9bbc73e18d36d75c5dd842e478ed1f1b47ed4222 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Thu, 1 Aug 2024 10:15:03 +0100 +Subject: [PATCH 68/78] accel/kvm/kvm-all: Fixes the missing break in vCPU + unpark logic +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Loop should exit prematurely on successfully finding out the parked vCPU (struct +KVMParkedVcpu) in the 'struct KVMState' maintained 'kvm_parked_vcpus' list of +parked vCPUs. + +Fixes: Coverity CID 1558552 +Fixes: 08c3286822 ("accel/kvm: Extract common KVM vCPU {creation,parking} code") +Reported-by: Peter Maydell +Signed-off-by: Salil Mehta +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Gavin Shan +Reviewed-by: Zhao Liu +Reviewed-by: Igor Mammedov +Message-id: 20240725145132.99355-1-salil.mehta@huawei.com +Suggested-by: Peter Maydell +Message-ID: +Signed-off-by: Salil Mehta +Signed-off-by: Peter Maydell +Signed-off-by: Xianglai Li +--- + accel/kvm/kvm-all.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 8dea8f98bb..79d5671841 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -351,6 +351,7 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) + QLIST_REMOVE(cpu, node); + kvm_fd = cpu->kvm_fd; + g_free(cpu); ++ break; + } + } + +-- +2.39.1 + diff --git a/accel-tcg-Fix-typo-causing-tb-page_addr-1-to-not-be-.patch b/accel-tcg-Fix-typo-causing-tb-page_addr-1-to-not-be-.patch new file mode 100644 index 0000000000000000000000000000000000000000..382187c5b46eabc3ebfd68b37aa4cd5aee41f38b --- /dev/null +++ b/accel-tcg-Fix-typo-causing-tb-page_addr-1-to-not-be-.patch @@ -0,0 +1,52 @@ +From 378d79fa6b9410af702776ffa93865219f273380 Mon Sep 17 00:00:00 2001 +From: Anton Johansson +Date: Wed, 12 Jun 2024 15:30:31 +0200 +Subject: [PATCH] accel/tcg: Fix typo causing tb->page_addr[1] to not be + recorded +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For TBs crossing page boundaries, the 2nd page will never be +recorded/removed, as the index of the 2nd page is computed from the +address of the 1st page. This is due to a typo, fix it. 
+ +Cc: qemu-stable@nongnu.org +Fixes: deba78709a ("accel/tcg: Always lock pages before translation") +Signed-off-by: Anton Johansson +Reviewed-by: Manos Pitsidianakis +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Alex Bennée +Message-Id: <20240612133031.15298-1-anjo@rev.ng> +Signed-off-by: Richard Henderson +(cherry picked from commit 3b279f73fa37bec8d3ba04a15f5153d6491cffaf) +Signed-off-by: zhujun2 +--- + accel/tcg/tb-maint.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c +index 3d2a896220..eb37f9e8a8 100644 +--- a/accel/tcg/tb-maint.c ++++ b/accel/tcg/tb-maint.c +@@ -712,7 +712,7 @@ static void tb_record(TranslationBlock *tb) + tb_page_addr_t paddr0 = tb_page_addr0(tb); + tb_page_addr_t paddr1 = tb_page_addr1(tb); + tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS; +- tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS; ++ tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS; + + assert(paddr0 != -1); + if (unlikely(paddr1 != -1) && pindex0 != pindex1) { +@@ -744,7 +744,7 @@ static void tb_remove(TranslationBlock *tb) + tb_page_addr_t paddr0 = tb_page_addr0(tb); + tb_page_addr_t paddr1 = tb_page_addr1(tb); + tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS; +- tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS; ++ tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS; + + assert(paddr0 != -1); + if (unlikely(paddr1 != -1) && pindex0 != pindex1) { +-- +2.41.0.windows.1 + diff --git a/accel-tcg-Fix-user-only-probe_access_internal-plugin.patch b/accel-tcg-Fix-user-only-probe_access_internal-plugin.patch new file mode 100644 index 0000000000000000000000000000000000000000..5042483cab3f2c3e04b64b6075bb32cc31deb9ee --- /dev/null +++ b/accel-tcg-Fix-user-only-probe_access_internal-plugin.patch @@ -0,0 +1,42 @@ +From b611bd7f3f4525c8373f2e504594414e1ed5b058 Mon Sep 17 00:00:00 2001 +From: guping +Date: Mon, 18 Nov 2024 02:50:17 +0000 +Subject: [PATCH] accel/tcg: Fix user-only 
probe_access_internal plugin check + cherry-pick from 2a339fee450638b512c5122281cb5ab49331cfb8 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The acc_flag check for write should have been against PAGE_WRITE_ORG, +not PAGE_WRITE. But it is better to combine two acc_flag checks +to a single check against access_type. This matches the system code +in cputlb.c. + +Cc: qemu-stable@nongnu.org +Resolves: #2647 + +Signed-off-by: Richard Henderson +Message-Id: 20241111145002.144995-1-richard.henderson@linaro.org +Reviewed-by: Alex Bennée + +Signed-off-by: guping +--- + accel/tcg/user-exec.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c +index 68b252cb8e..e87848a5e2 100644 +--- a/accel/tcg/user-exec.c ++++ b/accel/tcg/user-exec.c +@@ -794,7 +794,7 @@ static int probe_access_internal(CPUArchState *env, vaddr addr, + if (guest_addr_valid_untagged(addr)) { + int page_flags = page_get_flags(addr); + if (page_flags & acc_flag) { +- if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE) ++ if (access_type != MMU_INST_FETCH + && cpu_plugin_mem_cbs_enabled(env_cpu(env))) { + return TLB_MMIO; + } +-- +2.41.0.windows.1 + diff --git a/acpi-cpu-Add-cpu_cppc-building-support.patch b/acpi-cpu-Add-cpu_cppc-building-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b045f296d183ddf356f8fbc54d8ddd34780f121 --- /dev/null +++ b/acpi-cpu-Add-cpu_cppc-building-support.patch @@ -0,0 +1,72 @@ +From c75a0102a1bb00190b07b06ede8b1f9fa0bdaa3c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 16:52:10 +0800 +Subject: [PATCH] acpi/cpu: Add cpu_cppc building support + +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 8 +++++++- + hw/i386/acpi-build.c | 2 +- + include/hw/acpi/cpu.h | 6 +++++- + 3 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index
cf0c7e8538..c8c11e51c6 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -342,7 +342,9 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_FW_EJECT_EVENT "CEJF" + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, ++ build_madt_cpu_fn build_madt_cpu, ++ build_cpu_cppc_fn build_cpu_cppc, ++ hwaddr base_addr, + const char *res_root, + const char *event_handler_method, + AmlRegionSpace rs) +@@ -668,6 +670,10 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, aml_name_decl("_UID", uid)); + } + ++ if (build_cpu_cppc) { ++ build_cpu_cppc(i, arch_ids->len, dev); ++ } ++ + method = aml_method("_STA", 0, AML_SERIALIZED); + aml_append(method, aml_return(aml_call1(CPU_STS_METHOD, uid))); + aml_append(dev, method); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index db4ca8a66a..e10799ecc6 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1545,7 +1545,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + .smi_path = pm->smi_on_cpuhp ? 
"\\_SB.PCI0.SMI0.SMIC" : NULL, + .fw_unplugs_cpu = pm->smi_on_cpu_unplug, + }; +- build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, ++ build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, NULL, + pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", + AML_SYSTEM_IO); + } +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index 76bc7eb251..b31a2e50d9 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -59,8 +59,12 @@ typedef struct CPUHotplugFeatures { + typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + ++typedef void (*build_cpu_cppc_fn)(int uid, int num_cpu, Aml *dev); ++ + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, ++ build_madt_cpu_fn build_madt_cpu, ++ build_cpu_cppc_fn build_cpu_cppc, ++ hwaddr base_addr, + const char *res_root, + const char *event_handler_method, + AmlRegionSpace rs); +-- +2.27.0 + diff --git a/acpi-cpu-Fix-cpu_hotplug_hw_init.patch b/acpi-cpu-Fix-cpu_hotplug_hw_init.patch new file mode 100644 index 0000000000000000000000000000000000000000..bca3afd0faa559c7881fd251b8d164e90e1e5b9f --- /dev/null +++ b/acpi-cpu-Fix-cpu_hotplug_hw_init.patch @@ -0,0 +1,36 @@ +From 14c4062c4acc7d417d163276b65e59073ba18eeb Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:51:18 +0800 +Subject: [PATCH] acpi/cpu: Fix cpu_hotplug_hw_init() + +For the present but disabled vCPUs, they will be released after +cpu_hotplug_hw_init(), we should not assign it to AcpiCpuStatus. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index c922c380aa..b258396e01 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -229,7 +229,6 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + for (i = 0; i < id_list->len; i++) { + struct CPUState *cpu = CPU(id_list->cpus[i].cpu); + if (qemu_present_cpu(cpu)) { +- state->devs[i].cpu = cpu; + state->devs[i].is_present = true; + } else { + if (qemu_persistent_cpu(cpu)) { +@@ -240,6 +239,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + } + + if (qemu_enabled_cpu(cpu)) { ++ state->devs[i].cpu = cpu; + state->devs[i].is_enabled = true; + } else { + state->devs[i].is_enabled = false; +-- +2.27.0 + diff --git a/acpi-cpu-Fix-detection-of-present-cpu.patch b/acpi-cpu-Fix-detection-of-present-cpu.patch new file mode 100644 index 0000000000000000000000000000000000000000..6bd4b47ef8fd3088c13f2d57c6833a0675513113 --- /dev/null +++ b/acpi-cpu-Fix-detection-of-present-cpu.patch @@ -0,0 +1,34 @@ +From c2eb1176fe06f359a8102bbacb54760c9c1d5aae Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 28 Apr 2024 12:50:09 +0800 +Subject: [PATCH] acpi/cpu: Fix detection of present cpu + +When qemu_present_cpu is false. it means cpu object is +null and then calling of qemu_persistent_cpu() will +cause null pointer access. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index b258396e01..292e1daca2 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -231,11 +231,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + if (qemu_present_cpu(cpu)) { + state->devs[i].is_present = true; + } else { +- if (qemu_persistent_cpu(cpu)) { +- state->devs[i].is_present = true; +- } else { +- state->devs[i].is_present = false; +- } ++ state->devs[i].is_present = false; + } + + if (qemu_enabled_cpu(cpu)) { +-- +2.27.0 + diff --git a/acpi-ged-Add-macro-for-acpi-sleep-control-register.patch b/acpi-ged-Add-macro-for-acpi-sleep-control-register.patch new file mode 100644 index 0000000000000000000000000000000000000000..61ebd380d0b62fd905a08567db1ace48707a22ec --- /dev/null +++ b/acpi-ged-Add-macro-for-acpi-sleep-control-register.patch @@ -0,0 +1,59 @@ +From 07fa80eacaa17d3cc3865050244b79d39cc61944 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Wed, 23 Oct 2024 14:34:56 +0800 +Subject: [PATCH] acpi: ged: Add macro for acpi sleep control register + +cherry-pick from edafc90ba481c586d0a649f34dcb8cd1f29c4259 + +Macro definition is added for acpi sleep control register, ged emulation +driver can use the macro , also it can be used in FDT table if ged is +exposed with FDT table.
+ +Signed-off-by: Bibo Mao +Reviewed-by: Igor Mammedov +Message-Id: <20240918014206.2165821-2-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Zhang Jiao +--- + hw/acpi/generic_event_device.c | 6 +++--- + include/hw/acpi/generic_event_device.h | 7 +++++-- + 2 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 4731a614a3..2ce7031f1a 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -203,9 +203,9 @@ static void ged_regs_write(void *opaque, hwaddr addr, uint64_t data, + + switch (addr) { + case ACPI_GED_REG_SLEEP_CTL: +- slp_typ = (data >> 2) & 0x07; +- slp_en = (data >> 5) & 0x01; +- if (slp_en && slp_typ == 5) { ++ slp_typ = (data >> ACPI_GED_SLP_TYP_POS) & ACPI_GED_SLP_TYP_MASK; ++ slp_en = !!(data & ACPI_GED_SLP_EN); ++ if (slp_en && slp_typ == ACPI_GED_SLP_TYP_S5) { + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } + return; +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index 90fc41cbb8..8ed9534c57 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -81,8 +81,11 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) + /* ACPI_GED_REG_RESET value for reset*/ + #define ACPI_GED_RESET_VALUE 0x42 + +-/* ACPI_GED_REG_SLEEP_CTL.SLP_TYP value for S5 (aka poweroff) */ +-#define ACPI_GED_SLP_TYP_S5 0x05 ++/* [ACPI 5.0 Chapter 4.8.3.7] Sleep Control and Status Register */ ++#define ACPI_GED_SLP_TYP_POS 0x2 /* SLP_TYPx Bit Offset */ ++#define ACPI_GED_SLP_TYP_MASK 0x07 /* SLP_TYPx 3-bit mask */ ++#define ACPI_GED_SLP_TYP_S5 0x05 /* System _S5 State (Soft Off) */ ++#define ACPI_GED_SLP_EN 0x20 /* SLP_EN write-only bit */ + + #define GED_DEVICE "GED" + #define AML_GED_EVT_REG "EREG" +-- +2.41.0.windows.1 + diff --git a/acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch b/acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch 
deleted file mode 100644 index 57247e6797da5a977685f737e9454427ab2c41df..0000000000000000000000000000000000000000 --- a/acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch +++ /dev/null @@ -1,204 +0,0 @@ -From 05d22b55133db1a2526cfe305102e075e883b5e2 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 3 Apr 2020 15:41:01 +0800 -Subject: [PATCH] acpi/ged: Extend ACPI GED to support CPU hotplug - -This adds a new GED event called ACPI_GED_CPU_HOTPLUG_EVT. -The basic workflow is that: GED sends this event to guest, -then ACPI driver in guest will call _EVT method of GED aml, -then _EVT will call CSCN method in cpus aml to get status of -all cpus. - -The status of cpus is maintained by CPUHotplugState in GED and -is made accessable to guest through memory region. - -This also adds migration support to CPUHotplugState. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - docs/specs/acpi_hw_reduced_hotplug.rst | 3 ++- - hw/acpi/cpu.c | 1 - - hw/acpi/generic_event_device.c | 35 ++++++++++++++++++++++++++ - hw/arm/Kconfig | 1 + - include/hw/acpi/cpu.h | 2 ++ - include/hw/acpi/generic_event_device.h | 4 +++ - 6 files changed, 44 insertions(+), 2 deletions(-) - -diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst -index 911a98255b..deb481555d 100644 ---- a/docs/specs/acpi_hw_reduced_hotplug.rst -+++ b/docs/specs/acpi_hw_reduced_hotplug.rst -@@ -63,7 +63,8 @@ GED IO interface (4 byte access) - bits: - 0: Memory hotplug event - 1: System power down event -- 2-31: Reserved -+ 2: CPU hotplug event -+ 3-31: Reserved - - **write_access:** - -diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c -index 72ad1fcff2..cb6bb67f3c 100644 ---- a/hw/acpi/cpu.c -+++ b/hw/acpi/cpu.c -@@ -6,7 +6,6 @@ - #include "trace.h" - #include "sysemu/numa.h" - --#define ACPI_CPU_HOTPLUG_REG_LEN 12 - #define ACPI_CPU_SELECTOR_OFFSET_WR 0 - #define ACPI_CPU_FLAGS_OFFSET_RW 4 - #define ACPI_CPU_CMD_OFFSET_WR 5 -diff --git a/hw/acpi/generic_event_device.c 
b/hw/acpi/generic_event_device.c -index 82139b4314..478a4ee87c 100644 ---- a/hw/acpi/generic_event_device.c -+++ b/hw/acpi/generic_event_device.c -@@ -23,6 +23,7 @@ - static const uint32_t ged_supported_events[] = { - ACPI_GED_MEM_HOTPLUG_EVT, - ACPI_GED_PWR_DOWN_EVT, -+ ACPI_GED_CPU_HOTPLUG_EVT, - }; - - /* -@@ -110,6 +111,9 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, - aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), - aml_int(0x80))); - break; -+ case ACPI_GED_CPU_HOTPLUG_EVT: -+ aml_append(if_ctx, aml_call0("\\_SB.CPUS.CSCN")); -+ break; - default: - /* - * Please make sure all the events in ged_supported_events[] -@@ -176,6 +180,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); -+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { -+ acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); - } else { - error_setg(errp, "virt: device plug request for unsupported device" - " type: %s", object_get_typename(OBJECT(dev))); -@@ -192,6 +198,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) - sel = ACPI_GED_MEM_HOTPLUG_EVT; - } else if (ev & ACPI_POWER_DOWN_STATUS) { - sel = ACPI_GED_PWR_DOWN_EVT; -+ } else if (ev & ACPI_CPU_HOTPLUG_STATUS) { -+ sel = ACPI_GED_CPU_HOTPLUG_EVT; - } else { - /* Unknown event. Return without generating interrupt. */ - warn_report("GED: Unsupported event %d. 
No irq injected", ev); -@@ -224,6 +232,16 @@ static const VMStateDescription vmstate_memhp_state = { - } - }; - -+static const VMStateDescription vmstate_cpuhp_state = { -+ .name = "acpi-ged/cpuhp", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .fields = (VMStateField[]) { -+ VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ - static const VMStateDescription vmstate_ged_state = { - .name = "acpi-ged-state", - .version_id = 1, -@@ -244,6 +262,7 @@ static const VMStateDescription vmstate_acpi_ged = { - }, - .subsections = (const VMStateDescription * []) { - &vmstate_memhp_state, -+ &vmstate_cpuhp_state, - NULL - } - }; -@@ -254,6 +273,7 @@ static void acpi_ged_initfn(Object *obj) - AcpiGedState *s = ACPI_GED(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - GEDState *ged_st = &s->ged_state; -+ MachineClass *mc; - - memory_region_init_io(&ged_st->io, obj, &ged_ops, ged_st, - TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); -@@ -273,6 +293,21 @@ static void acpi_ged_initfn(Object *obj) - sysbus_init_mmio(sbd, &s->container_memhp); - acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), - &s->memhp_state, 0); -+ -+ mc = MACHINE_GET_CLASS(qdev_get_machine()); -+ if (!mc->possible_cpu_arch_ids) { -+ /* -+ * MachineClass should support possible_cpu_arch_ids in -+ * cpu_hotplug_hw_init below. 
-+ */ -+ return; -+ } -+ -+ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", -+ ACPI_CPU_HOTPLUG_REG_LEN); -+ sysbus_init_mmio(sbd, &s->container_cpuhp); -+ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), -+ &s->cpuhp_state, 0); - } - - static void acpi_ged_class_init(ObjectClass *class, void *data) -diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig -index ad7f7c089b..15e18b0a48 100644 ---- a/hw/arm/Kconfig -+++ b/hw/arm/Kconfig -@@ -24,6 +24,7 @@ config ARM_VIRT - select DIMM - select ACPI_MEMORY_HOTPLUG - select ACPI_HW_REDUCED -+ select ACPI_CPU_HOTPLUG - - config CHEETAH - bool -diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h -index a30ec84a4f..e726414459 100644 ---- a/include/hw/acpi/cpu.h -+++ b/include/hw/acpi/cpu.h -@@ -17,6 +17,8 @@ - #include "hw/acpi/aml-build.h" - #include "hw/hotplug.h" - -+#define ACPI_CPU_HOTPLUG_REG_LEN 12 -+ - typedef struct AcpiCpuStatus { - struct CPUState *cpu; - uint64_t arch_id; -diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h -index f99efad7a3..e702ff1e18 100644 ---- a/include/hw/acpi/generic_event_device.h -+++ b/include/hw/acpi/generic_event_device.h -@@ -62,6 +62,7 @@ - #include "hw/sysbus.h" - #include "hw/acpi/memory_hotplug.h" - #include "hw/arm/virt.h" -+#include "hw/acpi/cpu.h" - - #define ACPI_POWER_BUTTON_DEVICE "PWRB" - -@@ -83,6 +84,7 @@ - */ - #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 - #define ACPI_GED_PWR_DOWN_EVT 0x2 -+#define ACPI_GED_CPU_HOTPLUG_EVT 0x4 - - typedef struct GEDState { - MemoryRegion io; -@@ -93,6 +95,8 @@ typedef struct AcpiGedState { - SysBusDevice parent_obj; - MemHotplugState memhp_state; - MemoryRegion container_memhp; -+ CPUHotplugState cpuhp_state; -+ MemoryRegion container_cpuhp; - GEDState ged_state; - uint32_t ged_event_bitmap; - qemu_irq irq; --- -2.19.1 diff --git a/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch b/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch new file mode 100644 
index 0000000000000000000000000000000000000000..514292717255b282761e77810dbdcf922f8230d6 --- /dev/null +++ b/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch @@ -0,0 +1,47 @@ +From 6e17d32d6df25d4fac1a31da61d89e0bb9c8c7da Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:20:20 +0800 +Subject: [PATCH] acpi/ged: Init cpu hotplug only when machine support it + +Signed-off-by: Keqian Zhu +--- + hw/acpi/generic_event_device.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 0266733a54..6e4f5f075f 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -403,6 +403,7 @@ static void acpi_ged_initfn(Object *obj) + AcpiGedState *s = ACPI_GED(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + GEDState *ged_st = &s->ged_state; ++ MachineClass *mc; + + memory_region_init_io(&ged_st->evt, obj, &ged_evt_ops, ged_st, + TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); +@@ -427,12 +428,15 @@ static void acpi_ged_initfn(Object *obj) + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); + +- s->cpuhp.device = OBJECT(s); +- memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", +- ACPI_CPU_HOTPLUG_REG_LEN); +- sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); +- cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), +- &s->cpuhp_state, 0); ++ mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (mc->possible_cpu_arch_ids) { ++ s->cpuhp.device = OBJECT(s); ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); ++ } + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +-- +2.27.0 + diff --git a/acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch 
b/acpi-ged-Remove-cpuhp-field-of-ged.patch similarity index 30% rename from acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch rename to acpi-ged-Remove-cpuhp-field-of-ged.patch index f4e0a25c050e7948c1c7e1e2e0721c0b43d27fcc..760ad92d47604afba039502a7ff07598ac3d83fb 100644 --- a/acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch +++ b/acpi-ged-Remove-cpuhp-field-of-ged.patch @@ -1,41 +1,40 @@ -From 3cd6df0b9e7d7b544673ce9a63b405e236d8265b Mon Sep 17 00:00:00 2001 +From 7af2722536b4b0d80f6c508066e8e77158869923 Mon Sep 17 00:00:00 2001 From: Keqian Zhu -Date: Fri, 10 Apr 2020 10:05:54 +0800 -Subject: [PATCH] acpi/ged: Add virt_madt_cpu_entry to madt_cpu hook +Date: Tue, 26 Mar 2024 23:34:01 +0800 +Subject: [PATCH] acpi/ged: Remove cpuhp field of ged -In build_cpus_aml, we will invoke this hook to build _MAT -aml mehtod for cpus. +It's unused. Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta --- - hw/acpi/generic_event_device.c | 1 + - include/hw/acpi/generic_event_device.h | 1 + - 2 files changed, 2 insertions(+) + hw/acpi/generic_event_device.c | 1 - + include/hw/acpi/generic_event_device.h | 1 - + 2 files changed, 2 deletions(-) diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c -index 9cee90cc70..b834ae3ff6 100644 +index 6e4f5f075f..4731a614a3 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c -@@ -288,6 +288,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) - hc->plug = acpi_ged_device_plug_cb; - - adevc->send_event = acpi_ged_send_event; -+ adevc->madt_cpu = virt_madt_cpu_entry; - } - - static const TypeInfo acpi_ged_info = { +@@ -430,7 +430,6 @@ static void acpi_ged_initfn(Object *obj) + + mc = MACHINE_GET_CLASS(qdev_get_machine()); + if (mc->possible_cpu_arch_ids) { +- s->cpuhp.device = OBJECT(s); + memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", + ACPI_CPU_HOTPLUG_REG_LEN); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); diff --git 
a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h -index d157eac088..f99efad7a3 100644 +index a803ea818e..90fc41cbb8 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h -@@ -61,6 +61,7 @@ - - #include "hw/sysbus.h" - #include "hw/acpi/memory_hotplug.h" -+#include "hw/arm/virt.h" - - #define ACPI_POWER_BUTTON_DEVICE "PWRB" +@@ -110,7 +110,6 @@ struct AcpiGedState { + MemoryRegion container_memhp; + CPUHotplugState cpuhp_state; + MemoryRegion container_cpuhp; +- AcpiCpuHotplug cpuhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; +-- +2.27.0 --- -2.19.1 diff --git a/acpi-gpex-Fix-PCI-Express-Slot-Information-function-.patch b/acpi-gpex-Fix-PCI-Express-Slot-Information-function-.patch new file mode 100644 index 0000000000000000000000000000000000000000..9238a792eb63cffc05255d675e2a6a84b7dbc3d7 --- /dev/null +++ b/acpi-gpex-Fix-PCI-Express-Slot-Information-function-.patch @@ -0,0 +1,32 @@ +From 237fdc8ddb0598234aace9c88ac4c8387119a12a Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 7 Jul 2022 11:55:25 -0400 +Subject: [PATCH] acpi/gpex: Fix PCI Express Slot Information function 0 + returned value + +At the moment we do not support other function than function 0. +So according to ACPI spec "_DSM (Device Specific Method)" +description, bit 0 should rather be 0, meaning no other function is +supported than function 0. 
+ +Signed-off-by: Eric Auger +--- + hw/pci-host/gpex-acpi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c +index 1092dc3b70..ac5d229757 100644 +--- a/hw/pci-host/gpex-acpi.c ++++ b/hw/pci-host/gpex-acpi.c +@@ -113,7 +113,7 @@ static void acpi_dsdt_add_pci_osc(Aml *dev) + UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); + ifctx = aml_if(aml_equal(aml_arg(0), UUID)); + ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(0))); +- uint8_t byte_list[1] = {1}; ++ uint8_t byte_list[1] = {0}; + buf = aml_buffer(1, byte_list); + aml_append(ifctx1, aml_return(buf)); + aml_append(ifctx, ifctx1); +-- +2.41.0.windows.1 + diff --git a/acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch b/acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch deleted file mode 100644 index 30f210b33b66332cc0c16b5d2ddb706a177a2130..0000000000000000000000000000000000000000 --- a/acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 0288d98f0ef4d17a73cf2bad1b928cd7c044e318 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 13:40:44 +0800 -Subject: [PATCH] acpi/madt: Add pre-sizing capability to MADT GICC struct - -The count of possible CPUs is exposed to guest through the count -of MADT GICC struct, so we should pre-sizing MADT GICC too. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt-acpi-build.c | 26 +++++++++++++++++++++----- - include/hw/acpi/acpi-defs.h | 1 + - 2 files changed, 22 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index dbe9acb148..efac788ba1 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -678,6 +678,13 @@ void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const MemMapEntry *memmap = vms->memmap; - AcpiMadtGenericCpuInterface *gicc = acpi_data_push(entry, sizeof(*gicc)); - ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(uid)); -+ static bool pmu; -+ -+ if (uid == 0) { -+ pmu = arm_feature(&armcpu->env, ARM_FEATURE_PMU); -+ } -+ /* FEATURE_PMU should be all enabled or disabled for CPUs */ -+ assert(!armcpu || arm_feature(&armcpu->env, ARM_FEATURE_PMU) == pmu); - - gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; - gicc->length = sizeof(*gicc); -@@ -687,11 +694,15 @@ void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); - } - gicc->cpu_interface_number = cpu_to_le32(uid); -- gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); -+ gicc->arm_mpidr = possible_cpus->cpus[uid].arch_id; - gicc->uid = cpu_to_le32(uid); -- gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); -+ if (armcpu) { -+ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); -+ } else { -+ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_DISABLED); -+ } - -- if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { -+ if (pmu) { - gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); - } - if (vms->virt) { -@@ -704,12 +715,17 @@ static void - build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - { - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -+ MachineClass *mc = MACHINE_GET_CLASS(vms); -+ MachineState *ms = MACHINE(vms); -+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); - int madt_start = table_data->len; - 
const MemMapEntry *memmap = vms->memmap; - const int *irqmap = vms->irqmap; - AcpiMultipleApicTable *madt; - AcpiMadtGenericDistributor *gicd; - AcpiMadtGenericMsiFrame *gic_msi; -+ /* The MADT GICC numbers */ -+ int num_cpu = vms->smp_cpus; - int i; - - madt = acpi_data_push(table_data, sizeof *madt); -@@ -720,8 +736,8 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base); - gicd->version = vms->gic_version; - -- for (i = 0; i < vms->smp_cpus; i++) { -- virt_madt_cpu_entry(NULL, i, NULL, table_data); -+ for (i = 0; i < num_cpu; i++) { -+ virt_madt_cpu_entry(NULL, i, possible_cpus, table_data); - } - - if (vms->gic_version == 3) { -diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h -index 39ae91d3b8..6bfa7f9152 100644 ---- a/include/hw/acpi/acpi-defs.h -+++ b/include/hw/acpi/acpi-defs.h -@@ -306,6 +306,7 @@ typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface; - - /* GICC CPU Interface Flags */ - #define ACPI_MADT_GICC_ENABLED 1 -+#define ACPI_MADT_GICC_DISABLED 0 - - struct AcpiMadtGenericDistributor { - ACPI_SUB_HEADER_DEF --- -2.19.1 diff --git a/acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch b/acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch deleted file mode 100644 index 6bda35c51a3952121a155081bf0a37ce3534da25..0000000000000000000000000000000000000000 --- a/acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch +++ /dev/null @@ -1,108 +0,0 @@ -From a3097eed8b642dc6fe891112340821e869b90cc2 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Mon, 13 Jan 2020 19:02:20 +0800 -Subject: [PATCH] acpi/madt: Factor out the building of MADT GICC struct - -To realize CPU hotplug, the cpus aml within ACPI DSDT should contain -_MAT mathod, which is equal to the GICC struct in ACPI MADT. Factor -out the GICC building code from ACPI MADT and reuse it in build_cpus_aml. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt-acpi-build.c | 51 +++++++++++++++++++++++----------------- - include/hw/arm/virt.h | 3 +++ - 2 files changed, 32 insertions(+), 22 deletions(-) - -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index f48733d9f2..4b6aace433 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -664,6 +664,34 @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - table_data->len - gtdt_start, 2, NULL, NULL); - } - -+void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, -+ const CPUArchIdList *possible_cpus, GArray *entry) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine()); -+ const MemMapEntry *memmap = vms->memmap; -+ AcpiMadtGenericCpuInterface *gicc = acpi_data_push(entry, sizeof(*gicc)); -+ ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(uid)); -+ -+ gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; -+ gicc->length = sizeof(*gicc); -+ if (vms->gic_version == 2) { -+ gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base); -+ gicc->gich_base_address = cpu_to_le64(memmap[VIRT_GIC_HYP].base); -+ gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); -+ } -+ gicc->cpu_interface_number = cpu_to_le32(uid); -+ gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); -+ gicc->uid = cpu_to_le32(uid); -+ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); -+ -+ if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { -+ gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); -+ } -+ if (vms->virt) { -+ gicc->vgic_interrupt = cpu_to_le32(PPI(ARCH_GIC_MAINT_IRQ)); -+ } -+} -+ - /* MADT */ - static void - build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) -@@ -686,28 +714,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - gicd->version = vms->gic_version; - - for (i = 0; i < vms->smp_cpus; i++) { -- AcpiMadtGenericCpuInterface *gicc = acpi_data_push(table_data, -- sizeof(*gicc)); -- ARMCPU 
*armcpu = ARM_CPU(qemu_get_cpu(i)); -- -- gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; -- gicc->length = sizeof(*gicc); -- if (vms->gic_version == 2) { -- gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base); -- gicc->gich_base_address = cpu_to_le64(memmap[VIRT_GIC_HYP].base); -- gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); -- } -- gicc->cpu_interface_number = cpu_to_le32(i); -- gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); -- gicc->uid = cpu_to_le32(i); -- gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); -- -- if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { -- gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); -- } -- if (vms->virt) { -- gicc->vgic_interrupt = cpu_to_le32(PPI(ARCH_GIC_MAINT_IRQ)); -- } -+ virt_madt_cpu_entry(NULL, i, NULL, table_data); - } - - if (vms->gic_version == 3) { -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 3dfefca93b..6b1f10b231 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -37,6 +37,7 @@ - #include "hw/block/flash.h" - #include "sysemu/kvm.h" - #include "hw/intc/arm_gicv3_common.h" -+#include "hw/acpi/acpi_dev_interface.h" - - #define NUM_GICV2M_SPIS 64 - #define NUM_VIRTIO_TRANSPORTS 32 -@@ -154,6 +155,8 @@ typedef struct { - OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) - - void virt_acpi_setup(VirtMachineState *vms); -+void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, -+ const CPUArchIdList *cpu_list, GArray *entry); - - /* Return the number of used redistributor regions */ - static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) --- -2.19.1 diff --git a/acpi-tests-avocado-bits-wait-for-200-seconds-for-SHU.patch b/acpi-tests-avocado-bits-wait-for-200-seconds-for-SHU.patch new file mode 100644 index 0000000000000000000000000000000000000000..a918621470fcfb17b5fb52885232992fe02715da --- /dev/null +++ b/acpi-tests-avocado-bits-wait-for-200-seconds-for-SHU.patch @@ -0,0 +1,66 @@ +From 
1f6dde2350209e937a5676c6775d1500136caea2 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Mon, 18 Nov 2024 13:48:37 +0800 +Subject: [PATCH] acpi/tests/avocado/bits: wait for 200 seconds for SHUTDOWN + event from bits VM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 7ef4c41e91d59d72a3b8bc022a6cb3e81787a50a + +By default, the timeout to receive any specified event from the QEMU VM is 60 +seconds set by the python avocado test framework. Please see event_wait() and +events_wait() in python/qemu/machine/machine.py. If the matching event is not +triggered within that interval, an asyncio.TimeoutError is generated. Since the +timeout for the bits avocado test is 200 secs, we need to make event_wait() +timeout of the same value as well so that an early timeout is not triggered by +the avocado framework. + +CC: peter.maydell@linaro.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2077 +Signed-off-by: Ani Sinha +Reviewed-by: Daniel P. Berrangé +Message-id: 20240117042556.3360190-1-anisinha@redhat.com +Signed-off-by: Peter Maydell +Signed-off-by: gubin +--- + tests/avocado/acpi-bits.py | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py +index 68b9e98d4e..efe4f52ee0 100644 +--- a/tests/avocado/acpi-bits.py ++++ b/tests/avocado/acpi-bits.py +@@ -54,6 +54,8 @@ + deps = ["xorriso", "mformat"] # dependent tools needed in the test setup/box. + supported_platforms = ['x86_64'] # supported test platforms. + ++# default timeout of 120 secs is sometimes not enough for bits test. ++BITS_TIMEOUT = 200 + + def which(tool): + """ looks up the full path for @tool, returns None if not found +@@ -133,7 +135,7 @@ class AcpiBitsTest(QemuBaseTest): #pylint: disable=too-many-instance-attributes + + """ + # in slower systems the test can take as long as 3 minutes to complete. 
+- timeout = 200 ++ timeout = BITS_TIMEOUT + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) +@@ -400,7 +402,8 @@ def test_acpi_smbios_bits(self): + + # biosbits has been configured to run all the specified test suites + # in batch mode and then automatically initiate a vm shutdown. +- # Rely on avocado's unit test timeout. +- self._vm.event_wait('SHUTDOWN') ++ # Set timeout to BITS_TIMEOUT for SHUTDOWN event from bits VM at par ++ # with the avocado test timeout. ++ self._vm.event_wait('SHUTDOWN', timeout=BITS_TIMEOUT) + self._vm.wait(timeout=None) + self.parse_log() +-- +2.41.0.windows.1 + diff --git a/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch b/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch deleted file mode 100644 index 7926e7fa0db4ef16737a89782ea661273ef8c4d3..0000000000000000000000000000000000000000 --- a/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 929d29ec7bf9dd6ec3802bea2148a041ff30d59b Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 14 Apr 2020 21:17:09 +0800 -Subject: [PATCH] aio-wait: delegate polling of main AioContext if BQL not held - -Any thread that is not a iothread returns NULL for qemu_get_current_aio_context(). -As a result, it would also return true for -in_aio_context_home_thread(qemu_get_aio_context()), causing -AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect -if the BQL is not held, because aio_poll() does not expect to -run concurrently from multiple threads, and it can actually -happen when savevm writes to the vmstate file from the -migration thread. - -Therefore, restrict in_aio_context_home_thread to return true -for the main AioContext only if the BQL is held. - -The function is moved to aio-wait.h because it is mostly used -there and to avoid a circular reference between main-loop.h -and block/aio.h. 
- -Signed-off-by: Paolo Bonzini -Message-Id: <20200407140746.8041-5-pbonzini@redhat.com> -Signed-off-by: Stefan Hajnoczi ---- - include/block/aio-wait.h | 22 ++++++++++++++++++++++ - include/block/aio.h | 29 ++++++++++------------------- - 2 files changed, 32 insertions(+), 19 deletions(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index afeeb18f..716d2639 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -26,6 +26,7 @@ - #define QEMU_AIO_WAIT_H - - #include "block/aio.h" -+#include "qemu/main-loop.h" - - /** - * AioWait: -@@ -124,4 +125,25 @@ void aio_wait_kick(void); - */ - void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); - -+/** -+ * in_aio_context_home_thread: -+ * @ctx: the aio context -+ * -+ * Return whether we are running in the thread that normally runs @ctx. Note -+ * that acquiring/releasing ctx does not affect the outcome, each AioContext -+ * still only has one home thread that is responsible for running it. -+ */ -+static inline bool in_aio_context_home_thread(AioContext *ctx) -+{ -+ if (ctx == qemu_get_current_aio_context()) { -+ return true; -+ } -+ -+ if (ctx == qemu_get_aio_context()) { -+ return qemu_mutex_iothread_locked(); -+ } else { -+ return false; -+ } -+} -+ - #endif /* QEMU_AIO_WAIT_H */ -diff --git a/include/block/aio.h b/include/block/aio.h -index 6b0d52f7..9d28e247 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -60,12 +60,16 @@ struct AioContext { - QLIST_HEAD(, AioHandler) aio_handlers; - - /* Used to avoid unnecessary event_notifier_set calls in aio_notify; -- * accessed with atomic primitives. If this field is 0, everything -- * (file descriptors, bottom halves, timers) will be re-evaluated -- * before the next blocking poll(), thus the event_notifier_set call -- * can be skipped. If it is non-zero, you may need to wake up a -- * concurrent aio_poll or the glib main event loop, making -- * event_notifier_set necessary. 
-+ * only written from the AioContext home thread, or under the BQL in -+ * the case of the main AioContext. However, it is read from any -+ * thread so it is still accessed with atomic primitives. -+ * -+ * If this field is 0, everything (file descriptors, bottom halves, -+ * timers) will be re-evaluated before the next blocking poll() or -+ * io_uring wait; therefore, the event_notifier_set call can be -+ * skipped. If it is non-zero, you may need to wake up a concurrent -+ * aio_poll or the glib main event loop, making event_notifier_set -+ * necessary. - * - * Bit 0 is reserved for GSource usage of the AioContext, and is 1 - * between a call to aio_ctx_prepare and the next call to aio_ctx_check. -@@ -580,19 +584,6 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); - */ - AioContext *qemu_get_current_aio_context(void); - --/** -- * in_aio_context_home_thread: -- * @ctx: the aio context -- * -- * Return whether we are running in the thread that normally runs @ctx. Note -- * that acquiring/releasing ctx does not affect the outcome, each AioContext -- * still only has one home thread that is responsible for running it. 
-- */ --static inline bool in_aio_context_home_thread(AioContext *ctx) --{ -- return ctx == qemu_get_current_aio_context(); --} -- - /** - * aio_context_setup: - * @ctx: the aio context --- -2.23.0 diff --git a/apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch deleted file mode 100644 index 4a96fc5ce1372f0e2f59ab9019cf8d72e0ee7bee..0000000000000000000000000000000000000000 --- a/apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 3bdd21c4b7d80cacc6b5f1b26ab52ef3a0aceb06 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 16 Oct 2019 10:29:32 +0800 -Subject: [PATCH 7/8] apic: Use 32bit APIC ID for migration instance ID - -Migration is silently broken now with x2apic config like this: - - -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ - -device intel-iommu,intremap=on,eim=on - -After migration, the guest kernel could hang at anything, due to -x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so -any operations related to x2apic could be broken then (e.g., RDMSR on -x2apic MSRs could fail because KVM would think that the vcpu hasn't -enabled x2apic at all). - -The issue is that the x2apic bit was never applied correctly for vcpus -whose ID > 255 when migrate completes, and that's because when we -migrate APIC we use the APICCommonState.id as instance ID of the -migration stream, while that's too short for x2apic. - -Let's use the newly introduced initial_apic_id for that. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: Eduardo Habkost -Signed-off-by: Juan Quintela ---- - hw/intc/apic_common.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index 07adba0..2c0cb1e 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -313,7 +313,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- uint32_t instance_id = s->id; -+ uint32_t instance_id = s->initial_apic_id; -+ -+ /* Normally initial APIC ID should be no more than hundreds */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); --- -1.8.3.1 - diff --git a/arm-VirtCCA-CVM-support-UEFI-boot.patch b/arm-VirtCCA-CVM-support-UEFI-boot.patch new file mode 100644 index 0000000000000000000000000000000000000000..37af6303f4801f9f1fb9735e4fc6d6772db66459 --- /dev/null +++ b/arm-VirtCCA-CVM-support-UEFI-boot.patch @@ -0,0 +1,189 @@ +From 9eacd1a6df6861b76663e98133adb15059bf65cc Mon Sep 17 00:00:00 2001 +From: gongchangsui +Date: Mon, 17 Mar 2025 02:40:50 -0400 +Subject: [PATCH] arm: VirtCCA: CVM support UEFI boot + +1. Add UEFI boot support for Confidential VMs. +2. Modify the base memory address of Confidential VMs from 3GB to 1GB. +3. Disable pflash boot support for Confidential VMs; use the `-bios` option to specify `QEMU_EFI.fd` during launch. 
+ +Signed-off-by: gongchangsui +--- + hw/arm/boot.c | 38 ++++++++++++++++++++++++++++++++++++-- + hw/arm/virt.c | 33 ++++++++++++++++++++++++++++++++- + include/hw/arm/boot.h | 3 +++ + 3 files changed, 71 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 42110b0f18..6b2f46af4d 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -43,6 +43,9 @@ + + #define BOOTLOADER_MAX_SIZE (4 * KiB) + ++#define UEFI_MAX_SIZE 0x8000000 ++#define UEFI_LOADER_START 0x0 ++#define DTB_MAX 0x200000 + AddressSpace *arm_boot_address_space(ARMCPU *cpu, + const struct arm_boot_info *info) + { +@@ -1155,7 +1158,31 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + } + } + +-static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) ++static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, ++ struct arm_boot_info *info, ++ const char *firmware_filename) ++{ ++ ssize_t fw_size; ++ const char *fname; ++ AddressSpace *as = arm_boot_address_space(cpu, info); ++ ++ fname = qemu_find_file(QEMU_FILE_TYPE_BIOS, firmware_filename); ++ if (!fname) { ++ error_report("Could not find firmware image '%s'", firmware_filename); ++ exit(EXIT_FAILURE); ++ } ++ ++ fw_size = load_image_targphys_as(firmware_filename, ++ info->firmware_base, ++ info->firmware_max_size, as); ++ ++ if (fw_size <= 0) { ++ error_report("could not load firmware '%s'", firmware_filename); ++ exit(EXIT_FAILURE); ++ } ++} ++ ++static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, const char *firmware_filename) + { + /* Set up for booting firmware (which might load a kernel via fw_cfg) */ + +@@ -1166,6 +1193,8 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) + * DTB to the base of RAM for the bootloader to pick up. 
+ */ + info->dtb_start = info->loader_start; ++ if (info->confidential) ++ tmm_add_ram_region(UEFI_LOADER_START, UEFI_MAX_SIZE, info->dtb_start, DTB_MAX , true); + } + + if (info->kernel_filename) { +@@ -1206,6 +1235,11 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) + } + } + ++ if (info->confidential) { ++ arm_setup_confidential_firmware_boot(cpu, info, firmware_filename); ++ kvm_load_user_data(UEFI_LOADER_START, UEFI_MAX_SIZE, info->loader_start, info->loader_start + DTB_MAX, info->ram_size, ++ (struct kvm_numa_info *)info->numa_info); ++ } + /* + * We will start from address 0 (typically a boot ROM image) in the + * same way as hardware. Leave env->boot_info NULL, so that +@@ -1282,7 +1316,7 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) + + /* Load the kernel. */ + if (!info->kernel_filename || info->firmware_loaded) { +- arm_setup_firmware_boot(cpu, info); ++ arm_setup_firmware_boot(cpu, info, ms->firmware); + } else { + arm_setup_direct_kernel_boot(cpu, info); + } +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8823f2ed1c..6ffb26e7e6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1398,6 +1398,9 @@ static void virt_flash_map1(PFlashCFI01 *flash, + qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + ++ if (virtcca_cvm_enabled()) { ++ return; ++ } + memory_region_add_subregion(sysmem, base, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), + 0)); +@@ -1433,6 +1436,10 @@ static void virt_flash_fdt(VirtMachineState *vms, + MachineState *ms = MACHINE(vms); + char *nodename; + ++ if (virtcca_cvm_enabled()) { ++ return; ++ } ++ + if (sysmem == secure_sysmem) { + /* Report both flash devices as a single node in the DT */ + nodename = g_strdup_printf("/flash@%" PRIx64, flashbase); +@@ -1468,6 +1475,23 @@ static void virt_flash_fdt(VirtMachineState *vms, + } + } + ++static bool 
virt_confidential_firmware_init(VirtMachineState *vms, ++ MemoryRegion *sysmem) ++{ ++ MemoryRegion *fw_ram; ++ hwaddr fw_base = vms->memmap[VIRT_FLASH].base; ++ hwaddr fw_size = vms->memmap[VIRT_FLASH].size; ++ ++ if (!MACHINE(vms)->firmware) { ++ return false; ++ } ++ ++ fw_ram = g_new(MemoryRegion, 1); ++ memory_region_init_ram(fw_ram, NULL, "fw_ram", fw_size, NULL); ++ memory_region_add_subregion(sysmem, fw_base, fw_ram); ++ return true; ++} ++ + static bool virt_firmware_init(VirtMachineState *vms, + MemoryRegion *sysmem, + MemoryRegion *secure_sysmem) +@@ -1486,6 +1510,10 @@ static bool virt_firmware_init(VirtMachineState *vms, + + pflash_blk0 = pflash_cfi01_get_blk(vms->flash[0]); + ++ if (virtcca_cvm_enabled()) { ++ return virt_confidential_firmware_init(vms, sysmem); ++ } ++ + bios_name = MACHINE(vms)->firmware; + if (bios_name) { + char *fname; +@@ -2023,7 +2051,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + vms->memmap[VIRT_PCIE_MMIO] = (MemMapEntry) { 0x10000000, 0x2edf0000 }; + vms->memmap[VIRT_KAE_DEVICE] = (MemMapEntry) { 0x3edf0000, 0x00200000 }; + +- vms->memmap[VIRT_MEM].base = 3 * GiB; ++ vms->memmap[VIRT_MEM].base = 1 * GiB; + vms->memmap[VIRT_MEM].size = ms->ram_size; + info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base), + (unsigned long long)(vms->memmap[VIRT_MEM].base + ms->ram_size)); +@@ -2822,6 +2850,9 @@ static void machvirt_init(MachineState *machine) + vms->bootinfo.get_dtb = machvirt_dtb; + vms->bootinfo.skip_dtb_autoload = true; + vms->bootinfo.firmware_loaded = firmware_loaded; ++ vms->bootinfo.firmware_base = vms->memmap[VIRT_FLASH].base; ++ vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size; ++ vms->bootinfo.confidential = virtcca_cvm_enabled(); + vms->bootinfo.psci_conduit = vms->psci_conduit; + arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo); + +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 
4491b1f85b..06ca1d90b2 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -133,6 +133,9 @@ struct arm_boot_info { + bool secure_board_setup; + + arm_endianness endianness; ++ hwaddr firmware_base; ++ hwaddr firmware_max_size; ++ bool confidential; + }; + + /** +-- +2.41.0.windows.1 + diff --git a/arm-VirtCCA-Compatibility-with-older-versions-of-TMM.patch b/arm-VirtCCA-Compatibility-with-older-versions-of-TMM.patch new file mode 100644 index 0000000000000000000000000000000000000000..6141b8399a91cd3233d3ecc7bea3316756060268 --- /dev/null +++ b/arm-VirtCCA-Compatibility-with-older-versions-of-TMM.patch @@ -0,0 +1,117 @@ +From 5ed17a43a4cc7fc76397d6d8cad8246063b5b2f3 Mon Sep 17 00:00:00 2001 +From: gongchangsui +Date: Mon, 17 Mar 2025 02:43:55 -0400 +Subject: [PATCH] arm: VirtCCA: Compatibility with older versions of TMM and + the kernel + +Since the base memory address of Confidential VMs in QEMU was changed +from 3GB to 1GB, corresponding adjustments are required in both the TMM +and kernel components. To maintain backward compatibility, the following +modifications were implemented: + 1. **TMM Versioning**: The TMM version number was incremented to +reflect the update. + 2. **Kernel Interface**: A new interface was exposed in the kernel +to retrieve the TMM version number. + 3. **QEMU Compatibility Logic**: During initialization, QEMU checks +the TMM version via the kernel interface. If the TMM version is **<2.1** (legacy), +QEMU sets the Confidential VM's base memory address to **3GB**. For TMM versions +**>=2.1** (updated), the address is configured to **1GB** to align with the new memory layout. +This approach ensures seamless backward compatibility while transitioning +to the revised memory addressing scheme. 
+ +Signed-off-by: gongchangsui +--- + accel/kvm/kvm-all.c | 3 +-- + hw/arm/boot.c | 9 +++++++++ + hw/arm/virt.c | 9 +++++++-- + linux-headers/asm-arm64/kvm.h | 2 ++ + linux-headers/linux/kvm.h | 3 +++ + 5 files changed, 22 insertions(+), 4 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index a8e29f148e..38a48cc031 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2390,6 +2390,7 @@ static int kvm_init(MachineState *ms) + qemu_mutex_init(&kml_slots_lock); + + s = KVM_STATE(ms->accelerator); ++ kvm_state = s; + + /* + * On systems where the kernel can support different base page +@@ -2609,8 +2610,6 @@ static int kvm_init(MachineState *ms) + #endif + } + +- kvm_state = s; +- + ret = kvm_arch_init(ms, s); + if (ret < 0) { + goto err; +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 6b2f46af4d..ca9f69fd3d 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -1162,6 +1162,15 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, + struct arm_boot_info *info, + const char *firmware_filename) + { ++ uint64_t tmi_version = 0; ++ if (kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version) < 0) { ++ error_report("please check the kernel version!"); ++ exit(EXIT_FAILURE); ++ } ++ if (tmi_version < MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM) { ++ error_report("please check the tmi version!"); ++ exit(EXIT_FAILURE); ++ } + ssize_t fw_size; + const char *fname; + AddressSpace *as = arm_boot_address_space(cpu, info); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6ffb26e7e6..39dfec0877 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2050,8 +2050,13 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + /* support kae vf device tree nodes */ + vms->memmap[VIRT_PCIE_MMIO] = (MemMapEntry) { 0x10000000, 0x2edf0000 }; + vms->memmap[VIRT_KAE_DEVICE] = (MemMapEntry) { 0x3edf0000, 0x00200000 }; +- +- vms->memmap[VIRT_MEM].base = 1 * GiB; ++ uint64_t tmi_version = 0; ++ if (kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, 
&tmi_version) < 0) { ++ warn_report("can not get tmi version"); ++ } ++ if (tmi_version < MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM) { ++ vms->memmap[VIRT_MEM].base = 3 * GiB; ++ } + vms->memmap[VIRT_MEM].size = ms->ram_size; + info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base), + (unsigned long long)(vms->memmap[VIRT_MEM].base + ms->ram_size)); +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index 552fdcb18f..d69a71cbec 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -597,4 +597,6 @@ struct kvm_cap_arm_tmm_populate_region_args { + + #endif + ++#define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001 ++ + #endif /* __ARM_KVM_H__ */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 84cec64b88..7a08f9b1e9 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2422,4 +2422,7 @@ struct kvm_s390_zpci_op { + /* flags for kvm_s390_zpci_op->u.reg_aen.flags */ + #define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + ++/* get tmi version */ ++#define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, uint64_t) ++ + #endif /* __LINUX_KVM_H */ +-- +2.41.0.windows.1 + diff --git a/arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch b/arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch new file mode 100644 index 0000000000000000000000000000000000000000..b28cd3ebe0c8edc32bdb833887d0eee042e407f0 --- /dev/null +++ b/arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch @@ -0,0 +1,30 @@ +From 3f2e953c7faf3043396a649d4891d3d95441e70f Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 17:06:57 +0800 +Subject: [PATCH 3/4] arm: VirtCCA: fix arm-softmmu build on x86 platform + +Add stub function for kvm_load_user_data(). 
+ +Fixes: 9eacd1a6df68 ("arm: VirtCCA: CVM support UEFI boot") +Signed-off-by: Jason Zeng +--- + accel/stubs/kvm-stub.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index 2625175b99..e68f3433ad 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -133,3 +133,9 @@ uint32_t kvm_dirty_ring_size(void) + { + return 0; + } ++ ++int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, ++ struct kvm_numa_info *numa_info) ++{ ++ return -ENOSYS; ++} +-- +2.33.0 + diff --git a/arm-VirtCCA-qemu-CoDA-support-UEFI-boot.patch b/arm-VirtCCA-qemu-CoDA-support-UEFI-boot.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d412930f90820a89c7a2b0a73744aa5882b94b2 --- /dev/null +++ b/arm-VirtCCA-qemu-CoDA-support-UEFI-boot.patch @@ -0,0 +1,137 @@ +From 0119389040e4d78c6238875b812827d4f07b5f0f Mon Sep 17 00:00:00 2001 +From: gongchangsui +Date: Mon, 17 Mar 2025 02:51:16 -0400 +Subject: [PATCH] arm: VirtCCA: qemu CoDA support UEFI boot + +1. Expose PCIe MMIO region from QEMU memory map. +2. Refactor struct kvm_user_data: data_start and data_size represent +the address base and size of the MMIO in UEFI boot mode; data_start +and data_size represent the address base and size of the DTB in direct boot mode. 
+ +Signed-off-by: gongchangsui +--- + accel/kvm/kvm-all.c | 8 ++++---- + hw/arm/boot.c | 10 ++++++---- + hw/arm/virt.c | 6 ++++++ + linux-headers/linux/kvm.h | 12 +++++++++--- + target/arm/kvm_arm.h | 2 ++ + 5 files changed, 27 insertions(+), 11 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 38a48cc031..57c6718b77 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -3527,7 +3527,7 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) + return r; + } + +-int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, ++int kvm_load_user_data(hwaddr loader_start, hwaddr dtb_info, hwaddr data_start, hwaddr data_size, hwaddr ram_size, + struct kvm_numa_info *numa_info) + { + KVMState *state = kvm_state; +@@ -3535,9 +3535,9 @@ int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_star + int ret; + + data.loader_start = loader_start; +- data.image_end = image_end; +- data.initrd_start = initrd_start; +- data.dtb_end = dtb_end; ++ data.dtb_info = dtb_info; ++ data.data_start = data_start; ++ data.data_size = data_size; + data.ram_size = ram_size; + memcpy(&data.numa_info, numa_info, sizeof(struct kvm_numa_info)); + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index ca9f69fd3d..a3e0dbb68c 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -1149,10 +1149,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + + if (kvm_enabled() && virtcca_cvm_enabled()) { + if (info->dtb_limit == 0) { +- info->dtb_limit = info->dtb_start + 0x200000; ++ info->dtb_limit = info->dtb_start + DTB_MAX; + } +- kvm_load_user_data(info->loader_start, image_high_addr, info->initrd_start, +- info->dtb_limit, info->ram_size, (struct kvm_numa_info *)info->numa_info); ++ kvm_load_user_data(info->loader_start, 0x1, info->dtb_start, ++ info->dtb_limit - info->dtb_start, info->ram_size, (struct kvm_numa_info *)info->numa_info); + tmm_add_ram_region(info->loader_start, 
image_high_addr - info->loader_start, + info->initrd_start, info->dtb_limit - info->initrd_start, true); + } +@@ -1193,6 +1193,7 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, + + static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, const char *firmware_filename) + { ++ hwaddr mmio_start, mmio_size; + /* Set up for booting firmware (which might load a kernel via fw_cfg) */ + + if (have_dtb(info)) { +@@ -1246,7 +1247,8 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, con + + if (info->confidential) { + arm_setup_confidential_firmware_boot(cpu, info, firmware_filename); +- kvm_load_user_data(UEFI_LOADER_START, UEFI_MAX_SIZE, info->loader_start, info->loader_start + DTB_MAX, info->ram_size, ++ virtcca_kvm_get_mmio_addr(&mmio_start, &mmio_size); ++ kvm_load_user_data(info->loader_start, DTB_MAX, mmio_start, mmio_size, info->ram_size, + (struct kvm_numa_info *)info->numa_info); + } + /* +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 39dfec0877..6c5611826c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -176,6 +176,12 @@ static const MemMapEntry base_memmap[] = { + [VIRT_MEM] = { GiB, LEGACY_RAMLIMIT_BYTES }, + }; + ++void virtcca_kvm_get_mmio_addr(hwaddr *mmio_start, hwaddr *mmio_size) ++{ ++ *mmio_start = base_memmap[VIRT_PCIE_MMIO].base; ++ *mmio_size = base_memmap[VIRT_PCIE_MMIO].size; ++} ++ + /* + * Highmem IO Regions: This memory map is floating, located after the RAM. 
+ * Each MemMapEntry base (GPA) will be dynamically computed, depending on the +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 7a08f9b1e9..c9ec7f862a 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1510,9 +1510,15 @@ struct kvm_numa_info { + + struct kvm_user_data { + __u64 loader_start; +- __u64 image_end; +- __u64 initrd_start; +- __u64 dtb_end; ++ /* ++ * When the lowest bit of dtb_info is 0, the value of dtb_info represents the size of the DTB, ++ * and data_start and data_size represent the address base and size of the MMIO. ++ * When the lowest bit of dtb_info is 1, data_start and data_size represent the address base ++ * and size of the DTB. ++ */ ++ __u64 dtb_info; ++ __u64 data_start; ++ __u64 data_size; + __u64 ram_size; + struct kvm_numa_info numa_info; + }; +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 31457a57f7..62fbb713f4 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -73,6 +73,8 @@ int kvm_arm_vcpu_finalize(CPUState *cs, int feature); + void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + uint64_t attr, int dev_fd, uint64_t addr_ormask); + ++void virtcca_kvm_get_mmio_addr(hwaddr *mmio_start, hwaddr *mmio_size); ++ + /** + * kvm_arm_init_cpreg_list: + * @cpu: ARMCPU +-- +2.41.0.windows.1 + diff --git a/arm-VirtCCA-qemu-uefi-boot-support-kae.patch b/arm-VirtCCA-qemu-uefi-boot-support-kae.patch new file mode 100644 index 0000000000000000000000000000000000000000..399b2dfe7003c3ef179eb4f1e345a0103482cd76 --- /dev/null +++ b/arm-VirtCCA-qemu-uefi-boot-support-kae.patch @@ -0,0 +1,100 @@ +From 5bffeb311c969a0e05106e4bf54282431c5ba907 Mon Sep 17 00:00:00 2001 +From: gongchangsui +Date: Mon, 17 Mar 2025 02:42:43 -0400 +Subject: [PATCH] arm: VirtCCA: qemu uefi boot support kae + +This commit introduces modifications to enable KAE functionality +during UEFI boot in cVMs. Additionally, the ACPI feature must be +configured in cVM.
+ +Signed-off-by: gongchangsui +--- + hw/arm/virt-acpi-build.c | 58 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 58 insertions(+) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 076781423b..f78331d69f 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -58,6 +58,7 @@ + #include "migration/vmstate.h" + #include "hw/acpi/ghes.h" + #include "hw/acpi/viot.h" ++#include "kvm_arm.h" + + #define ARM_SPI_BASE 32 + +@@ -405,6 +406,54 @@ static void acpi_dsdt_add_virtio(Aml *scope, + } + } + ++static void acpi_dsdt_add_hisi_sec(Aml *scope, ++ const MemMapEntry *virtio_mmio_memmap, ++ int dev_id) ++{ ++ hwaddr size = 0x10000; ++ ++ /* ++ * Calculate the base address for the sec device node. ++ * Each device group contains one sec device and one hpre device,spaced by 2 * size. ++ */ ++ hwaddr base = virtio_mmio_memmap->base + dev_id * 2 * size; ++ ++ Aml *dev = aml_device("SE%02u", dev_id); ++ aml_append(dev, aml_name_decl("_HID", aml_string("SEC07"))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(dev_id))); ++ aml_append(dev, aml_name_decl("_CCA", aml_int(1))); ++ ++ Aml *crs = aml_resource_template(); ++ ++ aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ aml_append(scope, dev); ++} ++ ++static void acpi_dsdt_add_hisi_hpre(Aml *scope, ++ const MemMapEntry *virtio_mmio_memmap, ++ int dev_id) ++{ ++ hwaddr size = 0x10000; ++ ++ /* ++ * Calculate the base address for the hpre device node. ++ * Each hpre device follows the corresponding sec device by an additional offset of size. 
++ */ ++ hwaddr base = virtio_mmio_memmap->base + dev_id * 2 * size + size; ++ ++ Aml *dev = aml_device("HP%02u", dev_id); ++ aml_append(dev, aml_name_decl("_HID", aml_string("HPRE07"))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(dev_id))); ++ aml_append(dev, aml_name_decl("_CCA", aml_int(1))); ++ ++ Aml *crs = aml_resource_template(); ++ ++ aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ aml_append(scope, dev); ++} ++ + static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, + uint32_t irq, VirtMachineState *vms) + { +@@ -1201,6 +1250,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO], + (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS); + acpi_dsdt_add_pci(scope, memmap, irqmap[VIRT_PCIE] + ARM_SPI_BASE, vms); ++ ++ if (virtcca_cvm_enabled()) { ++ int kae_num = tmm_get_kae_num(); ++ for (int i = 0; i < kae_num; i++) { ++ acpi_dsdt_add_hisi_sec(scope, &memmap[VIRT_KAE_DEVICE], i); ++ acpi_dsdt_add_hisi_hpre(scope, &memmap[VIRT_KAE_DEVICE], i); ++ } ++ } ++ + if (vms->acpi_dev) { + build_ged_aml(scope, "\\_SB."GED_DEVICE, + HOTPLUG_HANDLER(vms->acpi_dev), +-- +2.41.0.windows.1 + diff --git a/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch b/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..0296a6428ac4fb1666e518b9ec80ca20f08fc8a6 --- /dev/null +++ b/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch @@ -0,0 +1,51 @@ +From 37aab238363c8242aa76853396c4f272b5508bca Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 8 Jun 2020 15:25:35 +0100 +Subject: [PATCH] arm/acpi: Enable ACPI support for vcpu hotplug + +ACPI is required to interface QEMU with the guest. Roughly falls into below +cases, + +1. Convey the possible vcpus config at the machine init time to the guest + using various DSDT tables like MADT etc. +2. 
Convey vcpu hotplug events to guest(using GED) +3. Assist in evaluation of various ACPI methods(like _EVT, _STA, _OST, _EJ0, + _MAT etc.) +4. Provides ACPI cpu hotplug state and 12 Byte memory mapped cpu hotplug + control register interface to the OSPM/guest corresponding to each possible + vcpu. The register interface consists of various R/W fields and their + handling operations. These are called when ever register fields or memory + regions are accessed(i.e. read or written) by OSPM when ever it evaluates + various ACPI methods. + +Note: lot of this framework code is inherited from the changes already done for + x86 but still some minor changes are required to make it compatible with + ARM64.) + +This patch enables the ACPI support for virtual cpu hotplug. ACPI changes +required will follow in subsequent patches. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 3ada335a24..c0a7d0bd58 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -29,6 +29,7 @@ config ARM_VIRT + select ACPI_HW_REDUCED + select ACPI_APEI + select ACPI_VIOT ++ select ACPI_CPU_HOTPLUG + select VIRTIO_MEM_SUPPORTED + select ACPI_CXL + select ACPI_HMAT +-- +2.27.0 + diff --git a/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch b/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch new file mode 100644 index 0000000000000000000000000000000000000000..1dac436c9f2e12709c9f97fe198c9ffd7115265e --- /dev/null +++ b/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch @@ -0,0 +1,98 @@ +From d269fb9a41abf5888a9bfeec2f8d1684b2d4dfb0 Mon Sep 17 00:00:00 2001 +From: saarloos <9090-90-90-9090@163.com> +Date: Sat, 30 Mar 2024 21:32:27 +0800 +Subject: [PATCH] arm/acpi: Fix when make qemu-system-aarch64 at x86_64 host + bios_tables_test fail reason: __aarch64__ macro let 
build_pptt at x86_64 and + aarch64 host build different function that let bios_tables_test fail. + +Signed-off-by: Yangzi Zhang +Signed-off-by: Yuan Zhang +--- + hw/acpi/aml-build.c | 5 +---- + hw/arm/virt-acpi-build.c | 2 +- + include/hw/acpi/aml-build.h | 5 +++-- + 3 files changed, 5 insertions(+), 7 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 714498165a..bf9c59f544 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -2016,7 +2016,6 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + +-#ifdef __aarch64__ + /* + * ACPI spec, Revision 6.3 + * 5.2.29.2 Cache Type Structure (Type 1) +@@ -2072,7 +2071,7 @@ static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) + */ +-void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -2172,7 +2171,6 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + acpi_table_end(linker, &table); + } + +-#else + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +@@ -2248,7 +2246,6 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + + acpi_table_end(linker, &table); + } +-#endif + + /* build rev1/rev3/rev5.1/rev6.0 FADT */ + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 3cb50bdc65..48fc77fb83 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -1024,7 +1024,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) + + if (!vmc->no_cpu_topology) { + acpi_add_table(table_offsets, tables_blob); +- build_pptt(tables_blob, tables->linker, ms, ++ 
build_pptt_arm(tables_blob, tables->linker, ms, + vms->oem_id, vms->oem_table_id); + } + +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 200cb113de..7281c281f6 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -221,7 +221,6 @@ struct AcpiBuildTables { + BIOSLinker *linker; + } AcpiBuildTables; + +-#ifdef __aarch64__ + /* Definitions of the hardcoded cache info*/ + + typedef enum { +@@ -266,7 +265,6 @@ struct offset_status { + uint32_t l1i_offset; + }; + +-#endif + + typedef + struct CrsRangeEntry { +@@ -542,6 +540,9 @@ void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, + void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id); + ++void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++ const char *oem_id, const char *oem_table_id); ++ + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + const char *oem_id, const char *oem_table_id); + +-- +2.27.0 + diff --git a/arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch b/arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch deleted file mode 100644 index 68814a8d8135faf707bacbff017fe8bc84d71f54..0000000000000000000000000000000000000000 --- a/arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 6d795b30ff09bc1f799daa454f776d682cc77197 Mon Sep 17 00:00:00 2001 -From: zhanghao1 -Date: Tue, 11 May 2021 20:17:16 +0800 -Subject: [PATCH] arm/cpu: Fixed function undefined error at compile time under - arm - - Add the compilation option CONFIG_KVM while using - "kvm_arm_cpu_feature_supported" and "kvm_arm_get_one_reg". - In arm, the default value of CONFIG_KVM is no. - - While the target is arm, the compilation fails because - the function "kvm_arm_cpu_feature_supporte" is declared - or the function "kvm_arm_get_one_reg" is not defined. 
- -Signed-off-by: zhanghao1 ---- - target/arm/helper.c | 4 ++++ - target/arm/kvm_arm.h | 4 ++++ - 2 files changed, 8 insertions(+) - -diff --git a/target/arm/helper.c b/target/arm/helper.c -index bddd355fa0..9d2b2659f6 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -284,6 +284,7 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) - - newval = read_raw_cp_reg(&cpu->env, ri); - if (kvm_sync) { -+#ifdef CONFIG_KVM - if (is_id_reg(ri)) { - /* Only sync if we can sync to KVM successfully. */ - uint64_t oldval; -@@ -306,6 +307,7 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) - - kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); - } else { -+#endif - /* - * Only sync if the previous list->cpustate sync succeeded. - * Rather than tracking the success/failure state for every -@@ -324,7 +326,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) - } - - write_raw_cp_reg(&cpu->env, ri, newval); -+#ifdef CONFIG_KVM - } -+#endif - } - cpu->cpreg_values[i] = newval; - } -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 49e80878f4..a223967d4d 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -312,6 +312,10 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - - static inline void kvm_arm_add_vcpu_properties(Object *obj) {} - -+static inline bool kvm_arm_cpu_feature_supported(void) { -+ return false; -+} -+ - static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms) - { - return -ENOENT; --- -2.27.0 - diff --git a/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch b/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b70c456bf22b5738d1f5d172f3ccacd0fc58eb5 --- /dev/null +++ b/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch @@ -0,0 +1,78 @@ +From b394996c99c0af0de870a5d79fff69f01d504b0c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:47:07 +0800 +Subject: [PATCH] arm/cpu: Some fixes for 
arm_cpu_unrealizefn() + +Some minor fixes for arm_cpu_unrealizefn(). + +Signed-off-by: Keqian Zhu +--- + target/arm/cpu.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 501f88eb2f..9dd61c10ea 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2418,6 +2418,7 @@ static void arm_cpu_unrealizefn(DeviceState *dev) + CPUState *cs = CPU(dev); + bool has_secure; + ++#ifndef CONFIG_USER_ONLY + has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY); + + /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */ +@@ -2433,30 +2434,38 @@ static void arm_cpu_unrealizefn(DeviceState *dev) + if (has_secure) { + cpu_address_space_destroy(cs, ARMASIdx_S); + } ++#endif + + destroy_cpreg_list(cpu); + arm_cpu_unregister_gdb_regs(cpu); + unregister_cp_regs_for_features(cpu); + ++#ifndef CONFIG_USER_ONLY ++ if (tcg_enabled() && cpu_isar_feature(aa64_rme, cpu)) { ++ arm_unregister_el_change_hooks(cpu); ++ } ++#endif ++ + if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) { + g_free(env->sau.rbar); + g_free(env->sau.rlar); + } + + if (arm_feature(env, ARM_FEATURE_PMSA) && +- arm_feature(env, ARM_FEATURE_V7) && +- cpu->pmsav7_dregion) { +- if (arm_feature(env, ARM_FEATURE_V8)) { +- g_free(env->pmsav8.rbar[M_REG_NS]); +- g_free(env->pmsav8.rlar[M_REG_NS]); +- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { +- g_free(env->pmsav8.rbar[M_REG_S]); +- g_free(env->pmsav8.rlar[M_REG_S]); ++ arm_feature(env, ARM_FEATURE_V7)) { ++ if (cpu->pmsav7_dregion) { ++ if (arm_feature(env, ARM_FEATURE_V8)) { ++ g_free(env->pmsav8.rbar[M_REG_NS]); ++ g_free(env->pmsav8.rlar[M_REG_NS]); ++ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->pmsav8.rbar[M_REG_S]); ++ g_free(env->pmsav8.rlar[M_REG_S]); ++ } ++ } else { ++ g_free(env->pmsav7.drbar); ++ g_free(env->pmsav7.drsr); ++ g_free(env->pmsav7.dracr); + } +- } else { +- 
g_free(env->pmsav7.drbar); +- g_free(env->pmsav7.drsr); +- g_free(env->pmsav7.dracr); + } + if (cpu->pmsav8r_hdregion) { + g_free(env->pmsav8.hprbar); +-- +2.27.0 + diff --git a/arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch b/arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch deleted file mode 100644 index 84903c34d5a86cac12aadf7d35271295b49f143d..0000000000000000000000000000000000000000 --- a/arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch +++ /dev/null @@ -1,42 +0,0 @@ -From d8e0b51447d8c64788cd7f9b0fa75c4ccb06f8eb Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 10:17:27 +0800 -Subject: [PATCH] arm/cpu: assign arm_get_arch_id handler to get_arch_id hook - -This hook will be called in get_cpu_status, which is called -during cpu hotplug. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - target/arm/cpu.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 39bbe7e2d7..1ccb30e5eb 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2575,6 +2575,13 @@ static gchar *arm_gdb_arch_name(CPUState *cs) - return g_strdup("arm"); - } - -+static int64_t arm_cpu_get_arch_id(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ -+ return cpu->mp_affinity; -+} -+ - static void arm_cpu_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -@@ -2596,6 +2603,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) - cc->synchronize_from_tb = arm_cpu_synchronize_from_tb; - cc->gdb_read_register = arm_cpu_gdb_read_register; - cc->gdb_write_register = arm_cpu_gdb_write_register; -+ cc->get_arch_id = arm_cpu_get_arch_id; - #ifndef CONFIG_USER_ONLY - cc->do_interrupt = arm_cpu_do_interrupt; - cc->get_phys_page_attrs_debug = arm_cpu_get_phys_page_attrs_debug; --- -2.19.1 diff --git a/arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch b/arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch new file mode 100644 index 
0000000000000000000000000000000000000000..a8d4850525e8b0ebd6e72ed5f83090ac93b6067d --- /dev/null +++ b/arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch @@ -0,0 +1,37 @@ +From e97171b8b362b0122754a936053c9793a6ad2f57 Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 17:08:49 +0800 +Subject: [PATCH 4/4] arm: cvm: fix arm-softmmu build on x86 platform + +Add stub function for tmm_set_sec_addr() and tmm_set_hpre_addr() + +Fixes: dffc0f55d93e ("cvm : Add support for TEE-based national encryption acceleration.") +Signed-off-by: Jason Zeng +--- + target/arm/kvm_arm.h | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 62fbb713f4..76137289df 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -497,6 +497,16 @@ static inline void tmm_add_ram_region(hwaddr base1, hwaddr len1, hwaddr base2, + { + g_assert_not_reached(); + } ++ ++static inline void tmm_set_sec_addr(hwaddr base, int num) ++{ ++ g_assert_not_reached(); ++} ++ ++static inline void tmm_set_hpre_addr(hwaddr base, int num) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +-- +2.33.0 + diff --git a/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch b/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..d457c6f33275472cb1e5a546a2822de4b4b979ee --- /dev/null +++ b/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch @@ -0,0 +1,72 @@ +From 85e8e1ee8560e587845142342f81b218e44cba6a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:07:33 +0800 +Subject: [PATCH] arm/kvm: Set psci smccc filter only with vcpu hotplug + +The smccc filter mechanism is supported by newer Linux kernel, +don't try to do it unconditionally.
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 4 +++- + target/arm/kvm.c | 21 ++++++++++++--------- + 2 files changed, 15 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e60f3431f9..38b5d214a1 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2366,8 +2366,10 @@ static void machvirt_init(MachineState *machine) + finalize_gic_version(vms); + if (tcg_enabled() || hvf_enabled() || qtest_enabled() || + (vms->gic_version < VIRT_GIC_VERSION_3)) { +- machine->smp.max_cpus = smp_cpus; + mc->has_hotpluggable_cpus = false; ++ } ++ if (!mc->has_hotpluggable_cpus) { ++ machine->smp.max_cpus = smp_cpus; + warn_report("cpu hotplug feature has been disabled"); + } + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 66caf9e5e7..19783d567f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -259,6 +259,7 @@ int kvm_arch_get_default_type(MachineState *ms) + + int kvm_arch_init(MachineState *ms, KVMState *s) + { ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + int ret = 0; + + /* For ARM interrupt delivery is always asynchronous, +@@ -316,15 +317,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + * filter in the Host KVM. This is required to support features like + * virtual CPU Hotplug on ARM platforms. 
+ */ +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, +- KVM_SMCCC_FILTER_FWD_TO_USER)) { +- error_report("CPU On PSCI-to-user-space fwd filter install failed"); +- abort(); +- } +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, +- KVM_SMCCC_FILTER_FWD_TO_USER)) { +- error_report("CPU Off PSCI-to-user-space fwd filter install failed"); +- abort(); ++ if (mc->has_hotpluggable_cpus && ms->smp.max_cpus > ms->smp.cpus) { ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU On PSCI-to-user-space fwd filter install failed"); ++ mc->has_hotpluggable_cpus = false; ++ } ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU Off PSCI-to-user-space fwd filter install failed"); ++ mc->has_hotpluggable_cpus = false; ++ } + } + + kvm_arm_init_debug(s); +-- +2.27.0 + diff --git a/arm-translate-a64-fix-uninitialized-variable-warning.patch b/arm-translate-a64-fix-uninitialized-variable-warning.patch deleted file mode 100644 index 8e31bbf51e12d63818e74aca17f94020301c7855..0000000000000000000000000000000000000000 --- a/arm-translate-a64-fix-uninitialized-variable-warning.patch +++ /dev/null @@ -1,36 +0,0 @@ -From b4bab3bf6a75d97d2f1098c4dc52d35ced003c70 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Mon, 13 Jan 2020 17:01:11 +0800 -Subject: [PATCH] arm/translate-a64: fix uninitialized variable warning - -Fixes: -target/arm/translate-a64.c: In function 'disas_crypto_three_reg_sha512': -target/arm/translate-a64.c:13625:9: error: 'genfn' may be used uninitialized in this function [-Werror=maybe-uninitialized] - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -qemu/target/arm/translate-a64.c:13609:8: error: 'feature' may be used uninitialized in this function [-Werror=maybe-uninitialized] - if (!feature) { - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan ---- - target/arm/translate-a64.c | 2 ++ - 1 file changed, 2 
insertions(+) - -diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c -index dcdeb801..5f423d5d 100644 ---- a/target/arm/translate-a64.c -+++ b/target/arm/translate-a64.c -@@ -13767,6 +13767,8 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) - feature = dc_isar_feature(aa64_sha3, s); - genfn = NULL; - break; -+ default: -+ g_assert_not_reached(); - } - } else { - switch (opcode) { --- -2.18.1 - - diff --git a/arm-virt-Add-CPU-hotplug-framework.patch b/arm-virt-Add-CPU-hotplug-framework.patch deleted file mode 100644 index 5de672afeab55fe5af5da3d2ce58aa2e9ae1435d..0000000000000000000000000000000000000000 --- a/arm-virt-Add-CPU-hotplug-framework.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 6d287b3f1d961cc4adda1c6a452f41db84466f5a Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 3 Apr 2020 16:16:18 +0800 -Subject: [PATCH] arm/virt: Add CPU hotplug framework - -Establish the CPU hotplug framework for arm/virt, we will add -necessary code legs to this framework gradually to realize CPU -hotplug finally. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 19 ++++++++++++++++++- - 1 file changed, 18 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d09a5773df..0bd37af26c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2077,11 +2077,25 @@ out: - error_propagate(errp, local_err); - } - -+static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, -+ DeviceState *dev, Error **errp) -+{ -+ /* Currently nothing to do */ -+} -+ -+static void virt_cpu_plug(HotplugHandler *hotplug_dev, -+ DeviceState *dev, Error **errp) -+{ -+ /* Currently nothing to do */ -+} -+ - static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - virt_memory_pre_plug(hotplug_dev, dev, errp); -+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { -+ virt_cpu_pre_plug(hotplug_dev, dev, errp); - } - } - -@@ -2098,6 +2112,8 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, - } - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - virt_memory_plug(hotplug_dev, dev, errp); -+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { -+ virt_cpu_plug(hotplug_dev, dev, errp); - } - } - -@@ -2112,7 +2128,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, - DeviceState *dev) - { - if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE) || -- (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) { -+ object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || -+ object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { - return HOTPLUG_HANDLER(machine); - } - --- -2.19.1 diff --git a/arm-virt-Add-CPU-topology-support.patch b/arm-virt-Add-CPU-topology-support.patch deleted file mode 100644 index c7813c637449328c5fecf7575d27a06fa1fa0700..0000000000000000000000000000000000000000 --- a/arm-virt-Add-CPU-topology-support.patch +++ /dev/null @@ -1,219 +0,0 @@ -From cde57fcae2ed16a10e1ef7f2da0ec368883988ba 
Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Mon, 6 Apr 2020 10:54:35 +0800 -Subject: [PATCH] arm/virt: Add CPU topology support - -The CPU topology specified by user (through -smp options) is used in -ACPI PPTT. Now we will use this information to locate which CPU to -plug or unplug. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 68 +++++++++++++++++++++++++++++++++++++-- - include/hw/arm/topology.h | 61 +++++++++++++++++++++++++++++++++++ - target/arm/cpu.c | 3 ++ - target/arm/cpu.h | 3 ++ - 4 files changed, 133 insertions(+), 2 deletions(-) - create mode 100644 include/hw/arm/topology.h - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 0bd37af26c..64532b61b2 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -36,6 +36,7 @@ - #include "hw/sysbus.h" - #include "hw/arm/boot.h" - #include "hw/arm/primecell.h" -+#include "hw/arm/topology.h" - #include "hw/arm/virt.h" - #include "hw/block/flash.h" - #include "hw/vfio/vfio-calxeda-xgmac.h" -@@ -2020,6 +2021,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - int n; - unsigned int max_cpus = ms->smp.max_cpus; - VirtMachineState *vms = VIRT_MACHINE(ms); -+ ARMCPUTopoInfo topo; - - if (ms->possible_cpus) { - assert(ms->possible_cpus->len == max_cpus); -@@ -2031,10 +2033,17 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - ms->possible_cpus->len = max_cpus; - for (n = 0; n < ms->possible_cpus->len; n++) { - ms->possible_cpus->cpus[n].type = ms->cpu_type; -+ ms->possible_cpus->cpus[n].vcpus_count = 1; - ms->possible_cpus->cpus[n].arch_id = - virt_cpu_mp_affinity(vms, n); -+ -+ topo_ids_from_idx(n, ms->smp.cores, ms->smp.threads, &topo); -+ ms->possible_cpus->cpus[n].props.has_socket_id = true; -+ ms->possible_cpus->cpus[n].props.socket_id = topo.pkg_id; -+ ms->possible_cpus->cpus[n].props.has_core_id = true; -+ ms->possible_cpus->cpus[n].props.core_id = topo.core_id; - ms->possible_cpus->cpus[n].props.has_thread_id = true; -- 
ms->possible_cpus->cpus[n].props.thread_id = n; -+ ms->possible_cpus->cpus[n].props.thread_id = topo.smt_id; - } - return ms->possible_cpus; - } -@@ -2080,7 +2089,62 @@ out: - static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -- /* Currently nothing to do */ -+ CPUState *cs = CPU(dev); -+ ARMCPUTopoInfo topo; -+ ARMCPU *cpu = ARM_CPU(dev); -+ MachineState *ms = MACHINE(hotplug_dev); -+ int smp_cores = ms->smp.cores; -+ int smp_threads = ms->smp.threads; -+ -+ /* if cpu idx is not set, set it based on socket/core/thread properties */ -+ if (cs->cpu_index == UNASSIGNED_CPU_INDEX) { -+ int max_socket = ms->smp.max_cpus / smp_threads / smp_cores; -+ if (cpu->socket_id < 0 || cpu->socket_id >= max_socket) { -+ error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", -+ cpu->socket_id, max_socket - 1); -+ return; -+ } -+ if (cpu->core_id < 0 || cpu->core_id >= smp_cores) { -+ error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", -+ cpu->core_id, smp_cores - 1); -+ return; -+ } -+ if (cpu->thread_id < 0 || cpu->thread_id >= smp_threads) { -+ error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", -+ cpu->thread_id, smp_threads - 1); -+ return; -+ } -+ -+ topo.pkg_id = cpu->socket_id; -+ topo.core_id = cpu->core_id; -+ topo.smt_id = cpu->thread_id; -+ cs->cpu_index = idx_from_topo_ids(smp_cores, smp_threads, &topo); -+ } -+ -+ /* if 'address' properties socket-id/core-id/thread-id are not set, set them -+ * so that machine_query_hotpluggable_cpus would show correct values -+ */ -+ topo_ids_from_idx(cs->cpu_index, smp_cores, smp_threads, &topo); -+ if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { -+ error_setg(errp, "property socket-id: %u doesn't match set idx:" -+ " 0x%x (socket-id: %u)", cpu->socket_id, cs->cpu_index, topo.pkg_id); -+ return; -+ } -+ cpu->socket_id = topo.pkg_id; -+ -+ if (cpu->core_id != -1 && cpu->core_id != topo.core_id) { -+ error_setg(errp, "property 
core-id: %u doesn't match set idx:" -+ " 0x%x (core-id: %u)", cpu->core_id, cs->cpu_index, topo.core_id); -+ return; -+ } -+ cpu->core_id = topo.core_id; -+ -+ if (cpu->thread_id != -1 && cpu->thread_id != topo.smt_id) { -+ error_setg(errp, "property thread-id: %u doesn't match set idx:" -+ " 0x%x (thread-id: %u)", cpu->thread_id, cs->cpu_index, topo.smt_id); -+ return; -+ } -+ cpu->thread_id = topo.smt_id; - } - - static void virt_cpu_plug(HotplugHandler *hotplug_dev, -diff --git a/include/hw/arm/topology.h b/include/hw/arm/topology.h -new file mode 100644 -index 0000000000..a3e5f436c5 ---- /dev/null -+++ b/include/hw/arm/topology.h -@@ -0,0 +1,61 @@ -+/* -+ * ARM CPU topology data structures and functions -+ * -+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, see . 
-+ */ -+ -+#ifndef HW_ARM_TOPOLOGY_H -+#define HW_ARM_TOPOLOGY_H -+ -+typedef struct ARMCPUTopoInfo { -+ unsigned pkg_id; -+ unsigned core_id; -+ unsigned smt_id; -+} ARMCPUTopoInfo; -+ -+/* Calculate (contiguous) CPU index based on topology */ -+static inline unsigned idx_from_topo_ids(unsigned nr_cores, -+ unsigned nr_threads, -+ const ARMCPUTopoInfo *topo) -+{ -+ assert(nr_cores > 0); -+ assert(nr_threads > 0); -+ assert(topo != NULL); -+ -+ return topo->pkg_id * nr_cores * nr_threads + -+ topo->core_id * nr_threads + -+ topo->smt_id; -+} -+ -+/* Calculate thread/core/package topology -+ * based on (contiguous) CPU index -+ */ -+static inline void topo_ids_from_idx(unsigned cpu_index, -+ unsigned nr_cores, -+ unsigned nr_threads, -+ ARMCPUTopoInfo *topo) -+{ -+ assert(nr_cores > 0); -+ assert(nr_threads > 0); -+ assert(topo != NULL); -+ -+ topo->smt_id = cpu_index % nr_threads; -+ topo->core_id = cpu_index / nr_threads % nr_cores; -+ topo->pkg_id = cpu_index / nr_threads / nr_cores; -+} -+ -+#endif /* HW_ARM_TOPOLOGY_H */ -+ -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 1ccb30e5eb..91f1e36cd8 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2560,6 +2560,9 @@ static Property arm_cpu_properties[] = { - DEFINE_PROP_UINT64("mp-affinity", ARMCPU, - mp_affinity, ARM64_AFFINITY_INVALID), - DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), -+ DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, -1), -+ DEFINE_PROP_INT32("core-id", ARMCPU, core_id, -1), -+ DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, -1), - DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), - DEFINE_PROP_END_OF_LIST() - }; -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index e19531a77b..219c222b89 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -916,6 +916,9 @@ struct ARMCPU { - QLIST_HEAD(, ARMELChangeHook) el_change_hooks; - - int32_t node_id; /* NUMA node this CPU belongs to */ -+ int32_t socket_id; -+ int32_t core_id; -+ int32_t 
thread_id; - - /* Used to synchronize KVM and QEMU in-kernel device levels */ - uint8_t device_irq_level; --- -2.19.1 diff --git a/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch b/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch new file mode 100644 index 0000000000000000000000000000000000000000..61f5f9718628156d742d697fe0736400ffe16faf --- /dev/null +++ b/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch @@ -0,0 +1,67 @@ +From f8914ec04d4d892520aa443eaf8018c80516adee Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 16:27:01 +0000 +Subject: [PATCH] arm/virt: Add cpu hotplug events to GED during creation + +Add CPU Hotplug event to the set of supported ged-events during the creation of +GED device during VM init. Also initialize the memory map for CPU Hotplug +control device used in event exchanges between Qemu/VMM and the guest. + +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 5 ++++- + include/hw/arm/virt.h | 1 + + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 78ed3c4ba8..155000f22f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -78,6 +78,7 @@ + #include "hw/mem/pc-dimm.h" + #include "hw/mem/nvdimm.h" + #include "hw/acpi/generic_event_device.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/virtio/virtio-md-pci.h" + #include "hw/virtio/virtio-iommu.h" + #include "hw/char/pl011.h" +@@ -157,6 +158,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN}, + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, ++ [VIRT_CPUHP_ACPI] = { 0x090c0000, ACPI_CPU_HOTPLUG_REG_LEN}, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ +@@ -725,7 +727,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + DeviceState *dev; + MachineState *ms = 
MACHINE(vms); + int irq = vms->irqmap[VIRT_ACPI_GED]; +- uint32_t event = ACPI_GED_PWR_DOWN_EVT; ++ uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_CPU_HOTPLUG_EVT; + + if (ms->ram_slots) { + event |= ACPI_GED_MEM_HOTPLUG_EVT; +@@ -741,6 +743,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, vms->memmap[VIRT_CPUHP_ACPI].base); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); + + return dev; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index c2fde0522c..5de0185063 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -76,6 +76,7 @@ enum { + VIRT_PCDIMM_ACPI, + VIRT_ACPI_GED, + VIRT_NVDIMM_ACPI, ++ VIRT_CPUHP_ACPI, + VIRT_PVTIME, + VIRT_LOWMEMMAP_LAST, + }; +-- +2.27.0 + diff --git a/arm-virt-Add-cpu_hotplug_enabled-field.patch b/arm-virt-Add-cpu_hotplug_enabled-field.patch deleted file mode 100644 index 0b8bc47f6d31dba63e1148c22cc803204ce82e70..0000000000000000000000000000000000000000 --- a/arm-virt-Add-cpu_hotplug_enabled-field.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 31873c4c0454fb17654f57adece2bc396415f8bf Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 13:50:40 +0800 -Subject: [PATCH] arm/virt: Add cpu_hotplug_enabled field - -Some conditions must be satisfied to support CPU hotplug, including -ACPI, GED, 64bit CPU, GICv3. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 7 +++++++ - include/hw/arm/virt.h | 1 + - 2 files changed, 8 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index dda22194b5..304a4c2d31 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1645,6 +1645,7 @@ static void machvirt_init(MachineState *machine) - { - VirtMachineState *vms = VIRT_MACHINE(machine); - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); -+ MachineState *ms = MACHINE(machine); - MachineClass *mc = MACHINE_GET_CLASS(machine); - const CPUArchIdList *possible_cpus; - MemoryRegion *sysmem = get_system_memory(); -@@ -1655,6 +1656,7 @@ static void machvirt_init(MachineState *machine) - bool has_ged = !vmc->no_ged; - unsigned int smp_cpus = machine->smp.cpus; - unsigned int max_cpus = machine->smp.max_cpus; -+ ObjectClass *cpu_class; - - /* - * In accelerated mode, the memory map is computed earlier in kvm_type() -@@ -1760,6 +1762,11 @@ static void machvirt_init(MachineState *machine) - - create_fdt(vms); - -+ cpu_class = object_class_by_name(ms->cpu_type); -+ vms->cpu_hotplug_enabled = has_ged && firmware_loaded && -+ acpi_enabled && vms->gic_version == 3 && -+ !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); -+ - possible_cpus = mc->possible_cpu_arch_ids(machine); - for (n = 0; n < possible_cpus->len; n++) { - Object *cpuobj; -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index beef4c8002..b4c53d920e 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -126,6 +126,7 @@ typedef struct { - bool highmem_ecam; - bool its; - bool virt; -+ bool cpu_hotplug_enabled; - int32_t gic_version; - VirtIOMMUType iommu; - struct arm_boot_info bootinfo; --- -2.19.1 diff --git a/arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch b/arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch deleted file mode 100644 index c81227d8a3ef4ff3ffc74f7b848b42e8cc79c762..0000000000000000000000000000000000000000 --- 
a/arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 7cfb37c50209208f853c6fbd0df6673a95e03ef9 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 14:16:40 +0800 -Subject: [PATCH] arm/virt: Add some sanity checks in cpu_pre_plug hook - -For that user will try to hotplug a CPU when preconditions -are not satisfied, check these CPU hotplug preconditions in -pre_plug hook. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 983084c459..c6a99e683a 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2086,10 +2086,30 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(hotplug_dev); - const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); -+ const CPUArchId *cpu_slot = NULL; - MemoryRegion *sysmem = get_system_memory(); - int smp_cores = ms->smp.cores; - int smp_threads = ms->smp.threads; - -+ /* Some hotplug capability checks */ -+ -+ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { -+ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", -+ ms->cpu_type); -+ return; -+ } -+ -+ if (dev->hotplugged && !vms->acpi_dev) { -+ error_setg(errp, "CPU hotplug is disabled: missing acpi device."); -+ return; -+ } -+ -+ if (dev->hotplugged && !vms->cpu_hotplug_enabled) { -+ error_setg(errp, "CPU hotplug is disabled: " -+ "should use AArch64 CPU and GICv3."); -+ return; -+ } -+ - /* if cpu idx is not set, set it based on socket/core/thread properties */ - if (cs->cpu_index == UNASSIGNED_CPU_INDEX) { - int max_socket = ms->smp.max_cpus / smp_threads / smp_cores; -@@ -2145,6 +2165,13 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - object_property_set_int(cpuobj, possible_cpus->cpus[cs->cpu_index].arch_id, - "mp-affinity", NULL); 
- -+ cpu_slot = &possible_cpus->cpus[cs->cpu_index]; -+ if (cpu_slot->cpu) { -+ error_setg(errp, "CPU[%d] with mp_affinity %" PRIu64 " exists", -+ cs->cpu_index, cpu->mp_affinity); -+ return; -+ } -+ - numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), - &error_fatal); - --- -2.19.1 diff --git a/arm-virt-Add-update-basic-hot-un-plug-framework.patch b/arm-virt-Add-update-basic-hot-un-plug-framework.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea7c3772eab061766e8df4f16ec75bfb6d399aff --- /dev/null +++ b/arm-virt-Add-update-basic-hot-un-plug-framework.patch @@ -0,0 +1,197 @@ +From 724ab355c047cfb3e970d9ea78577087568eb095 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 18:40:19 +0100 +Subject: [PATCH] arm/virt: Add/update basic hot-(un)plug framework + +Add CPU hot-unplug hooks and update hotplug hooks with additional sanity checks +for use in hotplug paths. + +Note, Functional contents of the hooks(now left with TODO comment) shall be +gradually filled in the subsequent patches in an incremental approach to patch +and logic building which would be roughly as follows: +1. (Un-)wiring of interrupts between vCPU<->GIC +2. Sending events to Guest for hot-(un)plug so that guest can take appropriate + actions. +3. Notifying GIC about hot-(un)plug action so that vCPU could be (un-)stitched + to the GIC CPU interface. +4. Updating the Guest with Next boot info for this vCPU in the firmware. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 104 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bf385a469c..ed354be326 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -83,6 +83,7 @@ + #include "hw/virtio/virtio-iommu.h" + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" ++#include "qapi/qmp/qdict.h" + + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ +@@ -3083,12 +3084,23 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; + int32_t min_cpuid = 0; + int32_t max_cpuid; + ++ if (dev->hotplugged && !vms->acpi_dev) { ++ error_setg(errp, "GED acpi device does not exists"); ++ return; ++ } ++ ++ if (dev->hotplugged && !mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU hotplug not supported on this machine"); ++ return; ++ } ++ + /* sanity check the cpu */ + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +@@ -3137,6 +3149,22 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + } + virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); + ++ /* ++ * Fix the GIC for this new vCPU being plugged. The QOM CPU object for the ++ * new vCPU need to be updated in the corresponding QOM GICv3CPUState object ++ * We also need to re-wire the IRQs for this new CPU object. This update ++ * is limited to the QOM only and does not affects the KVM. Later has ++ * already been pre-sized with possible CPU at VM init time. 
This is a ++ * workaround to the constraints posed by ARM architecture w.r.t supporting ++ * CPU Hotplug. Specification does not exist for the later. ++ * This patch-up is required both for {cold,hot}-plugged vCPUs. Cold-inited ++ * vCPUs have their GIC state initialized during machvit_init(). ++ */ ++ if (vms->acpi_dev) { ++ /* TODO: update GIC about this hotplug change here */ ++ /* TODO: wire the GIC<->CPU irqs */ ++ } ++ + /* + * To give persistent presence view of vCPUs to the guest, ACPI might need + * to fake the presence of the vCPUs to the guest but keep them disabled. +@@ -3148,6 +3176,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +@@ -3156,10 +3185,81 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + cpu_slot->cpu = OBJECT(dev); + ++ /* ++ * Update the ACPI Hotplug state both for vCPUs being {hot,cold}-plugged. ++ * vCPUs can be cold-plugged using '-device' option. For vCPUs being hot ++ * plugged, guest is also notified. ++ */ ++ if (vms->acpi_dev) { ++ /* TODO: update acpi hotplug state. 
Send cpu hotplug event to guest */ ++ /* TODO: register cpu for reset & update F/W info for the next boot */ ++ } ++ + cs->disabled = false; + return; + } + ++static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = CPU(dev); ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ if (!mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU hot(un)plug not supported on this machine"); ++ return; ++ } ++ ++ if (cs->cpu_index == first_cpu->cpu_index) { ++ error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported", ++ first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id, ++ cpu->core_id, cpu->thread_id); ++ return; ++ } ++ ++ /* TODO: request cpu hotplug from guest */ ++ ++ return; ++} ++ ++static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ ++ /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ ++ ++ /* TODO: unwire the gic-cpu irqs here */ ++ /* TODO: update the GIC about this hot unplug change */ ++ ++ /* TODO: unregister cpu for reset & update F/W info for the next boot */ ++ ++ qobject_unref(dev->opts); ++ dev->opts = NULL; ++ ++ cpu_slot->cpu = NULL; ++ cs->disabled = true; ++ ++ return; ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -3284,6 +3384,8 @@ static void 
virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug_request(hotplug_dev, dev, errp); + } else { + error_setg(errp, "device unplug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -3297,6 +3399,8 @@ static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev, + virt_dimm_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug(hotplug_dev, dev, errp); + } else { + error_setg(errp, "virt: device unplug for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +-- +2.27.0 + diff --git a/arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch b/arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch deleted file mode 100644 index ade3ccfd9fe50a78c8ef33ded3463e52b5f6d6c3..0000000000000000000000000000000000000000 --- a/arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch +++ /dev/null @@ -1,100 +0,0 @@ -From d38d1d4e859450535ddc6bf0c7a59f6217b1403c Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Sun, 5 Apr 2020 16:03:15 +0800 -Subject: [PATCH] arm/virt: Attach ACPI CPU hotplug support to virt - -Attach cpus aml building and GED support for CPU hotplug to -arm/virt, but currently we make it diabled by not add CPU -hotplug event to GED. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt-acpi-build.c | 15 ++++++++++++++- - hw/arm/virt.c | 6 ++++++ - include/hw/arm/virt.h | 1 + - 3 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index 8b68a15d76..dbe9acb148 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -806,6 +806,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - MachineState *ms = MACHINE(vms); - const MemMapEntry *memmap = vms->memmap; - const int *irqmap = vms->irqmap; -+ bool cpu_aml_built = false; - - dsdt = init_aml_allocator(); - /* Reserve space for header */ -@@ -817,7 +818,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - * the RTC ACPI device at all when using UEFI. - */ - scope = aml_scope("\\_SB"); -- acpi_dsdt_add_cpus(scope, vms->smp_cpus, vms); - acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], - (irqmap[VIRT_UART] + ARM_SPI_BASE)); - acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); -@@ -845,6 +845,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - AML_SYSTEM_MEMORY, - memmap[VIRT_PCDIMM_ACPI].base); - } -+ -+ if (event & ACPI_GED_CPU_HOTPLUG_EVT) { -+ CPUHotplugFeatures opts = { -+ .acpi_1_compatible = false, .has_legacy_cphp = false -+ }; -+ build_cpus_aml(dsdt, ms, opts, memmap[VIRT_CPU_ACPI].base, -+ "\\_SB", NULL, AML_SYSTEM_MEMORY); -+ cpu_aml_built = true; -+ } -+ } -+ -+ if (!cpu_aml_built) { -+ acpi_dsdt_add_cpus(scope, vms->smp_cpus, vms); - } - - acpi_dsdt_add_power_button(scope); -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 8638aeedb7..d09a5773df 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -140,6 +140,7 @@ static const MemMapEntry base_memmap[] = { - [VIRT_SMMU] = { 0x09050000, 0x00020000 }, - [VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN }, - [VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN }, -+ [VIRT_CPU_ACPI] = { 0x09090000, 
ACPI_CPU_HOTPLUG_REG_LEN }, - [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, - [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, - /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ -@@ -645,11 +646,16 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) - event |= ACPI_GED_MEM_HOTPLUG_EVT; - } - -+ /* event |= ACPI_GED_CPU_HOTPLUG_EVT; -+ * Currently CPU hotplug is not enabled. -+ */ -+ - dev = qdev_create(NULL, TYPE_ACPI_GED); - qdev_prop_set_uint32(dev, "ged-event", event); - - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); -+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, vms->memmap[VIRT_CPU_ACPI].base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); - - qdev_init_nofail(dev); -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index cbdea7ff32..6880ebe07c 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -81,6 +81,7 @@ enum { - VIRT_SECURE_MEM, - VIRT_PCDIMM_ACPI, - VIRT_ACPI_GED, -+ VIRT_CPU_ACPI, - VIRT_LOWMEMMAP_LAST, - }; - --- -2.19.1 diff --git a/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch b/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch new file mode 100644 index 0000000000000000000000000000000000000000..61c298d7bec0adbb7fc302343e5b09f94dd947b5 --- /dev/null +++ b/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch @@ -0,0 +1,221 @@ +From a68abeefcbd78daaf7179b922f6b9040b4b63101 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 15:50:33 +0100 +Subject: [PATCH] arm/virt: Changes to (un)wire GICC<->vCPU IRQs during + hot-(un)plug + +Refactors the existing GIC create code to extract common code to wire the +vcpu<->gic interrupts. This function could be used with cold-plug case and also +used when vCPU is hot-plugged. It also introduces a new function to unwire the +vcpu<->gic interrupts for the vCPU hot-unplug cases. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 138 ++++++++++++++++++++++++++++------------- + hw/core/gpio.c | 2 +- + include/hw/qdev-core.h | 2 + + 3 files changed, 99 insertions(+), 43 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ed354be326..97bf4cca11 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -798,6 +798,99 @@ static void create_v2m(VirtMachineState *vms) + vms->msi_controller = VIRT_MSI_CTRL_GICV2M; + } + ++/* ++ * Mapping from the output timer irq lines from the CPU to the GIC PPI inputs ++ * we use for the virt board. ++ */ ++const int timer_irq[] = { ++ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, ++ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, ++ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, ++ [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, ++}; ++ ++static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ int irq; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_disconnect_gpio_out_named(cpudev, NULL, irq); ++ } ++ ++ if (type != VIRT_GIC_VERSION_2) { ++ qdev_disconnect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0); ++ } else if (vms->virt) { ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 4 * max_cpus); ++ } ++ ++ /* ++ * RFC: Question: This currently does not takes care of intimating the ++ * devices which might be sitting on system bus. Do we need a ++ * sysbus_disconnect_irq() which also does the job of notification beside ++ * disconnection? 
++ */ ++ qdev_disconnect_gpio_out_named(cpudev, "pmu-interrupt", 0); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, cpu); ++ qdev_disconnect_gpio_out_named(gicdev, ++ SYSBUS_DEVICE_GPIO_IRQ, cpu + max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 2 * max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 3 * max_cpus); ++} ++ ++static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ SysBusDevice *gicbusdev; ++ int intidbase; ++ int irq; ++ ++ intidbase = NUM_IRQS + cpu * GIC_INTERNAL; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_connect_gpio_out(cpudev, irq, ++ qdev_get_gpio_in(gicdev, ++ intidbase + timer_irq[irq])); ++ } ++ ++ gicbusdev = SYS_BUS_DEVICE(gicdev); ++ if (type != VIRT_GIC_VERSION_2) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0, qirq); ++ } else if (vms->virt) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ sysbus_connect_irq(gicbusdev, cpu + 4 * max_cpus, qirq); ++ } ++ ++ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, ++ qdev_get_gpio_in(gicdev, ++ intidbase + VIRTUAL_PMU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 2 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 3 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++} ++ + static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + { + MachineState *ms = MACHINE(vms); +@@ 
-894,46 +987,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. + */ + for (i = 0; i < smp_cpus; i++) { +- DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); +- int intidbase = NUM_IRQS + i * GIC_INTERNAL; +- /* Mapping from the output timer irq lines from the CPU to the +- * GIC PPI inputs we use for the virt board. +- */ +- const int timer_irq[] = { +- [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, +- [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, +- [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, +- [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, +- }; +- +- for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { +- qdev_connect_gpio_out(cpudev, irq, +- qdev_get_gpio_in(vms->gic, +- intidbase + timer_irq[irq])); +- } +- +- if (vms->gic_version != VIRT_GIC_VERSION_2) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", +- 0, irq); +- } else if (vms->virt) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq); +- } +- +- qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +- qdev_get_gpio_in(vms->gic, intidbase +- + VIRTUAL_PMU_IRQ)); +- +- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++ wire_gic_cpu_irqs(vms, qemu_get_cpu(i)); + } + + fdt_add_gic_node(vms); +@@ -3162,7 +3216,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + */ + if (vms->acpi_dev) { + /* TODO: update GIC about this hotplug change here */ +- /* TODO: wire the GIC<->CPU irqs */ ++ wire_gic_cpu_irqs(vms, cs); + } + + /* +@@ 
-3246,7 +3300,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + + /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ + +- /* TODO: unwire the gic-cpu irqs here */ ++ unwire_gic_cpu_irqs(vms, cs); + /* TODO: update the GIC about this hot unplug change */ + + /* TODO: unregister cpu for reset & update F/W info for the next boot */ +diff --git a/hw/core/gpio.c b/hw/core/gpio.c +index 80d07a6ec9..abb164d5c0 100644 +--- a/hw/core/gpio.c ++++ b/hw/core/gpio.c +@@ -143,7 +143,7 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) + + /* disconnect a GPIO output, returning the disconnected input (if any) */ + +-static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, + const char *name, int n) + { + char *propname = g_strdup_printf("%s[%d]", +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index 151d968238..2d3661d6cd 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -739,6 +739,8 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n); + */ + qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, + const char *name, int n); ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++ const char *name, int n); + + BusState *qdev_get_child_bus(DeviceState *dev, const char *name); + +-- +2.27.0 + diff --git a/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch b/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch new file mode 100644 index 0000000000000000000000000000000000000000..27ca6d7ab1b918bffda8a3e78beae1626d19d6fb --- /dev/null +++ b/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch @@ -0,0 +1,73 @@ +From baa26f2fc075522f91c3e9a332fc4fa3f3b167bf Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:55:49 +0800 +Subject: [PATCH] arm/virt: Consider has_ged when set mc->has_hotpluggable_cpus + +Vcpu hotplug relies on 
ged device. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 26 ++++++++++++++++---------- + 1 file changed, 16 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 38b5d214a1..00e57f2d75 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2357,6 +2357,7 @@ static void machvirt_init(MachineState *machine) + bool has_ged = !vmc->no_ged; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; ++ ObjectClass *cpu_class; + + if (!cpu_type_valid(machine->cpu_type)) { + error_report("mach-virt: CPU type %s not supported", machine->cpu_type); +@@ -2364,14 +2365,6 @@ static void machvirt_init(MachineState *machine) + } + + finalize_gic_version(vms); +- if (tcg_enabled() || hvf_enabled() || qtest_enabled() || +- (vms->gic_version < VIRT_GIC_VERSION_3)) { +- mc->has_hotpluggable_cpus = false; +- } +- if (!mc->has_hotpluggable_cpus) { +- machine->smp.max_cpus = smp_cpus; +- warn_report("cpu hotplug feature has been disabled"); +- } + + possible_cpus = mc->possible_cpu_arch_ids(machine); + +@@ -2501,6 +2494,21 @@ static void machvirt_init(MachineState *machine) + create_fdt(vms); + qemu_log("cpu init start\n"); + ++ cpu_class = object_class_by_name(machine->cpu_type); ++ has_ged = has_ged && firmware_loaded && ++ virt_is_acpi_enabled(vms) && ++ !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); ++ if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (vms->gic_version < VIRT_GIC_VERSION_3) || !has_ged) { ++ mc->has_hotpluggable_cpus = false; ++ } ++ if (!mc->has_hotpluggable_cpus) { ++ if (machine->smp.max_cpus > smp_cpus) { ++ warn_report("cpu hotplug feature has been disabled"); ++ } ++ machine->smp.max_cpus = smp_cpus; ++ } ++ + notifier_list_init(&vms->cpuhp_notifiers); + possible_cpus = mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); +@@ -2581,8 +2589,6 @@ static void machvirt_init(MachineState *machine) + + create_gic(vms, sysmem); + +- has_ged = has_ged 
&& aarch64 && firmware_loaded && +- virt_is_acpi_enabled(vms); + if (has_ged) { + vms->acpi_dev = create_acpi_ged(vms); + } +-- +2.27.0 + diff --git a/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch b/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch new file mode 100644 index 0000000000000000000000000000000000000000..d120fe42a27c540e4c58a345b6425c7278453f26 --- /dev/null +++ b/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch @@ -0,0 +1,54 @@ +From 028d71744dfeedabfa67d629c71a6ed5e494cc68 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 29 Aug 2023 00:47:05 +0000 +Subject: [PATCH] arm/virt: Create GED dev before *disabled* CPU Objs are + destroyed + +ACPI CPU hotplug state (is_present=_STA.PRESENT, is_enabled=_STA.ENABLED) for +all the possible vCPUs MUST be initialized during machine init. This is done +during the creation of the GED device. VMM/Qemu MUST expose/fake the ACPI state +of the disabled vCPUs to the Guest kernel as 'present' (_STA.PRESENT) always +i.e. ACPI persistent. if the 'disabled' vCPU objectes are destroyed before the +GED device has been created then their ACPI hotplug state might not get +initialized correctly as acpi_persistent flag is part of the CPUState. This will +expose wrong status of the unplugged vCPUs to the Guest kernel. + +Hence, moving the GED device creation before disabled vCPU objects get destroyed +as part of the post CPU init routine. 
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 155000f22f..818398e753 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2472,6 +2472,12 @@ static void machvirt_init(MachineState *machine) + + create_gic(vms, sysmem); + ++ has_ged = has_ged && aarch64 && firmware_loaded && ++ virt_is_acpi_enabled(vms); ++ if (has_ged) { ++ vms->acpi_dev = create_acpi_ged(vms); ++ } ++ + virt_cpu_post_init(vms, sysmem); + + fdt_add_pmu_nodes(vms); +@@ -2496,9 +2502,7 @@ static void machvirt_init(MachineState *machine) + + create_pcie(vms); + +- if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) { +- vms->acpi_dev = create_acpi_ged(vms); +- } else { ++ if (!has_ged) { + create_gpio_devices(vms, VIRT_GPIO, sysmem); + } + +-- +2.27.0 + diff --git a/arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch b/arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..3347c5836b9f4527ec96abf6ea6ea0f2d76facf1 --- /dev/null +++ b/arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch @@ -0,0 +1,152 @@ +From 52909d74ec37e851df3762a6eab1d7a6eeb89fba Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 28 Apr 2024 12:56:47 +0800 +Subject: [PATCH] arm/virt: Don't modify smp.max_cpus when vcpu hotplug + disabled + +The smp.max_cpus has been used when create possible_cpus, so +we must not change it after that. + +We should use smp.cpus when create cpu and acpi table if vcpu +hotplug is disabled, instead of change smp.max_cpus to smp.cpus +and use it everywhere. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt-acpi-build.c | 8 +++++++- + hw/arm/virt.c | 24 ++++++++++++++++++++++-- + include/hw/arm/virt.h | 8 +++++++- + 3 files changed, 36 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 99296fc6d8..179600d4fe 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -814,9 +814,15 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + int i; + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; + AcpiTable table = { .sig = "APIC", .rev = 4, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; ++ unsigned int max_cpus = ms->smp.max_cpus; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ +@@ -835,7 +841,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vms->gic_version, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + +- for (i = 0; i < MACHINE(vms)->smp.max_cpus; i++) { ++ for (i = 0; i < max_cpus; i++) { + CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? 
ARCH_GIC_MAINT_IRQ : 0; +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e4473354d4..507b09d96c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -831,6 +831,10 @@ static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) + int type = vms->gic_version; + int irq; + ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ + for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { + qdev_disconnect_gpio_out_named(cpudev, NULL, irq); + } +@@ -871,6 +875,10 @@ static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) + int intidbase; + int irq; + ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ + intidbase = NUM_IRQS + cpu * GIC_INTERNAL; + + for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { +@@ -915,6 +923,10 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + uint32_t nb_redist_regions = 0; + int revision; + ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ + if (vms->gic_version == VIRT_GIC_VERSION_2) { + gictype = gic_class_name(); + } else { +@@ -2165,6 +2177,9 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + + for (n = 0; n < possible_cpus->len; n++) { + cpu = qemu_get_possible_cpu(n); ++ if (!qemu_present_cpu(cpu)) { ++ continue; ++ } + + if (vms->pmu) { + assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); +@@ -2195,6 +2210,9 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + if (kvm_enabled() || tcg_enabled()) { + for (n = 0; n < possible_cpus->len; n++) { + cpu = qemu_get_possible_cpu(n); ++ if (!qemu_present_cpu(cpu)) { ++ continue; ++ } + + /* + * Now, GIC has been sized with possible CPUs and we dont require +@@ -2511,16 +2529,18 @@ static void machvirt_init(MachineState *machine) + if (machine->smp.max_cpus > smp_cpus) { + warn_report("cpu hotplug feature has been disabled"); + } +- machine->smp.max_cpus = smp_cpus; + } + + notifier_list_init(&vms->cpuhp_notifiers); +- possible_cpus = 
mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; + CPUState *cs; + ++ if (!vms->cpu_hotplug_enabled && n >= smp_cpus) { ++ break; ++ } ++ + cpuobj = object_new(possible_cpus->cpus[n].type); + cs = CPU(cpuobj); + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 138531f9c1..7a734f07f7 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -210,10 +210,16 @@ static uint32_t virt_redist_capacity(VirtMachineState *vms, int region) + static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + { + uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST); ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } + + assert(vms->gic_version != VIRT_GIC_VERSION_2); + +- return (MACHINE(vms)->smp.max_cpus > redist0_capacity && ++ return (max_cpus > redist0_capacity && + vms->highmem_redists) ? 2 : 1; + } + +-- +2.27.0 + diff --git a/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch b/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..30a11521e48cbfda6ec3bbf2d9861bf189398472 --- /dev/null +++ b/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch @@ -0,0 +1,47 @@ +From 00a78edf572783c18a1d4945758371c0f175e321 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:41:14 +0800 +Subject: [PATCH] arm/virt: Fix adjudgement of core_id for vcpu hotplugged + +The core_id should between 0 and ms->smp.cores - 1. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 934b0412ef..e60f3431f9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3170,8 +3170,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +- int32_t min_cpuid = 0; +- int32_t max_cpuid; + + if (dev->hotplugged && !vms->acpi_dev) { + error_setg(errp, "GED acpi device does not exists"); +@@ -3196,15 +3194,9 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + +- max_cpuid = ms->possible_cpus->len - 1; +- if (!dev->hotplugged) { +- min_cpuid = vms->acpi_dev ? ms->smp.cpus : 0; +- max_cpuid = vms->acpi_dev ? max_cpuid : ms->smp.cpus - 1; +- } +- +- if ((cpu->core_id < min_cpuid) || (cpu->core_id > max_cpuid)) { +- error_setg(errp, "Invalid core-id %d specified, correct range %d:%d", +- cpu->core_id, min_cpuid, max_cpuid); ++ if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) { ++ error_setg(errp, "Invalid core-id %d specified, correct range 0:%u", ++ cpu->core_id, ms->smp.cores - 1); + return; + } + +-- +2.27.0 + diff --git a/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch b/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch new file mode 100644 index 0000000000000000000000000000000000000000..89fd4ca49fe6115d3ed7d19eb2a265bbe462bc46 --- /dev/null +++ b/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch @@ -0,0 +1,71 @@ +From c375e6fdc49f7d3d0232786e4cfd8b792379107c Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 14:12:34 +0100 +Subject: [PATCH] arm/virt: Init PMU at host for all possible vcpus + +PMU for all possible vCPUs must be initialized at the VM initialization time. +Refactor existing code to accomodate possible vCPUs. This also assumes that all +processor being used are identical. 
+ +Past discussion for reference: +Link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00131.html + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 12 ++++++++---- + include/hw/arm/virt.h | 1 + + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 08ba255317..78ed3c4ba8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2055,12 +2055,14 @@ static void finalize_gic_version(VirtMachineState *vms) + */ + static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + { ++ CPUArchIdList *possible_cpus = vms->parent.possible_cpus; + int max_cpus = MACHINE(vms)->smp.max_cpus; +- bool aarch64, pmu, steal_time; ++ bool aarch64, steal_time; + CPUState *cpu; ++ int n; + + aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL); +- pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); ++ vms->pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); + steal_time = object_property_get_bool(OBJECT(first_cpu), + "kvm-steal-time", NULL); + +@@ -2087,8 +2089,10 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime); + } + +- CPU_FOREACH(cpu) { +- if (pmu) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ ++ if (vms->pmu) { + assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); + if (kvm_irqchip_in_kernel()) { + kvm_arm_pmu_set_irq(cpu, VIRTUAL_PMU_IRQ); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index a6977bade5..c2fde0522c 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -155,6 +155,7 @@ struct VirtMachineState { + bool ras; + bool mte; + bool dtb_randomness; ++ bool pmu; + OnOffAuto acpi; + VirtGICType gic_version; + VirtIOMMUType iommu; +-- +2.27.0 + diff --git 
a/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch b/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch new file mode 100644 index 0000000000000000000000000000000000000000..c8e661145e7b1b42272971979d463d51a5ee7b4e --- /dev/null +++ b/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch @@ -0,0 +1,97 @@ +From 3780dddd4fc8f0471525c50893e24846d1474692 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 8 Aug 2023 00:43:18 +0000 +Subject: [PATCH] arm/virt: Make ARM vCPU *present* status ACPI *persistent* + +ARM arch does not allow CPUs presence to be changed [1] after kernel has booted. +Hence, firmware/ACPI/Qemu must ensure persistent view of the vCPUs to the Guest +kernel even when they are not present in the QoM i.e. are unplugged or are +yet-to-be-plugged + +References: +[1] Check comment 5 in the bugzilla entry + Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5 + +Signed-off-by: Salil Mehta +--- + cpu-common.c | 6 ++++++ + hw/arm/virt.c | 7 +++++++ + include/hw/core/cpu.h | 20 ++++++++++++++++++++ + 3 files changed, 33 insertions(+) + +diff --git a/cpu-common.c b/cpu-common.c +index d041a351ab..da52e45760 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -128,6 +128,12 @@ bool qemu_enabled_cpu(CPUState *cpu) + return cpu && !cpu->disabled; + } + ++bool qemu_persistent_cpu(CPUState *cpu) ++{ ++ /* cpu state can be faked to the guest via acpi */ ++ return cpu->acpi_persistent; ++} ++ + uint64_t qemu_get_cpu_archid(int cpu_index) + { + MachineState *ms = MACHINE(qdev_get_machine()); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 818398e753..91b2653c03 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3104,6 +3104,13 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); ++ ++ /* ++ * To give persistent presence view of vCPUs to the guest, ACPI might need ++ * to fake the presence of the vCPUs to the guest but keep them 
disabled. ++ * This shall be used during the init of ACPI Hotplug state and hot-unplug ++ */ ++ cs->acpi_persistent = true; + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index fdfb952259..0ca778eb75 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -550,6 +550,13 @@ struct CPUState { + * By default every CPUState is enabled as of now across all archs. + */ + bool disabled; ++ /* ++ * On certain architectures, to give persistent view of the 'presence' of ++ * vCPUs to the guest, ACPI might need to fake the 'presence' of the vCPUs ++ * but keep them ACPI disabled to the guest. This is done by returning ++ * _STA.PRES=True and _STA.Ena=False for the unplugged vCPUs in QEMU QoM. ++ */ ++ bool acpi_persistent; + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; +@@ -957,6 +964,19 @@ bool qemu_present_cpu(CPUState *cpu); + */ + bool qemu_enabled_cpu(CPUState *cpu); + ++/** ++ * qemu_persistent_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU state should always be reflected as *present* via ACPI ++ * to the Guest. By default, this is False on all architectures and has to be ++ * explicity set during initialization. 
++ * ++ * Returns: True if it is ACPI 'persistent' CPU ++ * ++ */ ++bool qemu_persistent_cpu(CPUState *cpu); ++ + /** + * qemu_get_cpu_archid: + * @cpu_index: possible vCPU for which arch-id needs to be retreived +-- +2.27.0 + diff --git a/arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch b/arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch deleted file mode 100644 index c2d9a3cb0a48436433a30670e8517d7dafb9bca4..0000000000000000000000000000000000000000 --- a/arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch +++ /dev/null @@ -1,124 +0,0 @@ -From bf47ef282bfe8b0a98e1f87d8708051ffa7192a1 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 13:55:11 +0800 -Subject: [PATCH] arm/virt: Pre-sizing MADT-GICC PPTT GICv3 and Pre-park KVM - vCPU - -Establish all pre-sizing facilities based on cpu_hotplug_enabled -field. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt-acpi-build.c | 12 +++++++++++- - hw/arm/virt.c | 14 ++++++++++++-- - target/arm/kvm.c | 6 +++--- - 3 files changed, 26 insertions(+), 6 deletions(-) - -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index efac788ba1..2cfac7b84f 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -736,6 +736,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base); - gicd->version = vms->gic_version; - -+ if (vms->cpu_hotplug_enabled) { -+ num_cpu = ms->smp.max_cpus; -+ } - for (i = 0; i < num_cpu; i++) { - virt_madt_cpu_entry(NULL, i, possible_cpus, table_data); - } -@@ -902,9 +905,11 @@ static - void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) - { - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -+ MachineState *ms = MACHINE(vms); - GArray *table_offsets; - unsigned dsdt, xsdt; - GArray *tables_blob = tables->table_data; -+ int num_cpus; - - table_offsets = g_array_new(false, true /* clear */, - 
sizeof(uint32_t)); -@@ -923,7 +928,12 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) - - acpi_add_table(table_offsets, tables_blob); - -- build_pptt(tables_blob, tables->linker, vms->smp_cpus); -+ if (vms->cpu_hotplug_enabled) { -+ num_cpus = ms->smp.max_cpus; -+ } else { -+ num_cpus = ms->smp.cpus; -+ } -+ build_pptt(tables_blob, tables->linker, num_cpus); - - acpi_add_table(table_offsets, tables_blob); - build_madt(tables_blob, tables->linker, vms); -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 304a4c2d31..983084c459 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -767,6 +767,9 @@ static void create_gic(VirtMachineState *vms) - unsigned int smp_cpus = ms->smp.cpus; - uint32_t nb_redist_regions = 0; - -+ if (vms->cpu_hotplug_enabled) { -+ num_cpus = ms->smp.max_cpus; -+ } - assert(num_cpus >= smp_cpus); - - gictype = (type == 3) ? gicv3_class_name() : gic_class_name(); -@@ -1772,8 +1775,15 @@ static void machvirt_init(MachineState *machine) - Object *cpuobj; - CPUState *cs; - -+ if (kvm_enabled() && vms->cpu_hotplug_enabled) { -+ if (kvm_create_parked_vcpu(n) < 0) { -+ error_report("mach-virt: Create KVM parked vCPU failed"); -+ exit(1); -+ } -+ } -+ - if (n >= smp_cpus) { -- break; -+ continue; - } - - cpuobj = object_new(possible_cpus->cpus[n].type); -@@ -1857,7 +1867,7 @@ static void machvirt_init(MachineState *machine) - vms->bootinfo.kernel_filename = machine->kernel_filename; - vms->bootinfo.kernel_cmdline = machine->kernel_cmdline; - vms->bootinfo.initrd_filename = machine->initrd_filename; -- vms->bootinfo.nb_cpus = smp_cpus; -+ vms->bootinfo.nb_cpus = vms->cpu_hotplug_enabled ? 
max_cpus : smp_cpus; - vms->bootinfo.board_id = -1; - vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; - vms->bootinfo.get_dtb = machvirt_dtb; -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 327b3bc338..4f131f687d 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -202,7 +202,7 @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms) - int kvm_arch_init(MachineState *ms, KVMState *s) - { - int ret = 0; -- unsigned int smp_cpus = ms->smp.cpus; -+ unsigned int max_cpus = ms->smp.max_cpus; - /* For ARM interrupt delivery is always asynchronous, - * whether we are using an in-kernel VGIC or not. - */ -@@ -216,9 +216,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - - cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); - -- if (smp_cpus > 256 && -+ if (max_cpus > 256 && - !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { -- error_report("Using more than 256 vcpus requires a host kernel " -+ error_report("Using more than max 256 vcpus requires a host kernel " - "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); - ret = -EINVAL; - } --- -2.19.1 diff --git a/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch b/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a1198a9d63b5203f251134500fc03dc3847611a --- /dev/null +++ b/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch @@ -0,0 +1,88 @@ +From 097e3b46a7eede0182a846f7b993e14d3eed83b7 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 9 Jun 2020 03:01:08 +0100 +Subject: [PATCH] arm/virt: Release objects for *disabled* possible vCPUs after + init + +During machvirt_init(), QOM ARMCPU objects are also pre-created along with the +corresponding KVM vCPUs in the host for all possible vCPUs. This necessary +because of the architectural constraint, KVM restricts the deferred creation of +the KVM vCPUs and VGIC initialization/sizing after VM init. Hence, VGIC is +pre-sized with possible vCPUs. 
+ +After initialization of the machine is complete disabled possible KVM vCPUs are +then parked at the per-virt-machine list "kvm_parked_vcpus" and we release the +QOM ARMCPU objects for the disabled vCPUs. These shall be re-created at the time +when vCPU is hotplugged again. QOM ARMCPU object is then re-attached with +corresponding parked KVM vCPU. + +Alternatively, we could've never released the QOM CPU objects and kept on +reusing. This approach might require some modifications of qdevice_add() +interface to get old ARMCPU object instead of creating a new one for the hotplug +request. + +Each of the above approaches come with their own pros and cons. This prototype +uses the 1st approach.(suggestions are welcome!) + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 91b2653c03..bf385a469c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2060,6 +2060,7 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + { + CPUArchIdList *possible_cpus = vms->parent.possible_cpus; + int max_cpus = MACHINE(vms)->smp.max_cpus; ++ MachineState *ms = MACHINE(vms); + bool aarch64, steal_time; + CPUState *cpu; + int n; +@@ -2120,6 +2121,37 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + } ++ ++ if (kvm_enabled() || tcg_enabled()) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ ++ /* ++ * Now, GIC has been sized with possible CPUs and we dont require ++ * disabled vCPU objects to be represented in the QOM. Release the ++ * disabled ARMCPU objects earlier used during init for pre-sizing. 
++ * ++ * We fake to the guest through ACPI about the presence(_STA.PRES=1) ++ * of these non-existent vCPUs at VMM/qemu and present these as ++ * disabled vCPUs(_STA.ENA=0) so that they cant be used. These vCPUs ++ * can be later added to the guest through hotplug exchanges when ++ * ARMCPU objects are created back again using 'device_add' QMP ++ * command. ++ */ ++ /* ++ * RFC: Question: Other approach could've been to keep them forever ++ * and release it only once when qemu exits as part of finalize or ++ * when new vCPU is hotplugged. In the later old could be released ++ * for the newly created object for the same vCPU? ++ */ ++ if (!qemu_enabled_cpu(cpu)) { ++ CPUArchId *cpu_slot; ++ cpu_slot = virt_find_cpu_slot(ms, cpu->cpu_index); ++ cpu_slot->cpu = NULL; ++ object_unref(OBJECT(cpu)); ++ } ++ } ++ } + } + + static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, +-- +2.27.0 + diff --git a/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch b/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..d64e1a07a1cdb9de75b21fe11f1cac4b340a596f --- /dev/null +++ b/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch @@ -0,0 +1,55 @@ +From 519699c61eeb980bb7d7f443eb95c0406aae82da Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:05:39 +0800 +Subject: [PATCH] arm/virt: Require mc->has_hotpluggable_cpus for cold-plugged + vcpu + +Cold-plugged vCPU also need mc->has_hotpluggable_cpus. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 00e57f2d75..73b29c7f73 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3179,16 +3179,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; + +- if (dev->hotplugged && !vms->acpi_dev) { +- error_setg(errp, "GED acpi device does not exists"); +- return; +- } +- +- if (dev->hotplugged && !mc->has_hotpluggable_cpus) { +- error_setg(errp, "CPU hotplug not supported on this machine"); +- return; +- } +- + /* sanity check the cpu */ + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +@@ -3222,6 +3212,17 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + + cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev); + ++ /* Except for cold-booted vCPUs, this should check presence of ACPI GED */ ++ if (cs->cpu_index >= ms->smp.cpus && !vms->acpi_dev) { ++ error_setg(errp, "GED acpi device does not exists"); ++ return; ++ } ++ ++ if (cs->cpu_index >= ms->smp.cpus && !mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU [cold|hot]plug not supported on this machine"); ++ return; ++ } ++ + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + if (qemu_present_cpu(CPU(cpu_slot->cpu))) { + error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist", +-- +2.27.0 + diff --git a/arm-virt-Set-vcpus_count-of-CPU-as-1-to-compatible-w.patch b/arm-virt-Set-vcpus_count-of-CPU-as-1-to-compatible-w.patch new file mode 100644 index 0000000000000000000000000000000000000000..76e00a1d2507d3edcd1415b9e59d5efdbabe0ce9 --- /dev/null +++ b/arm-virt-Set-vcpus_count-of-CPU-as-1-to-compatible-w.patch @@ -0,0 +1,38 @@ +From 85d1711807bc1ec0118cdc9f7cbf9a6e6b96db76 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Thu, 23 May 2024 
15:51:35 +0800 +Subject: [PATCH] arm/virt: Set vcpus_count of CPU as 1 to compatible with + libvirt + +If vcpus_count is greater than 1, use libvirt to hotplug vcpu +will fail: "An error occurred, but the cause is unknown". + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index dfe4d9e129..a6e324c6f8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3064,7 +3064,6 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + { + int n; + unsigned int max_cpus = ms->smp.max_cpus; +- unsigned int smp_threads = ms->smp.threads; + VirtMachineState *vms = VIRT_MACHINE(ms); + MachineClass *mc = MACHINE_GET_CLASS(vms); + +@@ -3078,7 +3077,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].type = ms->cpu_type; +- ms->possible_cpus->cpus[n].vcpus_count = smp_threads; ++ ms->possible_cpus->cpus[n].vcpus_count = 1; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); + +-- +2.41.0.windows.1 + diff --git a/arm-virt-Start-up-CPU-hot-plug.patch b/arm-virt-Start-up-CPU-hot-plug.patch deleted file mode 100644 index 5ba620a2215710682afa4ccdfd0d5cad53556680..0000000000000000000000000000000000000000 --- a/arm-virt-Start-up-CPU-hot-plug.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 11f9628ceff019259ff12ce469deafbf50eb3075 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 14:20:59 +0800 -Subject: [PATCH] arm/virt: Start up CPU hot-plug - -All the CPU hotplug facilities are ready. Assemble them -to start up CPU hot-plug capability for arm/virt. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 61 ++++++++++++++++++++++++++++++++++++++++--- - include/hw/arm/virt.h | 1 + - qom/cpu.c | 5 ++++ - target/arm/cpu.c | 2 ++ - 4 files changed, 65 insertions(+), 4 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c6a99e683a..112a6ae7cb 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -48,6 +48,8 @@ - #include "sysemu/cpus.h" - #include "sysemu/sysemu.h" - #include "sysemu/kvm.h" -+#include "sysemu/cpus.h" -+#include "sysemu/hw_accel.h" - #include "hw/loader.h" - #include "exec/address-spaces.h" - #include "qemu/bitops.h" -@@ -649,9 +651,9 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) - event |= ACPI_GED_MEM_HOTPLUG_EVT; - } - -- /* event |= ACPI_GED_CPU_HOTPLUG_EVT; -- * Currently CPU hotplug is not enabled. -- */ -+ if (vms->cpu_hotplug_enabled) { -+ event |= ACPI_GED_CPU_HOTPLUG_EVT; -+ } - - dev = qdev_create(NULL, TYPE_ACPI_GED); - qdev_prop_set_uint32(dev, "ged-event", event); -@@ -2214,12 +2216,62 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - object_property_set_link(cpuobj, OBJECT(secure_sysmem), - "secure-memory", &error_abort); - } -+ -+ /* If we use KVM accel, we should pause all vcpus to -+ * allow hot access of vcpu registers. 
-+ */ -+ if (dev->hotplugged && kvm_enabled()) { -+ pause_all_vcpus(); -+ } - } - - static void virt_cpu_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -- /* Currently nothing to do */ -+ CPUArchId *cpu_slot; -+ CPUState *cs = CPU(dev); -+ int ncpu = cs->cpu_index; -+ MachineState *ms = MACHINE(hotplug_dev); -+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); -+ GICv3State *gicv3; -+ ARMGICv3CommonClass *agcc; -+ Error *local_err = NULL; -+ -+ if (dev->hotplugged) { -+ /* Realize GIC related parts of CPU */ -+ assert(vms->gic_version == 3); -+ gicv3 = ARM_GICV3_COMMON(vms->gic); -+ agcc = ARM_GICV3_COMMON_GET_CLASS(gicv3); -+ agcc->cpu_hotplug_realize(gicv3, ncpu); -+ connect_gic_cpu_irqs(vms, ncpu); -+ -+ /* Register CPU reset and trigger it manually */ -+ cpu_synchronize_state(cs); -+ cpu_hotplug_register_reset(ncpu); -+ cpu_hotplug_reset_manually(ncpu); -+ cpu_synchronize_post_reset(cs); -+ -+ if (kvm_enabled()) { -+ resume_all_vcpus(); -+ } -+ } -+ -+ if (vms->acpi_dev) { -+ hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); -+ if (local_err) { -+ goto out; -+ } -+ } -+ -+ vms->boot_cpus++; -+ if (vms->fw_cfg) { -+ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); -+ } -+ -+ cpu_slot = &ms->possible_cpus->cpus[ncpu]; -+ cpu_slot->cpu = OBJECT(dev); -+out: -+ error_propagate(errp, local_err); - } - - static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, -@@ -2324,6 +2376,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15"); - mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; - mc->kvm_type = virt_kvm_type; -+ mc->has_hotpluggable_cpus = true; - assert(!mc->get_hotplug_handler); - mc->get_hotplug_handler = virt_machine_get_hotplug_handler; - hc->pre_plug = virt_machine_device_pre_plug_cb; -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index b4c53d920e..a9429bed25 100644 ---- 
a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -140,6 +140,7 @@ typedef struct { - uint32_t msi_phandle; - uint32_t iommu_phandle; - int psci_conduit; -+ uint32_t boot_cpus; - hwaddr highest_gpa; - DeviceState *gic; - DeviceState *acpi_dev; -diff --git a/qom/cpu.c b/qom/cpu.c -index f376f782d8..58cd9d5bbc 100644 ---- a/qom/cpu.c -+++ b/qom/cpu.c -@@ -342,7 +342,12 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp) - - if (dev->hotplugged) { - cpu_synchronize_post_init(cpu); -+ -+#ifdef __aarch64__ -+ if (!kvm_enabled()) -+#endif - cpu_resume(cpu); -+ - } - - /* NOTE: latest generic point where the cpu is fully realized */ -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 91f1e36cd8..811e5c6365 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2598,6 +2598,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) - acc->parent_reset = cc->reset; - cc->reset = arm_cpu_reset; - -+ dc->user_creatable = true; -+ - cc->class_by_name = arm_cpu_class_by_name; - cc->has_work = arm_cpu_has_work; - cc->cpu_exec_interrupt = arm_cpu_exec_interrupt; --- -2.19.1 diff --git a/arm-virt-Support-CPU-cold-plug.patch b/arm-virt-Support-CPU-cold-plug.patch deleted file mode 100644 index 3f96fede24c1e4f18d0f05c1987b20cc5a883b93..0000000000000000000000000000000000000000 --- a/arm-virt-Support-CPU-cold-plug.patch +++ /dev/null @@ -1,92 +0,0 @@ -From e3a1af72fca5bbcc840fba44d512bbe69ec55ca9 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Tue, 12 May 2020 15:05:06 +0800 -Subject: [PATCH] arm/virt: Support CPU cold plug - -This adds CPU cold plug support to arm virt machine board. -CPU cold plug means adding CPU by using "-device xx-arm-cpu" -when we bring up Qemu. 
- -Signed-off-by: Keqian Zhu ---- - hw/arm/virt.c | 36 +++++++++++++++++++----------------- - 1 file changed, 19 insertions(+), 17 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 112a6ae7cb..4c7279392f 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2093,25 +2093,12 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - int smp_cores = ms->smp.cores; - int smp_threads = ms->smp.threads; - -- /* Some hotplug capability checks */ -- - if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { - error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", - ms->cpu_type); - return; - } - -- if (dev->hotplugged && !vms->acpi_dev) { -- error_setg(errp, "CPU hotplug is disabled: missing acpi device."); -- return; -- } -- -- if (dev->hotplugged && !vms->cpu_hotplug_enabled) { -- error_setg(errp, "CPU hotplug is disabled: " -- "should use AArch64 CPU and GICv3."); -- return; -- } -- - /* if cpu idx is not set, set it based on socket/core/thread properties */ - if (cs->cpu_index == UNASSIGNED_CPU_INDEX) { - int max_socket = ms->smp.max_cpus / smp_threads / smp_cores; -@@ -2137,6 +2124,20 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - cs->cpu_index = idx_from_topo_ids(smp_cores, smp_threads, &topo); - } - -+ /* Some hotplug capability checks */ -+ if (cs->cpu_index >= ms->smp.cpus) { -+ if (!vms->acpi_dev) { -+ error_setg(errp, "CPU cold/hot plug is disabled: " -+ "missing acpi device."); -+ return; -+ } -+ if (!vms->cpu_hotplug_enabled) { -+ error_setg(errp, "CPU cold/hot plug is disabled: " -+ "should use AArch64 CPU and GICv3."); -+ return; -+ } -+ } -+ - /* if 'address' properties socket-id/core-id/thread-id are not set, set them - * so that machine_query_hotpluggable_cpus would show correct values - */ -@@ -2237,7 +2238,8 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, - ARMGICv3CommonClass *agcc; - Error *local_err = NULL; - -- if (dev->hotplugged) { -+ /* For CPU that is cold/hot plugged */ -+ if (ncpu >= 
ms->smp.cpus) { - /* Realize GIC related parts of CPU */ - assert(vms->gic_version == 3); - gicv3 = ARM_GICV3_COMMON(vms->gic); -@@ -2250,10 +2252,10 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, - cpu_hotplug_register_reset(ncpu); - cpu_hotplug_reset_manually(ncpu); - cpu_synchronize_post_reset(cs); -+ } - -- if (kvm_enabled()) { -- resume_all_vcpus(); -- } -+ if (dev->hotplugged && kvm_enabled()) { -+ resume_all_vcpus(); - } - - if (vms->acpi_dev) { --- -2.19.1 - diff --git a/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch b/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..a45f47dea256e42e6e8b8f1817f724c9cac6cb10 --- /dev/null +++ b/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch @@ -0,0 +1,123 @@ +From afb71c88d935349cdf9763e8f51f77334ab615ec Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 18:54:10 +0100 +Subject: [PATCH] arm/virt: Update the guest(via GED) about CPU hot-(un)plug + events + +During any vCPU hot-(un)plug, running guest VM needs to be intimated about the +new vCPU being added or request the deletion of the vCPU which is already part +of the guest VM. This is done using the ACPI GED event which eventually gets +demultiplexed to a CPU hotplug event and further to specific hot-(un)plug event +of a particular vCPU. + +This change adds the ACPI calls to the existing hot-(un)plug hooks to trigger +ACPI GED events from QEMU to guest VM. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 33 ++++++++++++++++++++++++++++++--- + 1 file changed, 30 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0312fa366d..60cd560ab9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3256,6 +3256,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + CPUArchId *cpu_slot; + + /* insert the cold/hot-plugged vcpu in the slot */ +@@ -3268,12 +3269,20 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * plugged, guest is also notified. + */ + if (vms->acpi_dev) { +- /* TODO: update acpi hotplug state. Send cpu hotplug event to guest */ ++ HotplugHandlerClass *hhc; ++ /* update acpi hotplug state and send cpu hotplug event to guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + /* TODO: register cpu for reset & update F/W info for the next boot */ + } + + cs->disabled = false; + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, +@@ -3281,8 +3290,10 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + { + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + + if (!vms->acpi_dev || !dev->realized) { + error_setg(errp, "GED does not exists or device is not realized!"); +@@ -3301,9 +3312,16 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + +- /* TODO: request cpu hotplug 
from guest */ ++ /* request cpu hotplug from guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -3311,7 +3329,9 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + CPUArchId *cpu_slot; + + if (!vms->acpi_dev || !dev->realized) { +@@ -3321,7 +3341,12 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + +- /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ ++ /* update the acpi cpu hotplug state for cpu hot-unplug */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + + unwire_gic_cpu_irqs(vms, cs); + virt_update_gic(vms, cs); +@@ -3335,6 +3360,8 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + cs->disabled = true; + + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, +-- +2.27.0 + diff --git a/arm-virt-Use-max_cpus-to-calculate-redist1_count.patch b/arm-virt-Use-max_cpus-to-calculate-redist1_count.patch new file mode 100644 index 0000000000000000000000000000000000000000..2a40a2ace8b01d0d7897c1c1f61e248995cd7780 --- /dev/null +++ b/arm-virt-Use-max_cpus-to-calculate-redist1_count.patch @@ -0,0 +1,29 @@ +From 4a3d9e9dc874f6825b8b5f18a4dece1609d48d2f Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 15 Apr 2024 22:40:29 +0800 +Subject: [PATCH] arm/virt: Use max_cpus to calculate 
redist1_count + +When cpu hotplug is enabled, the redist1_count should +include all possible cpus. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 44931355d6..e4473354d4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -959,7 +959,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); + + qlist_append_int(redist_region_count, +- MIN(smp_cpus - redist0_count, redist1_capacity)); ++ MIN(max_cpus - redist0_count, redist1_capacity)); + } + qdev_prop_set_array(vms->gic, "redist-region-count", + redist_region_count); +-- +2.41.0 + diff --git a/arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch b/arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c5d0e75a15ba0adf3a36d38e29f44d5437ad658 --- /dev/null +++ b/arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch @@ -0,0 +1,226 @@ +From 0ec1c95eea8c68243919ee4f8cd28b9a97dfc2f0 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 15 Apr 2024 22:37:53 +0800 +Subject: [PATCH] arm/virt: Use separate filed to identify cpu-hotplug enable + +The mc->has_hotpluggable_cpus should not be modified after +machine class init. 
+ +Signed-off-by: Keqian Zhu +--- + accel/kvm/kvm-all.c | 6 ++++++ + hw/arm/virt-acpi-build.c | 13 +++++-------- + hw/arm/virt.c | 20 +++++++++++++------- + include/hw/arm/virt.h | 1 + + include/sysemu/kvm.h | 2 ++ + include/sysemu/kvm_int.h | 1 + + target/arm/kvm.c | 7 +++---- + 7 files changed, 31 insertions(+), 19 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 75a3075c14..b791aad1d6 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -3603,6 +3603,11 @@ bool kvm_kernel_irqchip_split(void) + return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON; + } + ++bool kvm_smccc_filter_enabled(void) ++{ ++ return kvm_state->kvm_smccc_filter_enabled; ++} ++ + static void kvm_get_dirty_ring_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +@@ -3648,6 +3653,7 @@ static void kvm_accel_instance_init(Object *obj) + /* KVM dirty ring is by default off */ + s->kvm_dirty_ring_size = 0; + s->kvm_dirty_ring_with_bitmap = false; ++ s->kvm_smccc_filter_enabled = false; + s->kvm_eager_split_size = 0; + s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN; + s->notify_window = 0; +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 46642efac4..99296fc6d8 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -779,12 +779,10 @@ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ + } + +-static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) ++static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu, VirtMachineState *vms) + { +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- + /* can only exist in 'enabled' state */ +- if (!mc->has_hotpluggable_cpus) { ++ if (!vms->cpu_hotplug_enabled) { + return 1; + } + +@@ -842,7 +840,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + 
uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; + uint32_t pmu_interrupt = vms->pmu ? VIRTUAL_PMU_IRQ : 0; +- uint32_t flags = virt_acpi_get_gicc_flags(cpu); ++ uint32_t flags = virt_acpi_get_gicc_flags(cpu, vms); + uint64_t mpidr = qemu_get_cpu_archid(i); + + if (vms->gic_version == VIRT_GIC_VERSION_2) { +@@ -1003,7 +1001,6 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); +- MachineClass *mc = MACHINE_GET_CLASS(vms); + Aml *scope, *dsdt; + MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; +@@ -1020,8 +1017,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ +- if (mc->has_hotpluggable_cpus) { ++ ++ if (vms->cpu_hotplug_enabled) { + CPUHotplugFeatures opts = { + .acpi_1_compatible = false, + .has_legacy_cphp = false +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 73b29c7f73..44931355d6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -756,7 +756,7 @@ static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms) + { + MachineClass *mc = MACHINE_GET_CLASS(vms); + +- if (mc->has_hotpluggable_cpus) { ++ if (mc->has_hotpluggable_cpus && vms->gic_version >= VIRT_GIC_VERSION_3) { + Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic); + notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier); + } +@@ -2498,11 +2498,16 @@ static void machvirt_init(MachineState *machine) + has_ged = has_ged && firmware_loaded && + virt_is_acpi_enabled(vms) && + !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); ++ + if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (kvm_enabled() && !kvm_smccc_filter_enabled()) || + (vms->gic_version < VIRT_GIC_VERSION_3) || !has_ged) { +- mc->has_hotpluggable_cpus = false; ++ vms->cpu_hotplug_enabled = 
false; ++ } else { ++ vms->cpu_hotplug_enabled = true; + } +- if (!mc->has_hotpluggable_cpus) { ++ ++ if (!vms->cpu_hotplug_enabled) { + if (machine->smp.max_cpus > smp_cpus) { + warn_report("cpu hotplug feature has been disabled"); + } +@@ -3174,7 +3179,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); +- MachineClass *mc = MACHINE_GET_CLASS(ms); + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +@@ -3218,7 +3222,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + +- if (cs->cpu_index >= ms->smp.cpus && !mc->has_hotpluggable_cpus) { ++ if (cs->cpu_index >= ms->smp.cpus && !vms->cpu_hotplug_enabled) { + error_setg(errp, "CPU [cold|hot]plug not supported on this machine"); + return; + } +@@ -3304,7 +3308,6 @@ fail: + static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + HotplugHandlerClass *hhc; + ARMCPU *cpu = ARM_CPU(dev); +@@ -3316,7 +3319,7 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + +- if (!mc->has_hotpluggable_cpus) { ++ if (!vms->cpu_hotplug_enabled) { + error_setg(errp, "CPU hot(un)plug not supported on this machine"); + return; + } +@@ -3780,6 +3783,9 @@ static void virt_instance_init(Object *obj) + /* EL2 is also disabled by default, for similar reasons */ + vms->virt = false; + ++ /* CPU hotplug is enabled by default */ ++ vms->cpu_hotplug_enabled = true; ++ + /* High memory is enabled by default */ + vms->highmem = true; + vms->highmem_compact = !vmc->no_highmem_compact; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index ae0f5beb26..138531f9c1 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -153,6 +153,7 @@ 
struct VirtMachineState { + bool its; + bool tcg_its; + bool virt; ++ bool cpu_hotplug_enabled; + bool ras; + bool mte; + bool dtb_randomness; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index e534411ddc..cfa77cc15b 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -492,6 +492,8 @@ bool kvm_kernel_irqchip_allowed(void); + bool kvm_kernel_irqchip_required(void); + bool kvm_kernel_irqchip_split(void); + ++bool kvm_smccc_filter_enabled(void); ++ + /** + * kvm_arch_irqchip_create: + * @KVMState: The KVMState pointer +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index fd846394be..b2d2c59477 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -112,6 +112,7 @@ struct KVMState + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + bool kvm_dirty_ring_with_bitmap; ++ bool kvm_smccc_filter_enabled; + uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */ + struct KVMDirtyRingReaper reaper; + NotifyVmexitOption notify_vmexit; +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 19783d567f..12c1b4b328 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -321,12 +321,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, + KVM_SMCCC_FILTER_FWD_TO_USER)) { + error_report("CPU On PSCI-to-user-space fwd filter install failed"); +- mc->has_hotpluggable_cpus = false; +- } +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ } else if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, + KVM_SMCCC_FILTER_FWD_TO_USER)) { + error_report("CPU Off PSCI-to-user-space fwd filter install failed"); +- mc->has_hotpluggable_cpus = false; ++ } else { ++ s->kvm_smccc_filter_enabled = true; + } + } + +-- +2.41.0 + diff --git a/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch b/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch new file mode 
100644 index 0000000000000000000000000000000000000000..cde5af36c19f45400e9c75b3755d57fcd19967ba --- /dev/null +++ b/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch @@ -0,0 +1,43 @@ +From bea23b0f82cedbd860b66c7b9e1f6bb0ca85d1cf Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 17:05:30 +0000 +Subject: [PATCH] arm/virt/acpi: Build CPUs AML with CPU Hotplug support + +Support of vCPU Hotplug requires sequence of ACPI handshakes between Qemu and +Guest kernel when a vCPU is plugged or unplugged. Most of the AML code to +support these handshakes already exists. This AML need to be build during VM +init for ARM architecture as well if the GED support exists. + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 084c8abc7c..d88f3cded1 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -937,7 +937,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. 
+ */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms); ++ /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ ++ if (vms->acpi_dev) { ++ CPUHotplugFeatures opts = { ++ .acpi_1_compatible = false, ++ .has_legacy_cphp = false ++ }; ++ ++ build_cpus_aml(scope, ms, opts, NULL, virt_acpi_dsdt_cpu_cppc, ++ memmap[VIRT_CPUHP_ACPI].base, ++ "\\_SB", NULL, AML_SYSTEM_MEMORY); ++ } else { ++ acpi_dsdt_add_cpus(scope, vms); ++ } + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch b/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch index 297ccf633fe69840b863f10109ee4271f16c11a1..b4efd1f14b68b65373b69f4f2508cd507873b1d2 100644 --- a/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch +++ b/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch @@ -1,9 +1,9 @@ -From 91fed8840b004ec7bc91969afa10f03e13f311c4 Mon Sep 17 00:00:00 2001 +From fb27704692362d151eb191f0c687ded09b04e04c Mon Sep 17 00:00:00 2001 From: Keqian Zhu -Date: Wed, 22 Apr 2020 19:52:58 +0800 +Date: Sun, 28 Apr 2024 14:14:07 +0800 Subject: [PATCH] arm/virt/acpi: Extend cpufreq to support max_cpus -We will support CPU hotplug soon, so extend memory region size to +We support vcpu hotplug now, so extend memory region size to allow hotplugged CPU access cpufreq space. 
Signed-off-by: Keqian Zhu @@ -12,21 +12,21 @@ Signed-off-by: Keqian Zhu 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c -index d02a25a6de..38dcab5683 100644 +index a84db490b3..a76f7b8fa2 100644 --- a/hw/acpi/cpufreq.c +++ b/hw/acpi/cpufreq.c -@@ -84,6 +84,7 @@ typedef struct CpuhzState { +@@ -83,6 +83,7 @@ typedef struct CpuhzState { uint32_t PerformanceLimited; uint32_t LowestFreq; uint32_t NominalFreq; + uint32_t num_cpu; uint32_t reg_size; } CpuhzState; - -@@ -95,10 +96,7 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, + +@@ -93,10 +94,7 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size) uint64_t r; uint64_t n; - + - MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cpus = ms->smp.cpus; - @@ -35,7 +35,7 @@ index d02a25a6de..38dcab5683 100644 warn_report("cpufreq_read: offset 0x%lx out of range", offset); return 0; } -@@ -166,11 +164,10 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, +@@ -163,11 +161,10 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size) static void cpufreq_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -43,23 +43,24 @@ index d02a25a6de..38dcab5683 100644 uint64_t n; - MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cpus = ms->smp.cpus; - + - if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { + if (offset >= s->num_cpu * CPPC_REG_PER_CPU_STRIDE) { error_printf("cpufreq_write: offset 0x%lx out of range", offset); return; } -@@ -251,9 +248,9 @@ static void cpufreq_init(Object *obj) +@@ -248,9 +245,9 @@ static void cpufreq_init(Object *obj) CpuhzState *s = CPUFREQ(obj); - + MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cpus = ms->smp.cpus; + s->num_cpu = ms->smp.max_cpus; - + - s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE; + s->reg_size = s->num_cpu * CPPC_REG_PER_CPU_STRIDE; if (s->reg_size > MAX_SUPPORT_SPACE) { error_report("Required 
space 0x%x excesses the max support 0x%x", s->reg_size, MAX_SUPPORT_SPACE); --- -2.19.1 +-- +2.27.0 + diff --git a/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch index f08f83de6505d7810cb7b6753bd00034e3921862..1a599cbfe434992133a647dd0b1f0e278a649397 100644 --- a/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch +++ b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch @@ -1,51 +1,34 @@ -From 2fdece10dac6161cb6c1f0f05247391aa3269eed Mon Sep 17 00:00:00 2001 +From 2d5040ce21af5fc02a8588456be7316fcd5bc2a0 Mon Sep 17 00:00:00 2001 From: Keqian Zhu -Date: Wed, 22 Apr 2020 15:58:27 +0800 +Date: Tue, 2 Apr 2024 16:36:38 +0800 Subject: [PATCH] arm/virt/acpi: Factor out CPPC building from DSDT CPU aml When CPU hotplug is enabled, we will use build_cpus_aml instead of -acpi_dsdt_add_cpus, so factor out CPPC building and we can reuse it -in build_cpus_aml. +acpi_dsdt_add_cpus, so factor out CPPC building to reuse it. 
Signed-off-by: Keqian Zhu --- - hw/acpi/generic_event_device.c | 1 + - hw/arm/virt-acpi-build.c | 33 +++++++++++++++++----------- - include/hw/acpi/acpi_dev_interface.h | 2 ++ - include/hw/arm/virt.h | 2 ++ - 4 files changed, 25 insertions(+), 13 deletions(-) + hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) -diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c -index b834ae3ff6..82139b4314 100644 ---- a/hw/acpi/generic_event_device.c -+++ b/hw/acpi/generic_event_device.c -@@ -289,6 +289,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) - - adevc->send_event = acpi_ged_send_event; - adevc->madt_cpu = virt_madt_cpu_entry; -+ adevc->cpu_cppc = virt_acpi_dsdt_cpu_cppc; - } - - static const TypeInfo acpi_ged_info = { diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index 4b6aace433..8b68a15d76 100644 +index 48fc77fb83..084c8abc7c 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c -@@ -111,8 +111,24 @@ static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) +@@ -123,8 +123,23 @@ static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) aml_append(dev, aml_name_decl("_CPC", cpc)); } - --static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, -- const MemMapEntry *cppc_memmap) -+void virt_acpi_dsdt_cpu_cppc(AcpiDeviceIf *adev, int ncpu, int num_cpu, Aml *dev) -+{ + +-static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, +- const MemMapEntry *cppc_memmap) ++static void virt_acpi_dsdt_cpu_cppc(int ncpu, int num_cpu, Aml *dev) { + VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine()); + const MemMapEntry *cppc_memmap = &vms->memmap[VIRT_CPUFREQ]; + + /* -+ * Append _CPC and _PSD to support CPU frequence show -+ * Check CPPC available by DESIRED_PERF register -+ */ ++ * Append _CPC and _PSD to support CPU frequence show ++ * Check CPPC available by DESIRED_PERF register ++ 
*/ + if (cppc_regs_offset[DESIRED_PERF] != -1) { + acpi_dsdt_add_cppc(dev, + cppc_memmap->base + ncpu * CPPC_REG_PER_CPU_STRIDE, @@ -54,15 +37,15 @@ index 4b6aace433..8b68a15d76 100644 + } +} + -+static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, VirtMachineState *vms) ++static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) { + MachineState *ms = MACHINE(vms); uint16_t i; - -@@ -121,16 +137,7 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, +@@ -134,18 +149,9 @@ static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); aml_append(dev, aml_name_decl("_UID", aml_int(i))); - -- /* + +- /* - * Append _CPC and _PSD to support CPU frequence show - * Check CPPC available by DESIRED_PERF register - */ @@ -70,52 +53,24 @@ index 4b6aace433..8b68a15d76 100644 - acpi_dsdt_add_cppc(dev, - cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, - cppc_regs_offset); -- acpi_dsdt_add_psd(dev, smp_cpus); +- acpi_dsdt_add_psd(dev, ms->smp.cpus); - } -+ virt_acpi_dsdt_cpu_cppc(NULL, i, smp_cpus, dev); - - aml_append(scope, dev); ++ virt_acpi_dsdt_cpu_cppc(i, ms->smp.cpus, dev); + +- aml_append(scope, dev); ++ aml_append(scope, dev); } -@@ -810,7 +817,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + } + +@@ -931,7 +937,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) * the RTC ACPI device at all when using UEFI. 
*/ scope = aml_scope("\\_SB"); -- acpi_dsdt_add_cpus(scope, vms->smp_cpus, &memmap[VIRT_CPUFREQ]); -+ acpi_dsdt_add_cpus(scope, vms->smp_cpus, vms); +- acpi_dsdt_add_cpus(scope, vms, &memmap[VIRT_CPUFREQ]); ++ acpi_dsdt_add_cpus(scope, vms); acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], (irqmap[VIRT_UART] + ARM_SPI_BASE)); - acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); -diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h -index adcb3a816c..2952914569 100644 ---- a/include/hw/acpi/acpi_dev_interface.h -+++ b/include/hw/acpi/acpi_dev_interface.h -@@ -3,6 +3,7 @@ - - #include "qom/object.h" - #include "hw/boards.h" -+#include "hw/acpi/aml-build.h" - - /* These values are part of guest ABI, and can not be changed */ - typedef enum { -@@ -55,5 +56,6 @@ typedef struct AcpiDeviceIfClass { - void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); - void (*madt_cpu)(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry); -+ void (*cpu_cppc)(AcpiDeviceIf *adev, int uid, int num_cpu, Aml *dev); - } AcpiDeviceIfClass; - #endif -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 6b1f10b231..cbdea7ff32 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -157,6 +157,8 @@ typedef struct { - void virt_acpi_setup(VirtMachineState *vms); - void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *cpu_list, GArray *entry); -+void virt_acpi_dsdt_cpu_cppc(AcpiDeviceIf *adev, int uid, -+ int num_cpu, Aml *dev); + if (vmc->acpi_expose_flash) { +-- +2.27.0 - /* Return the number of used redistributor regions */ - static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) --- -2.19.1 diff --git a/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch b/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch new file mode 100644 index 0000000000000000000000000000000000000000..c323c440952a681f11f46ce4f1f3bcfbcb954f91 --- /dev/null +++ 
b/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch @@ -0,0 +1,38 @@ +From 0bee56446962676992d11e5879f6fbac57e785e8 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:38:31 +0800 +Subject: [PATCH] arm/virt-acpi: Require possible_cpu_arch_ids for + build_cpus_aml() + +As the acpi_dev requires possible_cpu_arch_ids to support +vcpu hotplug. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt-acpi-build.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 590afcfa98..46642efac4 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -1003,6 +1003,7 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); + Aml *scope, *dsdt; + MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; +@@ -1020,7 +1021,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + */ + scope = aml_scope("\\_SB"); + /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ +- if (vms->acpi_dev) { ++ if (mc->has_hotpluggable_cpus) { + CPUHotplugFeatures opts = { + .acpi_1_compatible = false, + .has_legacy_cphp = false +-- +2.27.0 + diff --git a/arm-virt-gic-Construct-irqs-connection-from-create_g.patch b/arm-virt-gic-Construct-irqs-connection-from-create_g.patch deleted file mode 100644 index 7e9506425f1de938d2ba70a3fb31a83561a4154e..0000000000000000000000000000000000000000 --- a/arm-virt-gic-Construct-irqs-connection-from-create_g.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 92124743f4560c490780a229f53ea5881f706383 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Sun, 5 Apr 2020 15:29:16 +0800 -Subject: [PATCH] arm/virt/gic: Construct irqs connection from create_gic - -Make the irqs can be connected to for individual CPU. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 90 ++++++++++++++++++++++++++++----------------------- - 1 file changed, 49 insertions(+), 41 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 83f4887e57..55d403bad6 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -706,6 +706,54 @@ static void create_v2m(VirtMachineState *vms) - fdt_add_v2m_gic_node(vms); - } - -+static void connect_gic_cpu_irqs(VirtMachineState *vms, int i) -+{ -+ DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); -+ SysBusDevice *gicbusdev = SYS_BUS_DEVICE(vms->gic); -+ int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; -+ int num_cpus = object_property_get_uint(OBJECT(vms->gic), "num-cpu", NULL); -+ int gic_type = vms->gic_version; -+ int irq; -+ /* Mapping from the output timer irq lines from the CPU to the -+ * GIC PPI inputs we use for the virt board. -+ */ -+ const int timer_irq[] = { -+ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, -+ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, -+ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, -+ [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, -+ }; -+ -+ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { -+ qdev_connect_gpio_out(cpudev, irq, -+ qdev_get_gpio_in(vms->gic, -+ ppibase + timer_irq[irq])); -+ } -+ -+ if (gic_type == 3) { -+ qemu_irq irq = qdev_get_gpio_in(vms->gic, -+ ppibase + ARCH_GIC_MAINT_IRQ); -+ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", -+ 0, irq); -+ } else if (vms->virt) { -+ qemu_irq irq = qdev_get_gpio_in(vms->gic, -+ ppibase + ARCH_GIC_MAINT_IRQ); -+ sysbus_connect_irq(gicbusdev, i + 4 * num_cpus, irq); -+ } -+ -+ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, -+ qdev_get_gpio_in(vms->gic, ppibase -+ + VIRTUAL_PMU_IRQ)); -+ -+ sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); -+ sysbus_connect_irq(gicbusdev, i + num_cpus, -+ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); -+ sysbus_connect_irq(gicbusdev, i + 2 * num_cpus, -+ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); 
-+ sysbus_connect_irq(gicbusdev, i + 3 * num_cpus, -+ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); -+} -+ - static void create_gic(VirtMachineState *vms) - { - MachineState *ms = MACHINE(vms); -@@ -775,47 +823,7 @@ static void create_gic(VirtMachineState *vms) - * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. - */ - for (i = 0; i < smp_cpus; i++) { -- DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); -- int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; -- int irq; -- /* Mapping from the output timer irq lines from the CPU to the -- * GIC PPI inputs we use for the virt board. -- */ -- const int timer_irq[] = { -- [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, -- [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, -- [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, -- [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, -- }; -- -- for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { -- qdev_connect_gpio_out(cpudev, irq, -- qdev_get_gpio_in(vms->gic, -- ppibase + timer_irq[irq])); -- } -- -- if (type == 3) { -- qemu_irq irq = qdev_get_gpio_in(vms->gic, -- ppibase + ARCH_GIC_MAINT_IRQ); -- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", -- 0, irq); -- } else if (vms->virt) { -- qemu_irq irq = qdev_get_gpio_in(vms->gic, -- ppibase + ARCH_GIC_MAINT_IRQ); -- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); -- } -- -- qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, -- qdev_get_gpio_in(vms->gic, ppibase -- + VIRTUAL_PMU_IRQ)); -- -- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); -- sysbus_connect_irq(gicbusdev, i + smp_cpus, -- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); -- sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus, -- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); -- sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, -- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); -+ connect_gic_cpu_irqs(vms, i); - } - - fdt_add_gic_node(vms); --- -2.19.1 diff --git a/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch 
b/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch new file mode 100644 index 0000000000000000000000000000000000000000..b2f8c67fce6c0c005d1e5537abc3faddfd22f250 --- /dev/null +++ b/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch @@ -0,0 +1,225 @@ +From fe61cbaf2dc92b062c8d147b05c3ce213734c24a Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 02:20:23 +0100 +Subject: [PATCH] arm/virt,gicv3: Changes to pre-size GIC with possible vcpus + @machine init + +GIC needs to be pre-sized with possible vcpus at the initialization time. This +is necessary because Memory regions and resources associated with GICC/GICR +etc cannot be changed (add/del/modified) after VM has inited. Also, GIC_TYPER +needs to be initialized with mp_affinity and cpu interface number association. +This cannot be changed after GIC has initialized. + +Once all the cpu interfaces of the GIC has been inited it needs to be ensured +that any updates to the GICC during reset only takes place for the present +vcpus and not the disabled ones. Therefore, proper checks are required at +various places. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Jean-Philippe Brucker +[changed the comment in arm_gicv3_icc_reset] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 13 +++++++------ + hw/intc/arm_gicv3_common.c | 7 +++++-- + hw/intc/arm_gicv3_cpuif.c | 8 ++++++++ + hw/intc/arm_gicv3_kvm.c | 34 +++++++++++++++++++++++++++++++--- + include/hw/arm/virt.h | 2 +- + 5 files changed, 52 insertions(+), 12 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f10d75366b..08ba255317 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -802,6 +802,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + const char *gictype; + int i; + unsigned int smp_cpus = ms->smp.cpus; ++ unsigned int max_cpus = ms->smp.max_cpus; + uint32_t nb_redist_regions = 0; + int revision; + +@@ -826,7 +827,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } + vms->gic = qdev_new(gictype); + qdev_prop_set_uint32(vms->gic, "revision", revision); +- qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); ++ qdev_prop_set_uint32(vms->gic, "num-cpu", max_cpus); + /* Note that the num-irq property counts both internal and external + * interrupts; there are always 32 of the former (mandated by GIC spec). 
+ */ +@@ -838,7 +839,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + if (vms->gic_version != VIRT_GIC_VERSION_2) { + QList *redist_region_count; + uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST); +- uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); ++ uint32_t redist0_count = MIN(max_cpus, redist0_capacity); + + nb_redist_regions = virt_gicv3_redist_region_count(vms); + +@@ -915,7 +916,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } else if (vms->virt) { + qemu_irq irq = qdev_get_gpio_in(vms->gic, + intidbase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); ++ sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq); + } + + qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +@@ -923,11 +924,11 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + + VIRTUAL_PMU_IRQ)); + + sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + 2 * max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + 3 * max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index 2ebf880ead..ebd99af610 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -392,10 +392,13 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu = g_new0(GICv3CPUState, s->num_cpu); + + for (i = 0; i < s->num_cpu; i++) { +- CPUState *cpu = qemu_get_cpu(i); ++ CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t cpu_affid; + +- s->cpu[i].cpu = cpu; ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[i].cpu = cpu; ++ } ++ + s->cpu[i].gic = s; + /* Store 
GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index ab1a00508e..0d0eb2f62f 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c +@@ -934,6 +934,10 @@ void gicv3_cpuif_update(GICv3CPUState *cs) + ARMCPU *cpu = ARM_CPU(cs->cpu); + CPUARMState *env = &cpu->env; + ++ if (!qemu_enabled_cpu(cs->cpu)) { ++ return; ++ } ++ + g_assert(qemu_mutex_iothread_locked()); + + trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq, +@@ -1826,6 +1830,10 @@ static void icc_generate_sgi(CPUARMState *env, GICv3CPUState *cs, + for (i = 0; i < s->num_cpu; i++) { + GICv3CPUState *ocs = &s->cpu[i]; + ++ if (!qemu_enabled_cpu(ocs->cpu)) { ++ continue; ++ } ++ + if (irm) { + /* IRM == 1 : route to all CPUs except self */ + if (cs == ocs) { +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index 77eb37e131..db06c75e2b 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -24,6 +24,7 @@ + #include "hw/intc/arm_gicv3_common.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/cpus.h" + #include "sysemu/kvm.h" + #include "sysemu/runstate.h" + #include "kvm_arm.h" +@@ -458,6 +459,18 @@ static void kvm_arm_gicv3_put(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* ++ * To support hotplug of vcpus we need to make sure all gic cpuif/GICC ++ * are initialized at machvirt init time. Once the init is done we ++ * release the ARMCPU object for disabled vcpus but this leg could hit ++ * during reset of GICC later as well i.e. after init has happened and ++ * all of the cases we want to make sure we dont acess the GICC for ++ * the disabled VCPUs. 
++ */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); +@@ -616,6 +629,11 @@ static void kvm_arm_gicv3_get(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* don't access GICC for the disabled vCPUs. */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); +@@ -695,10 +713,19 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) + return; + } + ++ /* ++ * This shall be called even when vcpu is being hotplugged or onlined and ++ * other vcpus might be running. Host kernel KVM code to handle device ++ * access of IOCTLs KVM_{GET|SET}_DEVICE_ATTR might fail due to inability to ++ * grab vcpu locks for all the vcpus. Hence, we need to pause all vcpus to ++ * facilitate locking within host. 
++ */ ++ pause_all_vcpus(); + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer), + &c->icc_ctlr_el1[GICV3_NS], false, &error_abort); ++ resume_all_vcpus(); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + } +@@ -808,9 +835,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); + + for (i = 0; i < s->num_cpu; i++) { +- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ CPUState *cs = qemu_get_cpu(i); ++ if (qemu_enabled_cpu(cs)) { ++ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++ } + } + + /* Try to create the device via the device control API */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 49d1ec8656..a6977bade5 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -208,7 +208,7 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + + assert(vms->gic_version != VIRT_GIC_VERSION_2); + +- return (MACHINE(vms)->smp.cpus > redist0_capacity && ++ return (MACHINE(vms)->smp.max_cpus > redist0_capacity && + vms->highmem_redists) ? 2 : 1; + } + +-- +2.27.0 + diff --git a/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch b/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch new file mode 100644 index 0000000000000000000000000000000000000000..b752e1fd85490dc2292692b29bc34652b29d460b --- /dev/null +++ b/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch @@ -0,0 +1,221 @@ +From 2669fd26cbc36e24ebfc844c240b45ad831701cc Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 5 May 2020 18:44:59 +0100 +Subject: [PATCH] arm/virt,kvm: Pre-create disabled possible vCPUs @machine + init + +In ARMv8 architecture, GIC needs all the vCPUs to be created and present when +it is initialized. This is because: +1. 
GICC and MPIDR association must be fixed at the VM initialization time. + This is represented by register GIC_TYPER(mp_afffinity, proc_num) +2. GICC(cpu interfaces), GICR(redistributors) etc all must be initialized + at the boot time as well. +3. Memory regions associated with GICR etc. cannot be changed(add/del/mod) + after VM has inited. + +This patch adds the support to pre-create all such possible vCPUs within the +host using the KVM interface as part of the virt machine initialization. These +vCPUs could later be attached to QOM/ACPI while they are actually hot plugged +and made present. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Vishnu Pajjuri +[VP: Identified CPU stall issue & suggested probable fix] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 53 +++++++++++++++++++++++++++++++++++++++++-- + include/hw/core/cpu.h | 1 + + target/arm/cpu64.c | 1 + + target/arm/kvm.c | 32 ++++++++++++++++++++++++++ + target/arm/kvm64.c | 9 +++++++- + target/arm/kvm_arm.h | 11 +++++++++ + 6 files changed, 104 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 2f04bc7666..f10d75366b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2389,8 +2389,10 @@ static void machvirt_init(MachineState *machine) + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; ++ CPUState *cs; + + cpuobj = object_new(possible_cpus->cpus[n].type); ++ cs = CPU(cpuobj); + + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", +@@ -2402,8 +2404,55 @@ static void machvirt_init(MachineState *machine) + object_property_set_int(cpuobj, "thread-id", + virt_get_thread_id(machine, n), NULL); + +- qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); +- object_unref(cpuobj); ++ if (n < smp_cpus) { ++ qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); ++ object_unref(cpuobj); ++ } else { ++ 
CPUArchId *cpu_slot; ++ ++ /* handling for vcpus which are yet to be hot-plugged */ ++ cs->cpu_index = n; ++ cpu_slot = virt_find_cpu_slot(machine, cs->cpu_index); ++ ++ /* ++ * ARM host vCPU features need to be fixed at the boot time. But as ++ * per current approach this CPU object will be destroyed during ++ * cpu_post_init(). During hotplug of vCPUs these properties are ++ * initialized again. ++ */ ++ virt_cpu_set_properties(cpuobj, cpu_slot, &error_fatal); ++ ++ /* ++ * For KVM, we shall be pre-creating the now disabled/un-plugged ++ * possbile host vcpus and park them till the time they are ++ * actually hot plugged. This is required to pre-size the host ++ * GICC and GICR with the all possible vcpus for this VM. ++ */ ++ if (kvm_enabled()) { ++ kvm_arm_create_host_vcpu(ARM_CPU(cs)); ++ } ++ /* ++ * Add disabled vCPU to CPU slot during the init phase of the virt ++ * machine ++ * 1. We need this ARMCPU object during the GIC init. This object ++ * will facilitate in pre-realizing the GIC. Any info like ++ * mp-affinity(required to derive gicr_type) etc. could still be ++ * fetched while preserving QOM abstraction akin to realized ++ * vCPUs. ++ * 2. Now, after initialization of the virt machine is complete we ++ * could use two approaches to deal with this ARMCPU object: ++ * (i) re-use this ARMCPU object during hotplug of this vCPU. ++ * OR ++ * (ii) defer release this ARMCPU object after gic has been ++ * initialized or during pre-plug phase when a vCPU is ++ * hotplugged. ++ * ++ * We will use the (ii) approach and release the ARMCPU objects ++ * after GIC and machine has been fully initialized during ++ * machine_init_done() phase. 
++ */ ++ cpu_slot->cpu = OBJECT(cs); ++ } + } + fdt_add_timer_nodes(vms); + fdt_add_cpu_nodes(vms); +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index c30636a936..fdfb952259 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -528,6 +528,7 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + int kvm_vcpu_stats_fd; ++ VMChangeStateEntry *vmcse; + + /* Use by accel-block: CPU is executing an ioctl() */ + QemuLockCnt in_ioctl_lock; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index e226b60b72..5d28838175 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -859,6 +859,7 @@ static void aarch64_cpu_initfn(Object *obj) + * enabled explicitly + */ + cs->disabled = true; ++ cs->thread_id = 0; + } + + static void aarch64_cpu_finalizefn(Object *obj) +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index f59f4f81b2..70cf15b550 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -659,6 +659,38 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + write_list_to_cpustate(cpu); + } + ++void kvm_arm_create_host_vcpu(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ unsigned long vcpu_id = cs->cpu_index; ++ int ret; ++ ++ ret = kvm_create_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to create host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * Initialize the vCPU in the host. This will reset the sys regs ++ * for this vCPU and related registers like MPIDR_EL1 etc. also ++ * gets programmed during this call to host. These are referred ++ * later while setting device attributes of the GICR during GICv3 ++ * reset ++ */ ++ ret = kvm_arch_init_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to initialize host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * park the created vCPU. shall be used during kvm_get_vcpu() when ++ * threads are created during realization of ARM vCPUs. 
++ */ ++ kvm_park_vcpu(cs); ++} ++ + /* + * Update KVM's MP_STATE based on what QEMU thinks it is + */ +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 3c175c93a7..03ce1e7525 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -562,7 +562,14 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + +- qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ /* ++ * Install VM change handler only when vCPU thread has been spawned ++ * i.e. vCPU is being realized ++ */ ++ if (cs->thread_id) { ++ cs->vmcse = qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, ++ cs); ++ } + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 051a0da41c..31408499b3 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -163,6 +163,17 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu); + */ + void kvm_arm_reset_vcpu(ARMCPU *cpu); + ++/** ++ * kvm_arm_create_host_vcpu: ++ * @cpu: ARMCPU ++ * ++ * Called at to pre create all possible kvm vCPUs within the the host at the ++ * virt machine init time. This will also init this pre-created vCPU and ++ * hence result in vCPU reset at host. These pre created and inited vCPUs ++ * shall be parked for use when ARM vCPUs are actually realized. 
++ */ ++void kvm_arm_create_host_vcpu(ARMCPU *cpu); ++ + /** + * kvm_arm_init_serror_injection: + * @cs: CPUState +-- +2.27.0 + diff --git a/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch b/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch new file mode 100644 index 0000000000000000000000000000000000000000..71f2ff037965e861699d5448b53f67381156c7bb --- /dev/null +++ b/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch @@ -0,0 +1,153 @@ +From c8e062285078e688e692214baf97b35246fc2552 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 5 May 2020 23:19:17 +0100 +Subject: [PATCH] arm/virt,target/arm: Add new ARMCPU + {socket,cluster,core,thread}-id property + +This shall be used to store user specified topology{socket,cluster,core,thread} +and shall be converted to a unique 'vcpu-id' which is used as slot-index during +hot(un)plug of vCPU. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ + target/arm/cpu.c | 4 +++ + target/arm/cpu.h | 4 +++ + 3 files changed, 71 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f4c3d47f30..94481d45d4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -227,6 +227,11 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static int virt_get_socket_id(const MachineState *ms, int cpu_index); ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index); ++static int virt_get_core_id(const MachineState *ms, int cpu_index); ++static int virt_get_thread_id(const MachineState *ms, int cpu_index); ++ + static bool cpu_type_valid(const char *cpu) + { + int i; +@@ -2264,6 +2269,14 @@ static void machvirt_init(MachineState *machine) + &error_fatal); + + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); ++ object_property_set_int(cpuobj, "socket-id", ++ virt_get_socket_id(machine, n), 
NULL); ++ object_property_set_int(cpuobj, "cluster-id", ++ virt_get_cluster_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "core-id", ++ virt_get_core_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "thread-id", ++ virt_get_thread_id(machine, n), NULL); + + if (!vms->secure) { + object_property_set_bool(cpuobj, "has_el3", false, NULL); +@@ -2750,10 +2763,59 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + return socket_id % ms->numa_state->num_nodes; + } + ++static int virt_get_socket_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.socket_id; ++} ++ ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.cluster_id; ++} ++ ++static int virt_get_core_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.core_id; ++} ++ ++static int virt_get_thread_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.thread_id; ++} ++ ++static int ++virt_get_cpu_id_from_cpu_topo(const MachineState *ms, DeviceState *dev) ++{ ++ int cpu_id, sock_vcpu_num, clus_vcpu_num, core_vcpu_num; ++ ARMCPU *cpu = ARM_CPU(dev); ++ ++ /* calculate total logical cpus across socket/cluster/core */ ++ sock_vcpu_num = cpu->socket_id * (ms->smp.threads * ms->smp.cores * ++ ms->smp.clusters); ++ clus_vcpu_num = cpu->cluster_id * (ms->smp.threads * ms->smp.cores); ++ core_vcpu_num = cpu->core_id * ms->smp.threads; ++ ++ /* get vcpu-id(logical cpu index) for this vcpu from this topology */ ++ cpu_id = (sock_vcpu_num + clus_vcpu_num + core_vcpu_num) + cpu->thread_id; ++ ++ 
assert(cpu_id >= 0 && cpu_id < ms->possible_cpus->len); ++ ++ return cpu_id; ++} ++ + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + { + int n; + unsigned int max_cpus = ms->smp.max_cpus; ++ unsigned int smp_threads = ms->smp.threads; + VirtMachineState *vms = VIRT_MACHINE(ms); + MachineClass *mc = MACHINE_GET_CLASS(vms); + +@@ -2767,6 +2829,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].type = ms->cpu_type; ++ ms->possible_cpus->cpus[n].vcpus_count = smp_threads; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index efb22a87f9..cce315c18a 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2422,6 +2422,10 @@ static Property arm_cpu_properties[] = { + DEFINE_PROP_UINT64("mp-affinity", ARMCPU, + mp_affinity, ARM64_AFFINITY_INVALID), + DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), ++ DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, 0), ++ DEFINE_PROP_INT32("cluster-id", ARMCPU, cluster_id, 0), ++ DEFINE_PROP_INT32("core-id", ARMCPU, core_id, 0), ++ DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, 0), + DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), + DEFINE_PROP_END_OF_LIST() + }; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a0282e0d28..145d3dbf13 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1096,6 +1096,10 @@ struct ArchCPU { + QLIST_HEAD(, ARMELChangeHook) el_change_hooks; + + int32_t node_id; /* NUMA node this CPU belongs to */ ++ int32_t socket_id; ++ int32_t cluster_id; ++ int32_t core_id; ++ int32_t thread_id; + + /* Used to synchronize KVM and QEMU in-kernel device levels */ + uint8_t device_irq_level; +-- +2.27.0 + diff --git a/arm-virt-target-arm-Machine-init-time-change-common-.patch 
b/arm-virt-target-arm-Machine-init-time-change-common-.patch new file mode 100644 index 0000000000000000000000000000000000000000..d8199f7ce05614c00088072be19912fda08e3c13 --- /dev/null +++ b/arm-virt-target-arm-Machine-init-time-change-common-.patch @@ -0,0 +1,328 @@ +From 7cd2d7ef7bb7f6c6a97988d86b97922ff700ab06 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 00:13:31 +0100 +Subject: [PATCH] arm/virt,target/arm: Machine init time change common to vCPU + {cold|hot}-plug + +Refactor and introduce the common logic required during the initialization of +both cold and hot plugged vCPUs. Also initialize the *disabled* state of the +vCPUs which shall be used further during init phases of various other components +like GIC, PMU, ACPI etc as part of the virt machine initialization. + +KVM vCPUs corresponding to unplugged/yet-to-be-plugged QOM CPUs are kept in +powered-off state in the KVM Host and do not run the guest code. Plugged vCPUs +are also kept in powered-off state but vCPU threads exist and are kept sleeping. + +TBD: +For the cold booted vCPUs, this change also exists in the arm_load_kernel() +in boot.c but for the hotplugged CPUs this change should still remain part of +the pre-plug phase. We are duplicating the powering-off of the cold booted CPUs. +Shall we remove the duplicate change from boot.c? 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Gavin Shan +[GS: pointed the assertion due to wrong range check] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 149 ++++++++++++++++++++++++++++++++++++++++----- + target/arm/cpu.c | 7 +++ + target/arm/cpu64.c | 14 +++++ + 3 files changed, 156 insertions(+), 14 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8f647422d8..2f04bc7666 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -227,6 +227,7 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid); + static int virt_get_socket_id(const MachineState *ms, int cpu_index); + static int virt_get_cluster_id(const MachineState *ms, int cpu_index); + static int virt_get_core_id(const MachineState *ms, int cpu_index); +@@ -2249,6 +2250,14 @@ static void machvirt_init(MachineState *machine) + exit(1); + } + ++ finalize_gic_version(vms); ++ if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (vms->gic_version < VIRT_GIC_VERSION_3)) { ++ machine->smp.max_cpus = smp_cpus; ++ mc->has_hotpluggable_cpus = false; ++ warn_report("cpu hotplug feature has been disabled"); ++ } ++ + possible_cpus = mc->possible_cpu_arch_ids(machine); + + /* +@@ -2275,11 +2284,6 @@ static void machvirt_init(MachineState *machine) + virt_set_memmap(vms, pa_bits); + } + +- /* We can probe only here because during property set +- * KVM is not available yet +- */ +- finalize_gic_version(vms); +- + sysmem = vms->sysmem = get_system_memory(); + + if (vms->secure) { +@@ -2385,17 +2389,9 @@ static void machvirt_init(MachineState *machine) + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; +- CPUState *cs; +- +- if (n >= smp_cpus) { +- break; +- } + + cpuobj = object_new(possible_cpus->cpus[n].type); + +- cs = CPU(cpuobj); +- cs->cpu_index = n; +- + aarch64 &= 
object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", + virt_get_socket_id(machine, n), NULL); +@@ -2902,6 +2898,50 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ CPUArchId *found_cpu; ++ uint64_t mp_affinity; ++ ++ assert(vcpuid >= 0 && vcpuid < ms->possible_cpus->len); ++ ++ /* ++ * RFC: Question: ++ * TBD: Should mp-affinity be treated as MPIDR? ++ */ ++ mp_affinity = virt_cpu_mp_affinity(vms, vcpuid); ++ found_cpu = &ms->possible_cpus->cpus[vcpuid]; ++ ++ assert(found_cpu->arch_id == mp_affinity); ++ ++ /* ++ * RFC: Question: ++ * Slot-id is the index where vCPU with certain arch-id(=mpidr/ap-affinity) ++ * is plugged. For Host KVM, MPIDR for vCPU is derived using vcpu-id. ++ * As I understand, MPIDR and vcpu-id are property of vCPU but slot-id is ++ * more related to machine? Current code assumes slot-id and vcpu-id are ++ * same i.e. meaning of slot is bit vague. ++ * ++ * Q1: Is there any requirement to clearly represent slot and dissociate it ++ * from vcpu-id? ++ * Q2: Should we make MPIDR within host KVM user configurable? 
++ * ++ * +----+----+----+----+----+----+----+----+ ++ * MPIDR ||| Res | Aff2 | Aff1 | Aff0 | ++ * +----+----+----+----+----+----+----+----+ ++ * \ \ \ | | ++ * \ 8bit \ 8bit \ |4bit| ++ * \<------->\<------->\ |<-->| ++ * \ \ \| | ++ * +----+----+----+----+----+----+----+----+ ++ * VCPU-ID | Byte4 | Byte2 | Byte1 | Byte0 | ++ * +----+----+----+----+----+----+----+----+ ++ */ ++ ++ return found_cpu; ++} ++ + static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { +@@ -2945,6 +2985,81 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, + dev, &error_abort); + } + ++static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ int32_t min_cpuid = 0; ++ int32_t max_cpuid; ++ ++ /* sanity check the cpu */ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) { ++ error_setg(errp, "Invalid thread-id %u specified, correct range 0:%u", ++ cpu->thread_id, ms->smp.threads - 1); ++ return; ++ } ++ ++ max_cpuid = ms->possible_cpus->len - 1; ++ if (!dev->hotplugged) { ++ min_cpuid = vms->acpi_dev ? ms->smp.cpus : 0; ++ max_cpuid = vms->acpi_dev ? 
max_cpuid : ms->smp.cpus - 1; ++ } ++ ++ if ((cpu->core_id < min_cpuid) || (cpu->core_id > max_cpuid)) { ++ error_setg(errp, "Invalid core-id %d specified, correct range %d:%d", ++ cpu->core_id, min_cpuid, max_cpuid); ++ return; ++ } ++ ++ if ((cpu->cluster_id < 0) || (cpu->cluster_id >= ms->smp.clusters)) { ++ error_setg(errp, "Invalid cluster-id %u specified, correct range 0:%u", ++ cpu->cluster_id, ms->smp.clusters - 1); ++ return; ++ } ++ ++ if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) { ++ error_setg(errp, "Invalid socket-id %u specified, correct range 0:%u", ++ cpu->socket_id, ms->smp.sockets - 1); ++ return; ++ } ++ ++ cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev); ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ if (qemu_present_cpu(CPU(cpu_slot->cpu))) { ++ error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist", ++ cs->cpu_index, cpu->socket_id, cpu->cluster_id, cpu->core_id, ++ cpu->thread_id, cpu_slot->arch_id); ++ return; ++ } ++ virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); ++} ++ ++static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ MachineState *ms = MACHINE(hotplug_dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ ++ /* insert the cold/hot-plugged vcpu in the slot */ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ cpu_slot->cpu = OBJECT(dev); ++ ++ cs->disabled = false; ++ return; ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -2987,6 +3102,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + qlist_append_str(reserved_regions, resv_prop_str); + qdev_prop_set_array(dev, "reserved-regions", reserved_regions); + g_free(resv_prop_str); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_pre_plug(hotplug_dev, dev, errp); + } + } + +@@ -3008,6 +3125,8 @@ static void virt_machine_device_plug_cb(HotplugHandler 
*hotplug_dev, + virt_memory_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_plug(hotplug_dev, dev, errp); + } + + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { +@@ -3092,7 +3211,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, + if (device_is_dynamic_sysbus(mc, dev) || + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || +- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { ++ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || ++ object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +@@ -3169,6 +3289,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + #endif + mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; + mc->kvm_type = virt_kvm_type; ++ mc->has_hotpluggable_cpus = true; + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = virt_machine_get_hotplug_handler; + hc->pre_plug = virt_machine_device_pre_plug_cb; +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index cce315c18a..18b8a79c8f 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2477,6 +2477,12 @@ static const struct TCGCPUOps arm_tcg_ops = { + }; + #endif /* CONFIG_TCG */ + ++static int64_t arm_cpu_get_arch_id(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ return cpu->mp_affinity; ++} ++ + static void arm_cpu_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); +@@ -2495,6 +2501,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + cc->class_by_name = arm_cpu_class_by_name; + cc->has_work = arm_cpu_has_work; + cc->dump_state = arm_cpu_dump_state; ++ cc->get_arch_id = arm_cpu_get_arch_id; + cc->set_pc = arm_cpu_set_pc; + cc->get_pc = arm_cpu_get_pc; + 
cc->gdb_read_register = arm_cpu_gdb_read_register; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 471014b5a9..e226b60b72 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -850,6 +850,17 @@ static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) + } + } + ++static void aarch64_cpu_initfn(Object *obj) ++{ ++ CPUState *cs = CPU(obj); ++ ++ /* ++ * we start every ARM64 vcpu as disabled possible vCPU. It needs to be ++ * enabled explicitly ++ */ ++ cs->disabled = true; ++} ++ + static void aarch64_cpu_finalizefn(Object *obj) + { + } +@@ -862,7 +873,9 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs) + static void aarch64_cpu_class_init(ObjectClass *oc, void *data) + { + CPUClass *cc = CPU_CLASS(oc); ++ DeviceClass *dc = DEVICE_CLASS(oc); + ++ dc->user_creatable = true; + cc->gdb_read_register = aarch64_cpu_gdb_read_register; + cc->gdb_write_register = aarch64_cpu_gdb_write_register; + cc->gdb_num_core_regs = 34; +@@ -908,6 +921,7 @@ void aarch64_cpu_register(const ARMCPUInfo *info) + static const TypeInfo aarch64_cpu_type_info = { + .name = TYPE_AARCH64_CPU, + .parent = TYPE_ARM_CPU, ++ .instance_init = aarch64_cpu_initfn, + .instance_finalize = aarch64_cpu_finalizefn, + .abstract = true, + .class_init = aarch64_cpu_class_init, +-- +2.27.0 + diff --git a/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch b/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a2b9ced7bc1c37c4c19bd08d7662f3b63342ee2 --- /dev/null +++ b/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch @@ -0,0 +1,29 @@ +From 25438f2cdb13d07c1bd228fcf4223c21da368548 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:15:31 +0800 +Subject: [PATCH] arm/virt.c: Convey local_err when set psci-conduit + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c 
+index ed437ce0e8..934b0412ef 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2323,7 +2323,10 @@ static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, + */ + if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { + object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit, +- NULL); ++ &local_err); ++ if (local_err) { ++ goto out; ++ } + + /* Secondary CPUs start in PSCI powered-down state */ + if (CPU(cpuobj)->cpu_index > 0) { +-- +2.27.0 + diff --git a/hw-arm-expose-host-CPU-frequency-info-to-guest.patch b/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch similarity index 77% rename from hw-arm-expose-host-CPU-frequency-info-to-guest.patch rename to arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch index f0093812ed61e769afec350993e0298a4c5f9e10..052ac56a4167e507ce6866b5e5c19ea33fdc75c8 100644 --- a/hw-arm-expose-host-CPU-frequency-info-to-guest.patch +++ b/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch @@ -1,6 +1,6 @@ -From b70d020dba72283d7b16a77c377512c84aab5f81 Mon Sep 17 00:00:00 2001 +From ebe05c34a66969e4cacc4d6c030dfe93ace89cb2 Mon Sep 17 00:00:00 2001 From: Ying Fang -Date: Mon, 20 Apr 2020 10:38:12 +0800 +Date: Tue, 19 Mar 2024 14:35:55 +0800 Subject: [PATCH] arm64: Add the cpufreq device to show cpufreq info to guest On ARM64 platform, cpu frequency is retrieved via ACPI CPPC. 
@@ -22,46 +22,36 @@ This series is backported from: https://patchwork.kernel.org/cover/11379943/ Signed-off-by: Ying Fang +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang --- - default-configs/aarch64-softmmu.mak | 1 + - hw/acpi/Makefile.objs | 1 + - hw/acpi/aml-build.c | 22 +++ - hw/acpi/cpufreq.c | 287 ++++++++++++++++++++++++++++ - hw/arm/virt-acpi-build.c | 78 +++++++- - hw/arm/virt.c | 13 ++ - hw/char/Kconfig | 4 + - include/hw/acpi/acpi-defs.h | 38 ++++ - include/hw/acpi/aml-build.h | 3 + - include/hw/arm/virt.h | 1 + - 10 files changed, 446 insertions(+), 2 deletions(-) + configs/devices/aarch64-softmmu/default.mak | 1 + + hw/acpi/aml-build.c | 22 ++ + hw/acpi/cpufreq.c | 283 ++++++++++++++++++++ + hw/acpi/meson.build | 1 + + hw/arm/virt-acpi-build.c | 79 +++++- + hw/arm/virt.c | 13 + + hw/char/Kconfig | 4 + + include/hw/acpi/acpi-defs.h | 40 +++ + include/hw/acpi/aml-build.h | 3 + + include/hw/arm/virt.h | 1 + + 10 files changed, 444 insertions(+), 3 deletions(-) create mode 100644 hw/acpi/cpufreq.c -diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e08..0a030e85 100644 ---- a/default-configs/aarch64-softmmu.mak -+++ b/default-configs/aarch64-softmmu.mak -@@ -6,3 +6,4 @@ include arm-softmmu.mak - CONFIG_XLNX_ZYNQMP_ARM=y - CONFIG_XLNX_VERSAL=y - CONFIG_SBSA_REF=y -+CONFIG_CPUFREQ=y -diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs -index 9bb2101e..1a720c38 100644 ---- a/hw/acpi/Makefile.objs -+++ b/hw/acpi/Makefile.objs -@@ -13,6 +13,7 @@ common-obj-y += bios-linker-loader.o - common-obj-y += aml-build.o utils.o - common-obj-$(CONFIG_ACPI_PCI) += pci.o - common-obj-$(CONFIG_TPM) += tpm.o -+common-obj-$(CONFIG_CPUFREQ) += cpufreq.o - - common-obj-$(CONFIG_IPMI) += ipmi.o - common-obj-$(call lnot,$(CONFIG_IPMI)) += ipmi-stub.o +diff --git a/configs/devices/aarch64-softmmu/default.mak b/configs/devices/aarch64-softmmu/default.mak +index f82a04c27d..8d66d0f1af 100644 +--- 
a/configs/devices/aarch64-softmmu/default.mak ++++ b/configs/devices/aarch64-softmmu/default.mak +@@ -8,3 +8,4 @@ include ../arm-softmmu/default.mak + # CONFIG_XLNX_ZYNQMP_ARM=n + # CONFIG_XLNX_VERSAL=n + # CONFIG_SBSA_REF=n ++# CONFIG_CPUFREQ=n diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 555c24f2..73f97751 100644 +index 2968df5562..714498165a 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c -@@ -1369,6 +1369,28 @@ Aml *aml_sleep(uint64_t msec) +@@ -1554,6 +1554,28 @@ Aml *aml_sleep(uint64_t msec) return var; } @@ -92,10 +82,10 @@ index 555c24f2..73f97751 100644 int hi, lo; diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c new file mode 100644 -index 00000000..d02a25a6 +index 0000000000..a84db490b3 --- /dev/null +++ b/hw/acpi/cpufreq.c -@@ -0,0 +1,287 @@ +@@ -0,0 +1,283 @@ +/* + * ACPI CPPC register device + * @@ -142,7 +132,6 @@ index 00000000..d02a25a6 + */ +#define DEFAULT_HZ 2400 + -+ +int cppc_regs_offset[CPPC_REG_COUNT] = { + [HIGHEST_PERF] = 0, + [NOMINAL_PERF] = 4, @@ -186,8 +175,7 @@ index 00000000..d02a25a6 +} CpuhzState; + + -+static uint64_t cpufreq_read(void *opaque, hwaddr offset, -+ unsigned size) ++static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size) +{ + CpuhzState *s = (CpuhzState *)opaque; + uint64_t r; @@ -231,7 +219,7 @@ index 00000000..d02a25a6 + break; + /* + * Guest may still access the register by 32bit; add the process to -+ * eliminate unnecessary warnings ++ * eliminate unnecessary warnings. 
+ */ + case 28: + r = s->ReferencePerformanceCounter >> 32; @@ -260,7 +248,6 @@ index 00000000..d02a25a6 + return r; +} + -+ +static void cpufreq_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ @@ -292,7 +279,7 @@ index 00000000..d02a25a6 + const char *endptr = NULL; + int ret; + -+ fd = qemu_open(hostpath, O_RDONLY); ++ fd = qemu_open_old(hostpath, O_RDONLY); + if (fd < 0) { + return 0; + } @@ -382,21 +369,27 @@ index 00000000..d02a25a6 +} + +type_init(cpufreq_register_types) -+ +diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build +index fc1b952379..d36b10ea3c 100644 +--- a/hw/acpi/meson.build ++++ b/hw/acpi/meson.build +@@ -27,6 +27,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_ICH9', if_true: files('ich9.c', 'ich9_tco.c')) + acpi_ss.add(when: 'CONFIG_ACPI_ERST', if_true: files('erst.c')) + acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) + acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c')) ++acpi_ss.add(when: 'CONFIG_CPUFREQ', if_true: files('cpufreq.c')) + if have_tpm + acpi_ss.add(files('tpm.c')) + endif diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index 0afb3727..29494ebd 100644 +index 8bc35a483c..3cb50bdc65 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c -@@ -45,11 +45,73 @@ - #include "hw/arm/virt.h" - #include "sysemu/numa.h" - #include "kvm_arm.h" -+#include "hw/acpi/acpi-defs.h" +@@ -63,7 +63,68 @@ - #define ARM_SPI_BASE 32 - #define ACPI_POWER_BUTTON_DEVICE "PWRB" + #define ACPI_BUILD_TABLE_SIZE 0x20000 --static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus) +-static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) +static void acpi_dsdt_add_psd(Aml *dev, int cpus) +{ + Aml *pkg; @@ -457,17 +450,18 @@ index 0afb3727..29494ebd 100644 + aml_append(dev, aml_name_decl("_CPC", cpc)); +} + -+static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, -+ const MemMapEntry *cppc_memmap) ++static void acpi_dsdt_add_cpus(Aml *scope, 
VirtMachineState *vms, ++ const MemMapEntry *cppc_memmap) { + MachineState *ms = MACHINE(vms); uint16_t i; - -@@ -57,6 +119,18 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus) +@@ -72,7 +133,19 @@ static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) Aml *dev = aml_device("C%.03X", i); aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); aml_append(dev, aml_name_decl("_UID", aml_int(i))); +- aml_append(scope, dev); + -+ /* ++ /* + * Append _CPC and _PSD to support CPU frequence show + * Check CPPC available by DESIRED_PERF register + */ @@ -475,78 +469,79 @@ index 0afb3727..29494ebd 100644 + acpi_dsdt_add_cppc(dev, + cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, + cppc_regs_offset); -+ acpi_dsdt_add_psd(dev, smp_cpus); ++ acpi_dsdt_add_psd(dev, ms->smp.cpus); + } + - aml_append(scope, dev); ++ aml_append(scope, dev); } } -@@ -718,7 +792,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + +@@ -858,7 +931,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) * the RTC ACPI device at all when using UEFI. 
*/ scope = aml_scope("\\_SB"); -- acpi_dsdt_add_cpus(scope, vms->smp_cpus); -+ acpi_dsdt_add_cpus(scope, vms->smp_cpus, &memmap[VIRT_CPUFREQ]); +- acpi_dsdt_add_cpus(scope, vms); ++ acpi_dsdt_add_cpus(scope, vms, &memmap[VIRT_CPUFREQ]); acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], (irqmap[VIRT_UART] + ARM_SPI_BASE)); - acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); + if (vmc->acpi_expose_flash) { diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d9496c93..0fa355ba 100644 +index b82bd1b8c8..c19cacec8b 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -135,6 +135,7 @@ static const MemMapEntry base_memmap[] = { - [VIRT_SECURE_UART] = { 0x09040000, 0x00001000 }, - [VIRT_SMMU] = { 0x09050000, 0x00020000 }, +@@ -157,6 +157,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, -+ [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, ++ [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 }, -@@ -731,6 +732,16 @@ static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, +@@ -980,6 +981,16 @@ static void create_uart(const VirtMachineState *vms, int uart, g_free(nodename); } +static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem) +{ + hwaddr base = vms->memmap[VIRT_CPUFREQ].base; -+ DeviceState *dev = qdev_create(NULL, "cpufreq"); ++ DeviceState *dev = qdev_new("cpufreq"); + SysBusDevice *s = SYS_BUS_DEVICE(dev); + -+ qdev_init_nofail(dev); ++ sysbus_realize_and_unref(s, &error_fatal); + memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0)); +} + - static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) + static void create_rtc(const VirtMachineState *vms) { char *nodename; -@@ -1682,6 +1693,8 @@ static void 
machvirt_init(MachineState *machine) +@@ -2346,6 +2357,8 @@ static void machvirt_init(MachineState *machine) - create_uart(vms, pic, VIRT_UART, sysmem, serial_hd(0)); + create_uart(vms, VIRT_UART, sysmem, serial_hd(0)); + create_cpufreq(vms, sysmem); + if (vms->secure) { - create_secure_ram(vms, secure_sysmem); - create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); + create_secure_ram(vms, secure_sysmem, secure_tag_sysmem); + create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); diff --git a/hw/char/Kconfig b/hw/char/Kconfig -index 40e7a8b8..2f61bf53 100644 +index 6b6cf2fc1d..335a60c2c1 100644 --- a/hw/char/Kconfig +++ b/hw/char/Kconfig -@@ -46,3 +46,7 @@ config SCLPCONSOLE +@@ -71,3 +71,7 @@ config GOLDFISH_TTY - config TERMINAL3270 + config SHAKTI_UART bool + +config CPUFREQ -+ bool -+ default y ++ bool ++ default y diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h -index 57a3f58b..39ae91d3 100644 +index 2b42e4192b..b1f389fb4b 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h -@@ -634,4 +634,42 @@ struct AcpiIortRC { - } QEMU_PACKED; - typedef struct AcpiIortRC AcpiIortRC; +@@ -93,4 +93,44 @@ typedef struct AcpiFadtData { + #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) + #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) +/* + * CPPC register definition from kernel header @@ -582,30 +577,32 @@ index 57a3f58b..39ae91d3 100644 + +/* + * Offset for each CPPC register; -1 for unavailable ++ * ++ * Offset for each CPPC register; -1 for unavailable + * The whole register space is unavailable if desired perf offset is -1. 
+ */ +extern int cppc_regs_offset[CPPC_REG_COUNT]; + #endif diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h -index 1a563ad7..375335ab 100644 +index 84ded2ecd3..200cb113de 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h -@@ -347,6 +347,9 @@ Aml *aml_qword_memory(AmlDecode dec, AmlMinFixed min_fixed, - Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz, +@@ -429,6 +429,9 @@ Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz, uint8_t channel); Aml *aml_sleep(uint64_t msec); + Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source); +Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width, + uint8_t reg_offset, AmlAccessType type, + uint64_t addr); /* Block AML object primitives */ - Aml *aml_scope(const char *name_format, ...) GCC_FMT_ATTR(1, 2); + Aml *aml_scope(const char *name_format, ...) G_GNUC_PRINTF(1, 2); diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index a7209420..43a6ce91 100644 +index f69239850e..e944d434c4 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -66,6 +66,7 @@ enum { +@@ -60,6 +60,7 @@ enum { VIRT_GIC_REDIST, VIRT_SMMU, VIRT_UART, @@ -614,5 +611,5 @@ index a7209420..43a6ce91 100644 VIRT_RTC, VIRT_FW_CFG, -- -2.23.0 +2.27.0 diff --git a/aspeed-smc-Fix-possible-integer-overflow.patch b/aspeed-smc-Fix-possible-integer-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..dbe327d448f2868220ba6fc0e29f45a25ca28f66 --- /dev/null +++ b/aspeed-smc-Fix-possible-integer-overflow.patch @@ -0,0 +1,45 @@ +From 041c319f2f91c85aeb4ed0cefa6afa76773fe960 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 25 Jul 2024 09:57:01 +0800 +Subject: [PATCH] aspeed/smc: Fix possible integer overflow +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 13951ccfcdf0f31902a93859506ccf8c0ef66583 + +Coverity reports a possible integer overflow 
because routine +aspeed_smc_hclk_divisor() has a codepath returning 0, which could +lead to an integer overflow when computing variable 'hclk_shift' in +the caller aspeed_smc_dma_calibration(). + +The value passed to aspeed_smc_hclk_divisor() is always between 0 and +15 and, in this case, there is always a matching hclk divisor. Remove +the return 0 and use g_assert_not_reached() instead. + +Fixes: Coverity CID 1547822 +Suggested-by: Peter Maydell +Signed-off-by: Cédric Le Goater +Reviewed-by: Peter Maydell +Signed-off-by: qihao_yewu +--- + hw/ssi/aspeed_smc.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c +index 2a4001b774..8af919a970 100644 +--- a/hw/ssi/aspeed_smc.c ++++ b/hw/ssi/aspeed_smc.c +@@ -764,8 +764,7 @@ static uint8_t aspeed_smc_hclk_divisor(uint8_t hclk_mask) + } + } + +- aspeed_smc_error("invalid HCLK mask %x", hclk_mask); +- return 0; ++ g_assert_not_reached(); + } + + /* +-- +2.41.0.windows.1 + diff --git a/async-use-explicit-memory-barriers.patch b/async-use-explicit-memory-barriers.patch deleted file mode 100644 index 7fb68c949ad4e95cc0f908c44042096b69cf9295..0000000000000000000000000000000000000000 --- a/async-use-explicit-memory-barriers.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 787af8ed2bc86dc8688727d62a251965d9c42e00 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Fri, 10 Apr 2020 16:19:50 +0000 -Subject: [PATCH 2/2] async: use explicit memory barriers - -When using C11 atomics, non-seqcst reads and writes do not participate -in the total order of seqcst operations. In util/async.c and util/aio-posix.c, -in particular, the pattern that we use - - write ctx->notify_me write bh->scheduled - read bh->scheduled read ctx->notify_me - if !bh->scheduled, sleep if ctx->notify_me, notify - -needs to use seqcst operations for both the write and the read. 
In -general this is something that we do not want, because there can be -many sources that are polled in addition to bottom halves. The -alternative is to place a seqcst memory barrier between the write -and the read. This also comes with a disadvantage, in that the -memory barrier is implicit on strongly-ordered architectures and -it wastes a few dozen clock cycles. - -Fortunately, ctx->notify_me is never written concurrently by two -threads, so we can assert that and relax the writes to ctx->notify_me. -The resulting solution works and performs well on both aarch64 and x86. - -Note that the atomic_set/atomic_read combination is not an atomic -read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED; -on x86, ATOMIC_RELAXED compiles to a locked operation. - -upstream_url: https://patchwork.kernel.org/patch/11482103/ - -Analyzed-by: Ying Fang -Signed-off-by: Paolo Bonzini -Tested-by: Ying Fang -Message-Id: <20200407140746.8041-6-pbonzini@redhat.com> -Signed-off-by: Stefan Hajnoczi ---- - util/aio-posix.c | 16 ++++++++++++++-- - util/aio-win32.c | 17 ++++++++++++++--- - util/async.c | 16 ++++++++++++---- - 3 files changed, 40 insertions(+), 9 deletions(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index 6fbfa792..ca58b9a4 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -613,6 +613,11 @@ bool aio_poll(AioContext *ctx, bool blocking) - int64_t timeout; - int64_t start = 0; - -+ /* -+ * There cannot be two concurrent aio_poll calls for the same AioContext (or -+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback). -+ * We rely on this below to avoid slow locked accesses to ctx->notify_me. -+ */ - assert(in_aio_context_home_thread(ctx)); - - /* aio_notify can avoid the expensive event_notifier_set if -@@ -623,7 +628,13 @@ bool aio_poll(AioContext *ctx, bool blocking) - * so disable the optimization now. 
- */ - if (blocking) { -- atomic_add(&ctx->notify_me, 2); -+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2); -+ /* -+ * Write ctx->notify_me before computing the timeout -+ * (reading bottom half flags, etc.). Pairs with -+ * smp_mb in aio_notify(). -+ */ -+ smp_mb(); - } - - qemu_lockcnt_inc(&ctx->list_lock); -@@ -668,7 +679,8 @@ bool aio_poll(AioContext *ctx, bool blocking) - } - - if (blocking) { -- atomic_sub(&ctx->notify_me, 2); -+ /* Finish the poll before clearing the flag. */ -+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2); - aio_notify_accept(ctx); - } - -diff --git a/util/aio-win32.c b/util/aio-win32.c -index a23b9c36..729d533f 100644 ---- a/util/aio-win32.c -+++ b/util/aio-win32.c -@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking) - int count; - int timeout; - -+ /* -+ * There cannot be two concurrent aio_poll calls for the same AioContext (or -+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback). -+ * We rely on this below to avoid slow locked accesses to ctx->notify_me. -+ */ -+ assert(in_aio_context_home_thread(ctx)); - progress = false; - - /* aio_notify can avoid the expensive event_notifier_set if -@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking) - * so disable the optimization now. - */ - if (blocking) { -- atomic_add(&ctx->notify_me, 2); -+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2); -+ /* -+ * Write ctx->notify_me before computing the timeout -+ * (reading bottom half flags, etc.). Pairs with -+ * smp_mb in aio_notify(). 
-+ */ -+ smp_mb(); - } - - qemu_lockcnt_inc(&ctx->list_lock); -@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking) - ret = WaitForMultipleObjects(count, events, FALSE, timeout); - if (blocking) { - assert(first); -- assert(in_aio_context_home_thread(ctx)); -- atomic_sub(&ctx->notify_me, 2); -+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2); - aio_notify_accept(ctx); - } - -diff --git a/util/async.c b/util/async.c -index afc17fb3..12b33204 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -221,7 +221,14 @@ aio_ctx_prepare(GSource *source, gint *timeout) - { - AioContext *ctx = (AioContext *) source; - -- atomic_or(&ctx->notify_me, 1); -+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1); -+ -+ /* -+ * Write ctx->notify_me before computing the timeout -+ * (reading bottom half flags, etc.). Pairs with -+ * smp_mb in aio_notify(). -+ */ -+ smp_mb(); - - /* We assume there is no timeout already supplied */ - *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)); -@@ -239,7 +246,8 @@ aio_ctx_check(GSource *source) - AioContext *ctx = (AioContext *) source; - QEMUBH *bh; - -- atomic_and(&ctx->notify_me, ~1); -+ /* Finish computing the timeout before clearing the flag. */ -+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1); - aio_notify_accept(ctx); - - for (bh = ctx->first_bh; bh; bh = bh->next) { -@@ -344,10 +352,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx) - void aio_notify(AioContext *ctx) - { - /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs -- * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll. -+ * with smp_mb in aio_ctx_prepare or aio_poll. 
- */ - smp_mb(); -- if (ctx->notify_me) { -+ if (atomic_read(&ctx->notify_me)) { - event_notifier_set(&ctx->notifier); - atomic_mb_set(&ctx->notified, true); - } --- -2.25.2 - diff --git a/ati-check-x-y-display-parameter-values.patch b/ati-check-x-y-display-parameter-values.patch deleted file mode 100644 index 22a38b28067a612f181c4fc0a2cf1b9f6d049493..0000000000000000000000000000000000000000 --- a/ati-check-x-y-display-parameter-values.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 9557ba506470517668ffecb4d5ef4804eca4fd88 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Wed, 18 Nov 2020 10:22:32 +0800 -Subject: [PATCH] ati: check x y display parameter values - -fix CVE-2020-24352 - -The source and destination x,y display parameters in ati_2d_blt() -may run off the vga limits if either of s->regs.[src|dst]_[xy] is -zero. Check the parameter values to avoid potential crash. - -Reported-by: Gaoning Pan -Signed-off-by: Prasad J Pandit -Message-id: 20201021103818.1704030-1-ppandit@redhat.com -Signed-off-by: Gerd Hoffmann - -cherry-pick from commit ca1f9cbfdce4d63b10d57de80fef89a89d92a540 -Signed-off-by: Jiajie Li ---- - hw/display/ati_2d.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c -index 23a8ae0cd8..4dc10ea795 100644 ---- a/hw/display/ati_2d.c -+++ b/hw/display/ati_2d.c -@@ -75,8 +75,9 @@ void ati_2d_blt(ATIVGAState *s) - dst_stride *= bpp; - } - uint8_t *end = s->vga.vram_ptr + s->vga.vram_size; -- if (dst_bits >= end || dst_bits + dst_x + (dst_y + s->regs.dst_height) * -- dst_stride >= end) { -+ if (dst_x > 0x3fff || dst_y > 0x3fff || dst_bits >= end -+ || dst_bits + dst_x -+ + (dst_y + s->regs.dst_height) * dst_stride >= end) { - qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); - return; - } -@@ -107,8 +108,9 @@ void ati_2d_blt(ATIVGAState *s) - src_bits += s->regs.crtc_offset & 0x07ffffff; - src_stride *= bpp; - } -- if (src_bits >= end || src_bits + src_x + -- (src_y 
+ s->regs.dst_height) * src_stride >= end) { -+ if (src_x > 0x3fff || src_y > 0x3fff || src_bits >= end -+ || src_bits + src_x -+ + (src_y + s->regs.dst_height) * src_stride >= end) { - qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); - return; - } --- -2.27.0 - diff --git a/ati-use-vga_read_byte-in-ati_cursor_define.patch b/ati-use-vga_read_byte-in-ati_cursor_define.patch deleted file mode 100644 index a4b7e806600c341d865ca6acaf75ed4ce6b9c9b8..0000000000000000000000000000000000000000 --- a/ati-use-vga_read_byte-in-ati_cursor_define.patch +++ /dev/null @@ -1,198 +0,0 @@ -From 1ebe0e71d04bfdc76777a3a672e873f006d207e2 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Fri, 5 Feb 2021 10:38:24 +0800 -Subject: [PATCH] ati: use vga_read_byte in ati_cursor_define -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -fix CVE-2019-20808 - -This makes sure reads are confined to vga video memory. - -v3: use uint32_t, fix cut+paste bug. -v2: fix ati_cursor_draw_line too. 
- -Reported-by: xu hang -Signed-off-by: Gerd Hoffmann -Reviewed-by: BALATON Zoltan -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20190917111441.27405-3-kraxel@redhat.com - -cherry-pick from aab0e2a661b2b6bf7915c0aefe807fb60d6d9d13 -Signed-off-by: Jiajie Li ---- - hw/display/ati.c | 21 ++++++++--------- - hw/display/vga-access.h | 49 ++++++++++++++++++++++++++++++++++++++++ - hw/display/vga-helpers.h | 27 +--------------------- - 3 files changed, 60 insertions(+), 37 deletions(-) - create mode 100644 hw/display/vga-access.h - -diff --git a/hw/display/ati.c b/hw/display/ati.c -index 5943040416..b17569874e 100644 ---- a/hw/display/ati.c -+++ b/hw/display/ati.c -@@ -19,6 +19,7 @@ - #include "qemu/osdep.h" - #include "ati_int.h" - #include "ati_regs.h" -+#include "vga-access.h" - #include "vga_regs.h" - #include "qemu/log.h" - #include "qemu/module.h" -@@ -125,20 +126,19 @@ static void ati_vga_switch_mode(ATIVGAState *s) - static void ati_cursor_define(ATIVGAState *s) - { - uint8_t data[1024]; -- uint8_t *src; -+ uint32_t srcoff; - int i, j, idx = 0; - - if ((s->regs.cur_offset & BIT(31)) || s->cursor_guest_mode) { - return; /* Do not update cursor if locked or rendered by guest */ - } - /* FIXME handle cur_hv_offs correctly */ -- src = s->vga.vram_ptr + (s->regs.crtc_offset & 0x07ffffff) + -- s->regs.cur_offset - (s->regs.cur_hv_offs >> 16) - -- (s->regs.cur_hv_offs & 0xffff) * 16; -+ srcoff = s->regs.cur_offset - -+ (s->regs.cur_hv_offs >> 16) - (s->regs.cur_hv_offs & 0xffff) * 16; - for (i = 0; i < 64; i++) { - for (j = 0; j < 8; j++, idx++) { -- data[idx] = src[i * 16 + j]; -- data[512 + idx] = src[i * 16 + j + 8]; -+ data[idx] = vga_read_byte(&s->vga, srcoff + i * 16 + j); -+ data[512 + idx] = vga_read_byte(&s->vga, srcoff + i * 16 + j + 8); - } - } - if (!s->cursor) { -@@ -180,7 +180,7 @@ static void ati_cursor_invalidate(VGACommonState *vga) - static void ati_cursor_draw_line(VGACommonState *vga, uint8_t *d, int scr_y) - { - ATIVGAState *s = container_of(vga, 
ATIVGAState, vga); -- uint8_t *src; -+ uint32_t srcoff; - uint32_t *dp = (uint32_t *)d; - int i, j, h; - -@@ -190,14 +190,13 @@ static void ati_cursor_draw_line(VGACommonState *vga, uint8_t *d, int scr_y) - return; - } - /* FIXME handle cur_hv_offs correctly */ -- src = s->vga.vram_ptr + (s->regs.crtc_offset & 0x07ffffff) + -- s->cursor_offset + (scr_y - vga->hw_cursor_y) * 16; -+ srcoff = s->cursor_offset + (scr_y - vga->hw_cursor_y) * 16; - dp = &dp[vga->hw_cursor_x]; - h = ((s->regs.crtc_h_total_disp >> 16) + 1) * 8; - for (i = 0; i < 8; i++) { - uint32_t color; -- uint8_t abits = src[i]; -- uint8_t xbits = src[i + 8]; -+ uint8_t abits = vga_read_byte(vga, srcoff + i); -+ uint8_t xbits = vga_read_byte(vga, srcoff + i + 8); - for (j = 0; j < 8; j++, abits <<= 1, xbits <<= 1) { - if (abits & BIT(7)) { - if (xbits & BIT(7)) { -diff --git a/hw/display/vga-access.h b/hw/display/vga-access.h -new file mode 100644 -index 0000000000..c0fbd9958b ---- /dev/null -+++ b/hw/display/vga-access.h -@@ -0,0 +1,49 @@ -+/* -+ * QEMU VGA Emulator templates -+ * -+ * Copyright (c) 2003 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+static inline uint8_t vga_read_byte(VGACommonState *vga, uint32_t addr) -+{ -+ return vga->vram_ptr[addr & vga->vbe_size_mask]; -+} -+ -+static inline uint16_t vga_read_word_le(VGACommonState *vga, uint32_t addr) -+{ -+ uint32_t offset = addr & vga->vbe_size_mask & ~1; -+ uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); -+ return lduw_le_p(ptr); -+} -+ -+static inline uint16_t vga_read_word_be(VGACommonState *vga, uint32_t addr) -+{ -+ uint32_t offset = addr & vga->vbe_size_mask & ~1; -+ uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); -+ return lduw_be_p(ptr); -+} -+ -+static inline uint32_t vga_read_dword_le(VGACommonState *vga, uint32_t addr) -+{ -+ uint32_t offset = addr & vga->vbe_size_mask & ~3; -+ uint32_t *ptr = (uint32_t *)(vga->vram_ptr + offset); -+ return ldl_le_p(ptr); -+} -diff --git a/hw/display/vga-helpers.h b/hw/display/vga-helpers.h -index 5a752b3f9e..5b6c02faa6 100644 ---- a/hw/display/vga-helpers.h -+++ b/hw/display/vga-helpers.h -@@ -21,6 +21,7 @@ - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -+#include "vga-access.h" - - static inline void vga_draw_glyph_line(uint8_t *d, uint32_t font_data, - uint32_t xorcol, uint32_t bgcol) -@@ -95,32 +96,6 @@ static void vga_draw_glyph9(uint8_t *d, int linesize, - } while (--h); - } - --static inline uint8_t vga_read_byte(VGACommonState *vga, uint32_t addr) --{ -- return vga->vram_ptr[addr & vga->vbe_size_mask]; --} -- --static inline uint16_t vga_read_word_le(VGACommonState *vga, uint32_t addr) --{ -- uint32_t offset = addr & vga->vbe_size_mask & ~1; -- uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); -- return lduw_le_p(ptr); --} -- --static inline uint16_t vga_read_word_be(VGACommonState *vga, uint32_t addr) --{ -- uint32_t offset = addr & vga->vbe_size_mask & ~1; -- uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); -- return lduw_be_p(ptr); --} -- --static inline uint32_t vga_read_dword_le(VGACommonState *vga, uint32_t addr) --{ -- uint32_t offset = addr & vga->vbe_size_mask & ~3; -- uint32_t *ptr = (uint32_t *)(vga->vram_ptr + offset); -- return ldl_le_p(ptr); --} -- - /* - * 4 color mode - */ --- -2.27.0 - diff --git a/ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch b/ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch deleted file mode 100644 index ef1d8b646c607a5afb6278ab229efc4a5a15965f..0000000000000000000000000000000000000000 --- a/ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ac2071c3791b67fc7af78b8ceb320c01ca1b5df7 Mon Sep 17 00:00:00 2001 -From: BALATON Zoltan -Date: Mon, 6 Apr 2020 22:34:26 +0200 -Subject: [PATCH] ati-vga: Fix checks in ati_2d_blt() to avoid crash - -In some corner cases (that never happen during normal operation but a -malicious guest could program wrong values) pixman functions were -called with parameters that result in a crash. Fix this and add more -checks to disallow such cases. 
- -Reported-by: Ziming Zhang -Signed-off-by: BALATON Zoltan -Message-id: 20200406204029.19559747D5D@zero.eik.bme.hu -Signed-off-by: Gerd Hoffmann - -diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c -index 42e82311eb..23a8ae0cd8 100644 ---- a/hw/display/ati_2d.c -+++ b/hw/display/ati_2d.c -@@ -53,12 +53,20 @@ void ati_2d_blt(ATIVGAState *s) - s->vga.vbe_start_addr, surface_data(ds), surface_stride(ds), - surface_bits_per_pixel(ds), - (s->regs.dp_mix & GMC_ROP3_MASK) >> 16); -- int dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? -- s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width); -- int dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? -- s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height); -+ unsigned dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? -+ s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width); -+ unsigned dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? -+ s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height); - int bpp = ati_bpp_from_datatype(s); -+ if (!bpp) { -+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid bpp\n"); -+ return; -+ } - int dst_stride = DEFAULT_CNTL ? s->regs.dst_pitch : s->regs.default_pitch; -+ if (!dst_stride) { -+ qemu_log_mask(LOG_GUEST_ERROR, "Zero dest pitch\n"); -+ return; -+ } - uint8_t *dst_bits = s->vga.vram_ptr + (DEFAULT_CNTL ? - s->regs.dst_offset : s->regs.default_offset); - -@@ -82,12 +90,16 @@ void ati_2d_blt(ATIVGAState *s) - switch (s->regs.dp_mix & GMC_ROP3_MASK) { - case ROP3_SRCCOPY: - { -- int src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? -- s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width); -- int src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? -- s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height); -+ unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? -+ s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width); -+ unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? -+ s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height); - int src_stride = DEFAULT_CNTL ? 
- s->regs.src_pitch : s->regs.default_pitch; -+ if (!src_stride) { -+ qemu_log_mask(LOG_GUEST_ERROR, "Zero source pitch\n"); -+ return; -+ } - uint8_t *src_bits = s->vga.vram_ptr + (DEFAULT_CNTL ? - s->regs.src_offset : s->regs.default_offset); - -@@ -137,8 +149,10 @@ void ati_2d_blt(ATIVGAState *s) - dst_y * surface_stride(ds), - s->regs.dst_height * surface_stride(ds)); - } -- s->regs.dst_x += s->regs.dst_width; -- s->regs.dst_y += s->regs.dst_height; -+ s->regs.dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? -+ dst_x + s->regs.dst_width : dst_x); -+ s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? -+ dst_y + s->regs.dst_height : dst_y); - break; - } - case ROP3_PATCOPY: -@@ -179,7 +193,8 @@ void ati_2d_blt(ATIVGAState *s) - dst_y * surface_stride(ds), - s->regs.dst_height * surface_stride(ds)); - } -- s->regs.dst_y += s->regs.dst_height; -+ s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? -+ dst_y + s->regs.dst_height : dst_y); - break; - } - default: --- -2.23.0 - diff --git a/ati-vga-check-mm_index-before-recursive-call-CVE-202.patch b/ati-vga-check-mm_index-before-recursive-call-CVE-202.patch deleted file mode 100644 index b80c9dc973015dd83e3d9c0c000dd0b15b303608..0000000000000000000000000000000000000000 --- a/ati-vga-check-mm_index-before-recursive-call-CVE-202.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 89554d2f71d4c79c5d8e804d90d74f3985d7ded5 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 4 Jun 2020 14:38:30 +0530 -Subject: [PATCH 3/9] ati-vga: check mm_index before recursive call - (CVE-2020-13800) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -While accessing VGA registers via ati_mm_read/write routines, -a guest may set 's->regs.mm_index' such that it leads to infinite -recursion. Check mm_index value to avoid such recursion. Log an -error message for wrong values. 
- -Reported-by: Ren Ding -Reported-by: Hanqing Zhao -Reported-by: Yi Ren -Message-id: 20200604090830.33885-1-ppandit@redhat.com -Suggested-by: BALATON Zoltan -Suggested-by: Philippe Mathieu-Daudé -Signed-off-by: Prasad J Pandit -Signed-off-by: Gerd Hoffmann ---- - hw/display/ati.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/display/ati.c b/hw/display/ati.c -index a747c4cc98..5943040416 100644 ---- a/hw/display/ati.c -+++ b/hw/display/ati.c -@@ -261,8 +261,11 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) - if (idx <= s->vga.vram_size - size) { - val = ldn_le_p(s->vga.vram_ptr + idx, size); - } -- } else { -+ } else if (s->regs.mm_index > MM_DATA + 3) { - val = ati_mm_read(s, s->regs.mm_index + addr - MM_DATA, size); -+ } else { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "ati_mm_read: mm_index too small: %u\n", s->regs.mm_index); - } - break; - case BIOS_0_SCRATCH ... BUS_CNTL - 1: -@@ -472,8 +475,11 @@ static void ati_mm_write(void *opaque, hwaddr addr, - if (idx <= s->vga.vram_size - size) { - stn_le_p(s->vga.vram_ptr + idx, size, data); - } -- } else { -+ } else if (s->regs.mm_index > MM_DATA + 3) { - ati_mm_write(s, s->regs.mm_index + addr - MM_DATA, data, size); -+ } else { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "ati_mm_write: mm_index too small: %u\n", s->regs.mm_index); - } - break; - case BIOS_0_SCRATCH ... 
BUS_CNTL - 1: --- -2.25.1 - diff --git a/audio-audio.c-remove-trailing-newline-in-error_setg.patch b/audio-audio.c-remove-trailing-newline-in-error_setg.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b74577c75b8a7366ac3474eb428e7e2983c6695 --- /dev/null +++ b/audio-audio.c-remove-trailing-newline-in-error_setg.patch @@ -0,0 +1,36 @@ +From b60350d9f495f568aa1380f02a13b51e9619a7de Mon Sep 17 00:00:00 2001 +From: gubin +Date: Mon, 18 Nov 2024 14:17:52 +0800 +Subject: [PATCH] audio/audio.c: remove trailing newline in error_setg +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 09a36158c283f7448d1b00fdbb6634f05d27f922 + +error_setg() appends newline to the formatted message. +Fixes: cb94ff5f80c5 ("audio: propagate Error * out of audio_init") + +Signed-off-by: Michael Tokarev +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: gubin +--- + audio/audio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/audio/audio.c b/audio/audio.c +index 8d1e4ad922..7ac74f9e16 100644 +--- a/audio/audio.c ++++ b/audio/audio.c +@@ -1744,7 +1744,7 @@ static AudioState *audio_init(Audiodev *dev, Error **errp) + if (driver) { + done = !audio_driver_init(s, driver, dev, errp); + } else { +- error_setg(errp, "Unknown audio driver `%s'\n", drvname); ++ error_setg(errp, "Unknown audio driver `%s'", drvname); + } + if (!done) { + goto out; +-- +2.41.0.windows.1 + diff --git a/audio-fix-integer-overflow.patch b/audio-fix-integer-overflow.patch deleted file mode 100644 index 91f5280f1854634460e43b48ae98a4f5eb57b26c..0000000000000000000000000000000000000000 --- a/audio-fix-integer-overflow.patch +++ /dev/null @@ -1,37 +0,0 @@ -From d0c4e8cc25dc3bfed1659c35fb59b2f0418ba1d5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Volker=20R=C3=BCmelin?= -Date: Thu, 19 Dec 2019 21:34:05 +0100 -Subject: [PATCH 2/8] audio: fix integer overflow -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -Tell the compiler to do a 32bit * 32bit -> 64bit multiplication -because period_ticks is a 64bit variable. The overflow occurs -for audio timer periods larger than 4294967us. - -Fixes: be1092afa0 "audio: fix audio timer rate conversion bug" - -Signed-off-by: Volker Rümelin -Message-id: 8893a235-66a8-8fbe-7d95-862e29da90b1@t-online.de -Signed-off-by: Gerd Hoffmann ---- - audio/audio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/audio/audio.c b/audio/audio.c -index 05adf7f..efcb5d4 100644 ---- a/audio/audio.c -+++ b/audio/audio.c -@@ -1473,7 +1473,7 @@ static int audio_init(Audiodev *dev) - if (dev->timer_period <= 0) { - s->period_ticks = 1; - } else { -- s->period_ticks = dev->timer_period * SCALE_US; -+ s->period_ticks = dev->timer_period * (int64_t)SCALE_US; - } - - e = qemu_add_vm_change_state_handler (audio_vm_change_state_handler, s); --- -1.8.3.1 - diff --git a/audio-pw-Report-more-accurate-error-when-connecting--new.patch b/audio-pw-Report-more-accurate-error-when-connecting--new.patch new file mode 100644 index 0000000000000000000000000000000000000000..390c9658f44914ffba3c468b8a895fc744ca1faa --- /dev/null +++ b/audio-pw-Report-more-accurate-error-when-connecting--new.patch @@ -0,0 +1,899 @@ +From 6adb429abb287b3143ed447b334aa89c1a1c0d71 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E5=BC=A0=E6=A5=9A=E5=90=9B?= + +Date: Fri, 18 Oct 2024 10:29:16 +0800 +Subject: [PATCH] audio/pw: Report more accurate error when connecting to + PipeWire fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +According to its man page [1], pw_context_connect() sets errno on +failure: + + Returns a Core on success or NULL with errno set on error. + +It may be handy to see errno when figuring out why PipeWire +failed to connect. 
That leaves us with just one possible path to +reach 'fail_error' label which is then moved to that path and +also its error message is adjusted slightly. + +1: https://docs.pipewire.org/group__pw__core.html#ga5994e3a54e4ec718094ca02a1234815b + +Signed-off-by: Michal Privoznik +Reviewed-by: Manos Pitsidianakis +Reviewed-by: Marc-André Lureau +Message-ID: <3a78811ad5b0e87816b7616ab21d2eeef00b9c52.1726647033.git.mprivozn@redhat.com> +Signed-off-by: Zhang Chujun +--- + audio/pwaudio.c.orig | 858 ------------------------------------------- + 1 file changed, 858 deletions(-) + delete mode 100644 audio/pwaudio.c.orig + +diff --git a/audio/pwaudio.c.orig b/audio/pwaudio.c.orig +deleted file mode 100644 +index 3ce5f6507b..0000000000 +--- a/audio/pwaudio.c.orig ++++ /dev/null +@@ -1,858 +0,0 @@ +-/* +- * QEMU PipeWire audio driver +- * +- * Copyright (c) 2023 Red Hat Inc. +- * +- * Author: Dorinda Bassey +- * +- * SPDX-License-Identifier: GPL-2.0-or-later +- */ +- +-#include "qemu/osdep.h" +-#include "qemu/module.h" +-#include "audio.h" +-#include +-#include "qemu/error-report.h" +-#include "qapi/error.h" +-#include +-#include +-#include +-#include +- +-#include +-#include "trace.h" +- +-#define AUDIO_CAP "pipewire" +-#define RINGBUFFER_SIZE (1u << 22) +-#define RINGBUFFER_MASK (RINGBUFFER_SIZE - 1) +- +-#include "audio_int.h" +- +-typedef struct pwvolume { +- uint32_t channels; +- float values[SPA_AUDIO_MAX_CHANNELS]; +-} pwvolume; +- +-typedef struct pwaudio { +- Audiodev *dev; +- struct pw_thread_loop *thread_loop; +- struct pw_context *context; +- +- struct pw_core *core; +- struct spa_hook core_listener; +- int last_seq, pending_seq, error; +-} pwaudio; +- +-typedef struct PWVoice { +- pwaudio *g; +- struct pw_stream *stream; +- struct spa_hook stream_listener; +- struct spa_audio_info_raw info; +- uint32_t highwater_mark; +- uint32_t frame_size, req; +- struct spa_ringbuffer ring; +- uint8_t buffer[RINGBUFFER_SIZE]; +- +- pwvolume volume; +- bool muted; +-} PWVoice; 
+- +-typedef struct PWVoiceOut { +- HWVoiceOut hw; +- PWVoice v; +-} PWVoiceOut; +- +-typedef struct PWVoiceIn { +- HWVoiceIn hw; +- PWVoice v; +-} PWVoiceIn; +- +-#define PW_VOICE_IN(v) ((PWVoiceIn *)v) +-#define PW_VOICE_OUT(v) ((PWVoiceOut *)v) +- +-static void +-stream_destroy(void *data) +-{ +- PWVoice *v = (PWVoice *) data; +- spa_hook_remove(&v->stream_listener); +- v->stream = NULL; +-} +- +-/* output data processing function to read stuffs from the buffer */ +-static void +-playback_on_process(void *data) +-{ +- PWVoice *v = data; +- void *p; +- struct pw_buffer *b; +- struct spa_buffer *buf; +- uint32_t req, index, n_bytes; +- int32_t avail; +- +- assert(v->stream); +- +- /* obtain a buffer to read from */ +- b = pw_stream_dequeue_buffer(v->stream); +- if (b == NULL) { +- error_report("out of buffers: %s", strerror(errno)); +- return; +- } +- +- buf = b->buffer; +- p = buf->datas[0].data; +- if (p == NULL) { +- return; +- } +- /* calculate the total no of bytes to read data from buffer */ +- req = b->requested * v->frame_size; +- if (req == 0) { +- req = v->req; +- } +- n_bytes = SPA_MIN(req, buf->datas[0].maxsize); +- +- /* get no of available bytes to read data from buffer */ +- avail = spa_ringbuffer_get_read_index(&v->ring, &index); +- +- if (avail <= 0) { +- PWVoiceOut *vo = container_of(data, PWVoiceOut, v); +- audio_pcm_info_clear_buf(&vo->hw.info, p, n_bytes / v->frame_size); +- } else { +- if ((uint32_t) avail < n_bytes) { +- /* +- * PipeWire immediately calls this callback again if we provide +- * less than n_bytes. Then audio_pcm_info_clear_buf() fills the +- * rest of the buffer with silence. 
+- */ +- n_bytes = avail; +- } +- +- spa_ringbuffer_read_data(&v->ring, +- v->buffer, RINGBUFFER_SIZE, +- index & RINGBUFFER_MASK, p, n_bytes); +- +- index += n_bytes; +- spa_ringbuffer_read_update(&v->ring, index); +- +- } +- buf->datas[0].chunk->offset = 0; +- buf->datas[0].chunk->stride = v->frame_size; +- buf->datas[0].chunk->size = n_bytes; +- +- /* queue the buffer for playback */ +- pw_stream_queue_buffer(v->stream, b); +-} +- +-/* output data processing function to generate stuffs in the buffer */ +-static void +-capture_on_process(void *data) +-{ +- PWVoice *v = (PWVoice *) data; +- void *p; +- struct pw_buffer *b; +- struct spa_buffer *buf; +- int32_t filled; +- uint32_t index, offs, n_bytes; +- +- assert(v->stream); +- +- /* obtain a buffer */ +- b = pw_stream_dequeue_buffer(v->stream); +- if (b == NULL) { +- error_report("out of buffers: %s", strerror(errno)); +- return; +- } +- +- /* Write data into buffer */ +- buf = b->buffer; +- p = buf->datas[0].data; +- if (p == NULL) { +- return; +- } +- offs = SPA_MIN(buf->datas[0].chunk->offset, buf->datas[0].maxsize); +- n_bytes = SPA_MIN(buf->datas[0].chunk->size, buf->datas[0].maxsize - offs); +- +- filled = spa_ringbuffer_get_write_index(&v->ring, &index); +- +- +- if (filled < 0) { +- error_report("%p: underrun write:%u filled:%d", p, index, filled); +- } else { +- if ((uint32_t) filled + n_bytes > RINGBUFFER_SIZE) { +- error_report("%p: overrun write:%u filled:%d + size:%u > max:%u", +- p, index, filled, n_bytes, RINGBUFFER_SIZE); +- } +- } +- spa_ringbuffer_write_data(&v->ring, +- v->buffer, RINGBUFFER_SIZE, +- index & RINGBUFFER_MASK, +- SPA_PTROFF(p, offs, void), n_bytes); +- index += n_bytes; +- spa_ringbuffer_write_update(&v->ring, index); +- +- /* queue the buffer for playback */ +- pw_stream_queue_buffer(v->stream, b); +-} +- +-static void +-on_stream_state_changed(void *data, enum pw_stream_state old, +- enum pw_stream_state state, const char *error) +-{ +- PWVoice *v = (PWVoice *) data; +- +- 
trace_pw_state_changed(pw_stream_get_node_id(v->stream), +- pw_stream_state_as_string(state)); +-} +- +-static const struct pw_stream_events capture_stream_events = { +- PW_VERSION_STREAM_EVENTS, +- .destroy = stream_destroy, +- .state_changed = on_stream_state_changed, +- .process = capture_on_process +-}; +- +-static const struct pw_stream_events playback_stream_events = { +- PW_VERSION_STREAM_EVENTS, +- .destroy = stream_destroy, +- .state_changed = on_stream_state_changed, +- .process = playback_on_process +-}; +- +-static size_t +-qpw_read(HWVoiceIn *hw, void *data, size_t len) +-{ +- PWVoiceIn *pw = (PWVoiceIn *) hw; +- PWVoice *v = &pw->v; +- pwaudio *c = v->g; +- const char *error = NULL; +- size_t l; +- int32_t avail; +- uint32_t index; +- +- pw_thread_loop_lock(c->thread_loop); +- if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) { +- /* wait for stream to become ready */ +- l = 0; +- goto done_unlock; +- } +- /* get no of available bytes to read data from buffer */ +- avail = spa_ringbuffer_get_read_index(&v->ring, &index); +- +- trace_pw_read(avail, index, len); +- +- if (avail < (int32_t) len) { +- len = avail; +- } +- +- spa_ringbuffer_read_data(&v->ring, +- v->buffer, RINGBUFFER_SIZE, +- index & RINGBUFFER_MASK, data, len); +- index += len; +- spa_ringbuffer_read_update(&v->ring, index); +- l = len; +- +-done_unlock: +- pw_thread_loop_unlock(c->thread_loop); +- return l; +-} +- +-static size_t qpw_buffer_get_free(HWVoiceOut *hw) +-{ +- PWVoiceOut *pw = (PWVoiceOut *)hw; +- PWVoice *v = &pw->v; +- pwaudio *c = v->g; +- const char *error = NULL; +- int32_t filled, avail; +- uint32_t index; +- +- pw_thread_loop_lock(c->thread_loop); +- if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) { +- /* wait for stream to become ready */ +- avail = 0; +- goto done_unlock; +- } +- +- filled = spa_ringbuffer_get_write_index(&v->ring, &index); +- avail = v->highwater_mark - filled; +- +-done_unlock: +- 
pw_thread_loop_unlock(c->thread_loop); +- return avail; +-} +- +-static size_t +-qpw_write(HWVoiceOut *hw, void *data, size_t len) +-{ +- PWVoiceOut *pw = (PWVoiceOut *) hw; +- PWVoice *v = &pw->v; +- pwaudio *c = v->g; +- const char *error = NULL; +- int32_t filled, avail; +- uint32_t index; +- +- pw_thread_loop_lock(c->thread_loop); +- if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) { +- /* wait for stream to become ready */ +- len = 0; +- goto done_unlock; +- } +- filled = spa_ringbuffer_get_write_index(&v->ring, &index); +- avail = v->highwater_mark - filled; +- +- trace_pw_write(filled, avail, index, len); +- +- if (len > avail) { +- len = avail; +- } +- +- if (filled < 0) { +- error_report("%p: underrun write:%u filled:%d", pw, index, filled); +- } else { +- if ((uint32_t) filled + len > RINGBUFFER_SIZE) { +- error_report("%p: overrun write:%u filled:%d + size:%zu > max:%u", +- pw, index, filled, len, RINGBUFFER_SIZE); +- } +- } +- +- spa_ringbuffer_write_data(&v->ring, +- v->buffer, RINGBUFFER_SIZE, +- index & RINGBUFFER_MASK, data, len); +- index += len; +- spa_ringbuffer_write_update(&v->ring, index); +- +-done_unlock: +- pw_thread_loop_unlock(c->thread_loop); +- return len; +-} +- +-static int +-audfmt_to_pw(AudioFormat fmt, int endianness) +-{ +- int format; +- +- switch (fmt) { +- case AUDIO_FORMAT_S8: +- format = SPA_AUDIO_FORMAT_S8; +- break; +- case AUDIO_FORMAT_U8: +- format = SPA_AUDIO_FORMAT_U8; +- break; +- case AUDIO_FORMAT_S16: +- format = endianness ? SPA_AUDIO_FORMAT_S16_BE : SPA_AUDIO_FORMAT_S16_LE; +- break; +- case AUDIO_FORMAT_U16: +- format = endianness ? SPA_AUDIO_FORMAT_U16_BE : SPA_AUDIO_FORMAT_U16_LE; +- break; +- case AUDIO_FORMAT_S32: +- format = endianness ? SPA_AUDIO_FORMAT_S32_BE : SPA_AUDIO_FORMAT_S32_LE; +- break; +- case AUDIO_FORMAT_U32: +- format = endianness ? SPA_AUDIO_FORMAT_U32_BE : SPA_AUDIO_FORMAT_U32_LE; +- break; +- case AUDIO_FORMAT_F32: +- format = endianness ? 
SPA_AUDIO_FORMAT_F32_BE : SPA_AUDIO_FORMAT_F32_LE; +- break; +- default: +- dolog("Internal logic error: Bad audio format %d\n", fmt); +- format = SPA_AUDIO_FORMAT_U8; +- break; +- } +- return format; +-} +- +-static AudioFormat +-pw_to_audfmt(enum spa_audio_format fmt, int *endianness, +- uint32_t *sample_size) +-{ +- switch (fmt) { +- case SPA_AUDIO_FORMAT_S8: +- *sample_size = 1; +- return AUDIO_FORMAT_S8; +- case SPA_AUDIO_FORMAT_U8: +- *sample_size = 1; +- return AUDIO_FORMAT_U8; +- case SPA_AUDIO_FORMAT_S16_BE: +- *sample_size = 2; +- *endianness = 1; +- return AUDIO_FORMAT_S16; +- case SPA_AUDIO_FORMAT_S16_LE: +- *sample_size = 2; +- *endianness = 0; +- return AUDIO_FORMAT_S16; +- case SPA_AUDIO_FORMAT_U16_BE: +- *sample_size = 2; +- *endianness = 1; +- return AUDIO_FORMAT_U16; +- case SPA_AUDIO_FORMAT_U16_LE: +- *sample_size = 2; +- *endianness = 0; +- return AUDIO_FORMAT_U16; +- case SPA_AUDIO_FORMAT_S32_BE: +- *sample_size = 4; +- *endianness = 1; +- return AUDIO_FORMAT_S32; +- case SPA_AUDIO_FORMAT_S32_LE: +- *sample_size = 4; +- *endianness = 0; +- return AUDIO_FORMAT_S32; +- case SPA_AUDIO_FORMAT_U32_BE: +- *sample_size = 4; +- *endianness = 1; +- return AUDIO_FORMAT_U32; +- case SPA_AUDIO_FORMAT_U32_LE: +- *sample_size = 4; +- *endianness = 0; +- return AUDIO_FORMAT_U32; +- case SPA_AUDIO_FORMAT_F32_BE: +- *sample_size = 4; +- *endianness = 1; +- return AUDIO_FORMAT_F32; +- case SPA_AUDIO_FORMAT_F32_LE: +- *sample_size = 4; +- *endianness = 0; +- return AUDIO_FORMAT_F32; +- default: +- *sample_size = 1; +- dolog("Internal logic error: Bad spa_audio_format %d\n", fmt); +- return AUDIO_FORMAT_U8; +- } +-} +- +-static int +-qpw_stream_new(pwaudio *c, PWVoice *v, const char *stream_name, +- const char *name, enum spa_direction dir) +-{ +- int res; +- uint32_t n_params; +- const struct spa_pod *params[2]; +- uint8_t buffer[1024]; +- struct spa_pod_builder b; +- uint64_t buf_samples; +- struct pw_properties *props; +- +- props = pw_properties_new(NULL, 
NULL); +- if (!props) { +- error_report("Failed to create PW properties: %s", g_strerror(errno)); +- return -1; +- } +- +- /* 75% of the timer period for faster updates */ +- buf_samples = (uint64_t)v->g->dev->timer_period * v->info.rate +- * 3 / 4 / 1000000; +- pw_properties_setf(props, PW_KEY_NODE_LATENCY, "%" PRIu64 "/%u", +- buf_samples, v->info.rate); +- +- trace_pw_period(buf_samples, v->info.rate); +- if (name) { +- pw_properties_set(props, PW_KEY_TARGET_OBJECT, name); +- } +- v->stream = pw_stream_new(c->core, stream_name, props); +- if (v->stream == NULL) { +- error_report("Failed to create PW stream: %s", g_strerror(errno)); +- return -1; +- } +- +- if (dir == SPA_DIRECTION_INPUT) { +- pw_stream_add_listener(v->stream, +- &v->stream_listener, &capture_stream_events, v); +- } else { +- pw_stream_add_listener(v->stream, +- &v->stream_listener, &playback_stream_events, v); +- } +- +- n_params = 0; +- spa_pod_builder_init(&b, buffer, sizeof(buffer)); +- params[n_params++] = spa_format_audio_raw_build(&b, +- SPA_PARAM_EnumFormat, +- &v->info); +- +- /* connect the stream to a sink or source */ +- res = pw_stream_connect(v->stream, +- dir == +- SPA_DIRECTION_INPUT ? PW_DIRECTION_INPUT : +- PW_DIRECTION_OUTPUT, PW_ID_ANY, +- PW_STREAM_FLAG_AUTOCONNECT | +- PW_STREAM_FLAG_INACTIVE | +- PW_STREAM_FLAG_MAP_BUFFERS | +- PW_STREAM_FLAG_RT_PROCESS, params, n_params); +- if (res < 0) { +- error_report("Failed to connect PW stream: %s", g_strerror(errno)); +- pw_stream_destroy(v->stream); +- return -1; +- } +- +- return 0; +-} +- +-static void +-qpw_set_position(uint32_t channels, uint32_t position[SPA_AUDIO_MAX_CHANNELS]) +-{ +- memcpy(position, (uint32_t[SPA_AUDIO_MAX_CHANNELS]) { SPA_AUDIO_CHANNEL_UNKNOWN, }, +- sizeof(uint32_t) * SPA_AUDIO_MAX_CHANNELS); +- /* +- * TODO: This currently expects the only frontend supporting more than 2 +- * channels is the usb-audio. We will need some means to set channel +- * order when a new frontend gains multi-channel support. 
+- */ +- switch (channels) { +- case 8: +- position[6] = SPA_AUDIO_CHANNEL_SL; +- position[7] = SPA_AUDIO_CHANNEL_SR; +- /* fallthrough */ +- case 6: +- position[2] = SPA_AUDIO_CHANNEL_FC; +- position[3] = SPA_AUDIO_CHANNEL_LFE; +- position[4] = SPA_AUDIO_CHANNEL_RL; +- position[5] = SPA_AUDIO_CHANNEL_RR; +- /* fallthrough */ +- case 2: +- position[0] = SPA_AUDIO_CHANNEL_FL; +- position[1] = SPA_AUDIO_CHANNEL_FR; +- break; +- case 1: +- position[0] = SPA_AUDIO_CHANNEL_MONO; +- break; +- default: +- dolog("Internal error: unsupported channel count %d\n", channels); +- } +-} +- +-static int +-qpw_init_out(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque) +-{ +- PWVoiceOut *pw = (PWVoiceOut *) hw; +- PWVoice *v = &pw->v; +- struct audsettings obt_as = *as; +- pwaudio *c = v->g = drv_opaque; +- AudiodevPipewireOptions *popts = &c->dev->u.pipewire; +- AudiodevPipewirePerDirectionOptions *ppdo = popts->out; +- int r; +- +- pw_thread_loop_lock(c->thread_loop); +- +- v->info.format = audfmt_to_pw(as->fmt, as->endianness); +- v->info.channels = as->nchannels; +- qpw_set_position(as->nchannels, v->info.position); +- v->info.rate = as->freq; +- +- obt_as.fmt = +- pw_to_audfmt(v->info.format, &obt_as.endianness, &v->frame_size); +- v->frame_size *= as->nchannels; +- +- v->req = (uint64_t)c->dev->timer_period * v->info.rate +- * 1 / 2 / 1000000 * v->frame_size; +- +- /* call the function that creates a new stream for playback */ +- r = qpw_stream_new(c, v, ppdo->stream_name ? : c->dev->id, +- ppdo->name, SPA_DIRECTION_OUTPUT); +- if (r < 0) { +- pw_thread_loop_unlock(c->thread_loop); +- return -1; +- } +- +- /* report the audio format we support */ +- audio_pcm_init_info(&hw->info, &obt_as); +- +- /* report the buffer size to qemu */ +- hw->samples = audio_buffer_frames( +- qapi_AudiodevPipewirePerDirectionOptions_base(ppdo), &obt_as, 46440); +- v->highwater_mark = MIN(RINGBUFFER_SIZE, +- (ppdo->has_latency ? 
ppdo->latency : 46440) +- * (uint64_t)v->info.rate / 1000000 * v->frame_size); +- +- pw_thread_loop_unlock(c->thread_loop); +- return 0; +-} +- +-static int +-qpw_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque) +-{ +- PWVoiceIn *pw = (PWVoiceIn *) hw; +- PWVoice *v = &pw->v; +- struct audsettings obt_as = *as; +- pwaudio *c = v->g = drv_opaque; +- AudiodevPipewireOptions *popts = &c->dev->u.pipewire; +- AudiodevPipewirePerDirectionOptions *ppdo = popts->in; +- int r; +- +- pw_thread_loop_lock(c->thread_loop); +- +- v->info.format = audfmt_to_pw(as->fmt, as->endianness); +- v->info.channels = as->nchannels; +- qpw_set_position(as->nchannels, v->info.position); +- v->info.rate = as->freq; +- +- obt_as.fmt = +- pw_to_audfmt(v->info.format, &obt_as.endianness, &v->frame_size); +- v->frame_size *= as->nchannels; +- +- /* call the function that creates a new stream for recording */ +- r = qpw_stream_new(c, v, ppdo->stream_name ? : c->dev->id, +- ppdo->name, SPA_DIRECTION_INPUT); +- if (r < 0) { +- pw_thread_loop_unlock(c->thread_loop); +- return -1; +- } +- +- /* report the audio format we support */ +- audio_pcm_init_info(&hw->info, &obt_as); +- +- /* report the buffer size to qemu */ +- hw->samples = audio_buffer_frames( +- qapi_AudiodevPipewirePerDirectionOptions_base(ppdo), &obt_as, 46440); +- +- pw_thread_loop_unlock(c->thread_loop); +- return 0; +-} +- +-static void +-qpw_voice_fini(PWVoice *v) +-{ +- pwaudio *c = v->g; +- +- if (!v->stream) { +- return; +- } +- pw_thread_loop_lock(c->thread_loop); +- pw_stream_destroy(v->stream); +- v->stream = NULL; +- pw_thread_loop_unlock(c->thread_loop); +-} +- +-static void +-qpw_fini_out(HWVoiceOut *hw) +-{ +- qpw_voice_fini(&PW_VOICE_OUT(hw)->v); +-} +- +-static void +-qpw_fini_in(HWVoiceIn *hw) +-{ +- qpw_voice_fini(&PW_VOICE_IN(hw)->v); +-} +- +-static void +-qpw_voice_set_enabled(PWVoice *v, bool enable) +-{ +- pwaudio *c = v->g; +- pw_thread_loop_lock(c->thread_loop); +- pw_stream_set_active(v->stream, 
enable); +- pw_thread_loop_unlock(c->thread_loop); +-} +- +-static void +-qpw_enable_out(HWVoiceOut *hw, bool enable) +-{ +- qpw_voice_set_enabled(&PW_VOICE_OUT(hw)->v, enable); +-} +- +-static void +-qpw_enable_in(HWVoiceIn *hw, bool enable) +-{ +- qpw_voice_set_enabled(&PW_VOICE_IN(hw)->v, enable); +-} +- +-static void +-qpw_voice_set_volume(PWVoice *v, Volume *vol) +-{ +- pwaudio *c = v->g; +- int i, ret; +- +- pw_thread_loop_lock(c->thread_loop); +- v->volume.channels = vol->channels; +- +- for (i = 0; i < vol->channels; ++i) { +- v->volume.values[i] = (float)vol->vol[i] / 255; +- } +- +- ret = pw_stream_set_control(v->stream, +- SPA_PROP_channelVolumes, v->volume.channels, v->volume.values, 0); +- trace_pw_vol(ret == 0 ? "success" : "failed"); +- +- v->muted = vol->mute; +- float val = v->muted ? 1.f : 0.f; +- ret = pw_stream_set_control(v->stream, SPA_PROP_mute, 1, &val, 0); +- pw_thread_loop_unlock(c->thread_loop); +-} +- +-static void +-qpw_volume_out(HWVoiceOut *hw, Volume *vol) +-{ +- qpw_voice_set_volume(&PW_VOICE_OUT(hw)->v, vol); +-} +- +-static void +-qpw_volume_in(HWVoiceIn *hw, Volume *vol) +-{ +- qpw_voice_set_volume(&PW_VOICE_IN(hw)->v, vol); +-} +- +-static int wait_resync(pwaudio *pw) +-{ +- int res; +- pw->pending_seq = pw_core_sync(pw->core, PW_ID_CORE, pw->pending_seq); +- +- while (true) { +- pw_thread_loop_wait(pw->thread_loop); +- +- res = pw->error; +- if (res < 0) { +- pw->error = 0; +- return res; +- } +- if (pw->pending_seq == pw->last_seq) { +- break; +- } +- } +- return 0; +-} +- +-static void +-on_core_error(void *data, uint32_t id, int seq, int res, const char *message) +-{ +- pwaudio *pw = data; +- +- error_report("error id:%u seq:%d res:%d (%s): %s", +- id, seq, res, spa_strerror(res), message); +- +- /* stop and exit the thread loop */ +- pw_thread_loop_signal(pw->thread_loop, FALSE); +-} +- +-static void +-on_core_done(void *data, uint32_t id, int seq) +-{ +- pwaudio *pw = data; +- assert(id == PW_ID_CORE); +- pw->last_seq = 
seq; +- if (pw->pending_seq == seq) { +- /* stop and exit the thread loop */ +- pw_thread_loop_signal(pw->thread_loop, FALSE); +- } +-} +- +-static const struct pw_core_events core_events = { +- PW_VERSION_CORE_EVENTS, +- .done = on_core_done, +- .error = on_core_error, +-}; +- +-static void * +-qpw_audio_init(Audiodev *dev, Error **errp) +-{ +- g_autofree pwaudio *pw = g_new0(pwaudio, 1); +- +- assert(dev->driver == AUDIODEV_DRIVER_PIPEWIRE); +- trace_pw_audio_init(); +- +- pw_init(NULL, NULL); +- +- pw->dev = dev; +- pw->thread_loop = pw_thread_loop_new("PipeWire thread loop", NULL); +- if (pw->thread_loop == NULL) { +- error_setg_errno(errp, errno, "Could not create PipeWire loop"); +- goto fail; +- } +- +- pw->context = +- pw_context_new(pw_thread_loop_get_loop(pw->thread_loop), NULL, 0); +- if (pw->context == NULL) { +- error_setg_errno(errp, errno, "Could not create PipeWire context"); +- goto fail; +- } +- +- if (pw_thread_loop_start(pw->thread_loop) < 0) { +- error_setg_errno(errp, errno, "Could not start PipeWire loop"); +- goto fail; +- } +- +- pw_thread_loop_lock(pw->thread_loop); +- +- pw->core = pw_context_connect(pw->context, NULL, 0); +- if (pw->core == NULL) { +- pw_thread_loop_unlock(pw->thread_loop); +- goto fail_error; +- } +- +- if (pw_core_add_listener(pw->core, &pw->core_listener, +- &core_events, pw) < 0) { +- pw_thread_loop_unlock(pw->thread_loop); +- goto fail_error; +- } +- if (wait_resync(pw) < 0) { +- pw_thread_loop_unlock(pw->thread_loop); +- } +- +- pw_thread_loop_unlock(pw->thread_loop); +- +- return g_steal_pointer(&pw); +- +-fail_error: +- error_setg(errp, "Failed to initialize PW context"); +-fail: +- if (pw->thread_loop) { +- pw_thread_loop_stop(pw->thread_loop); +- } +- g_clear_pointer(&pw->context, pw_context_destroy); +- g_clear_pointer(&pw->thread_loop, pw_thread_loop_destroy); +- return NULL; +-} +- +-static void +-qpw_audio_fini(void *opaque) +-{ +- pwaudio *pw = opaque; +- +- if (pw->thread_loop) { +- 
pw_thread_loop_stop(pw->thread_loop); +- } +- +- if (pw->core) { +- spa_hook_remove(&pw->core_listener); +- spa_zero(pw->core_listener); +- pw_core_disconnect(pw->core); +- } +- +- if (pw->context) { +- pw_context_destroy(pw->context); +- } +- pw_thread_loop_destroy(pw->thread_loop); +- +- g_free(pw); +-} +- +-static struct audio_pcm_ops qpw_pcm_ops = { +- .init_out = qpw_init_out, +- .fini_out = qpw_fini_out, +- .write = qpw_write, +- .buffer_get_free = qpw_buffer_get_free, +- .run_buffer_out = audio_generic_run_buffer_out, +- .enable_out = qpw_enable_out, +- .volume_out = qpw_volume_out, +- .volume_in = qpw_volume_in, +- +- .init_in = qpw_init_in, +- .fini_in = qpw_fini_in, +- .read = qpw_read, +- .run_buffer_in = audio_generic_run_buffer_in, +- .enable_in = qpw_enable_in +-}; +- +-static struct audio_driver pw_audio_driver = { +- .name = "pipewire", +- .descr = "http://www.pipewire.org/", +- .init = qpw_audio_init, +- .fini = qpw_audio_fini, +- .pcm_ops = &qpw_pcm_ops, +- .max_voices_out = INT_MAX, +- .max_voices_in = INT_MAX, +- .voice_size_out = sizeof(PWVoiceOut), +- .voice_size_in = sizeof(PWVoiceIn), +-}; +- +-static void +-register_audio_pw(void) +-{ +- audio_driver_register(&pw_audio_driver); +-} +- +-type_init(register_audio_pw); +-- +2.41.0.windows.1 + diff --git a/audio-pw-Report-more-accurate-error-when-connecting-.patch b/audio-pw-Report-more-accurate-error-when-connecting-.patch new file mode 100644 index 0000000000000000000000000000000000000000..20de4547f38c74d7adb12eaa7a48e3ca31d0ee63 --- /dev/null +++ b/audio-pw-Report-more-accurate-error-when-connecting-.patch @@ -0,0 +1,931 @@ +From fab03a72da74e938a2a476f1824ac0acd4a1fee2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E5=BC=A0=E6=A5=9A=E5=90=9B?= + +Date: Fri, 18 Oct 2024 10:17:10 +0800 +Subject: [PATCH] audio/pw: Report more accurate error when connecting to + PipeWire fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +According to its man page 
[1], pw_context_connect() sets errno on +failure: + + Returns a Core on success or NULL with errno set on error. + +It may be handy to see errno when figuring out why PipeWire +failed to connect. That leaves us with just one possible path to +reach 'fail_error' label which is then moved to that path and +also its error message is adjusted slightly. + +1: https://docs.pipewire.org/group__pw__core.html#ga5994e3a54e4ec718094ca02a1234815b + +Signed-off-by: Michal Privoznik +Reviewed-by: Manos Pitsidianakis +Reviewed-by: Marc-André Lureau +Message-ID: <3a78811ad5b0e87816b7616ab21d2eeef00b9c52.1726647033.git.mprivozn@redhat.com> +Signed-off-by: Zhang Chujun +--- + audio/pwaudio.c | 8 +- + audio/pwaudio.c.orig | 858 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 862 insertions(+), 4 deletions(-) + create mode 100644 audio/pwaudio.c.orig + +diff --git a/audio/pwaudio.c b/audio/pwaudio.c +index 3ce5f6507b..5d1c7126d3 100644 +--- a/audio/pwaudio.c ++++ b/audio/pwaudio.c +@@ -770,13 +770,15 @@ qpw_audio_init(Audiodev *dev, Error **errp) + pw->core = pw_context_connect(pw->context, NULL, 0); + if (pw->core == NULL) { + pw_thread_loop_unlock(pw->thread_loop); +- goto fail_error; ++ error_setg_errno(errp, errno, "Failed to connect to PipeWire instance"); ++ goto fail; + } + + if (pw_core_add_listener(pw->core, &pw->core_listener, + &core_events, pw) < 0) { + pw_thread_loop_unlock(pw->thread_loop); +- goto fail_error; ++ error_setg(errp, "Failed to add PipeWire listener"); ++ goto fail; + } + if (wait_resync(pw) < 0) { + pw_thread_loop_unlock(pw->thread_loop); +@@ -786,8 +788,6 @@ qpw_audio_init(Audiodev *dev, Error **errp) + + return g_steal_pointer(&pw); + +-fail_error: +- error_setg(errp, "Failed to initialize PW context"); + fail: + if (pw->thread_loop) { + pw_thread_loop_stop(pw->thread_loop); +diff --git a/audio/pwaudio.c.orig b/audio/pwaudio.c.orig +new file mode 100644 +index 0000000000..3ce5f6507b +--- /dev/null ++++ b/audio/pwaudio.c.orig +@@ -0,0 +1,858 
@@ ++/* ++ * QEMU PipeWire audio driver ++ * ++ * Copyright (c) 2023 Red Hat Inc. ++ * ++ * Author: Dorinda Bassey ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/module.h" ++#include "audio.h" ++#include ++#include "qemu/error-report.h" ++#include "qapi/error.h" ++#include ++#include ++#include ++#include ++ ++#include ++#include "trace.h" ++ ++#define AUDIO_CAP "pipewire" ++#define RINGBUFFER_SIZE (1u << 22) ++#define RINGBUFFER_MASK (RINGBUFFER_SIZE - 1) ++ ++#include "audio_int.h" ++ ++typedef struct pwvolume { ++ uint32_t channels; ++ float values[SPA_AUDIO_MAX_CHANNELS]; ++} pwvolume; ++ ++typedef struct pwaudio { ++ Audiodev *dev; ++ struct pw_thread_loop *thread_loop; ++ struct pw_context *context; ++ ++ struct pw_core *core; ++ struct spa_hook core_listener; ++ int last_seq, pending_seq, error; ++} pwaudio; ++ ++typedef struct PWVoice { ++ pwaudio *g; ++ struct pw_stream *stream; ++ struct spa_hook stream_listener; ++ struct spa_audio_info_raw info; ++ uint32_t highwater_mark; ++ uint32_t frame_size, req; ++ struct spa_ringbuffer ring; ++ uint8_t buffer[RINGBUFFER_SIZE]; ++ ++ pwvolume volume; ++ bool muted; ++} PWVoice; ++ ++typedef struct PWVoiceOut { ++ HWVoiceOut hw; ++ PWVoice v; ++} PWVoiceOut; ++ ++typedef struct PWVoiceIn { ++ HWVoiceIn hw; ++ PWVoice v; ++} PWVoiceIn; ++ ++#define PW_VOICE_IN(v) ((PWVoiceIn *)v) ++#define PW_VOICE_OUT(v) ((PWVoiceOut *)v) ++ ++static void ++stream_destroy(void *data) ++{ ++ PWVoice *v = (PWVoice *) data; ++ spa_hook_remove(&v->stream_listener); ++ v->stream = NULL; ++} ++ ++/* output data processing function to read stuffs from the buffer */ ++static void ++playback_on_process(void *data) ++{ ++ PWVoice *v = data; ++ void *p; ++ struct pw_buffer *b; ++ struct spa_buffer *buf; ++ uint32_t req, index, n_bytes; ++ int32_t avail; ++ ++ assert(v->stream); ++ ++ /* obtain a buffer to read from */ ++ b = pw_stream_dequeue_buffer(v->stream); ++ if (b == NULL) { ++ 
error_report("out of buffers: %s", strerror(errno)); ++ return; ++ } ++ ++ buf = b->buffer; ++ p = buf->datas[0].data; ++ if (p == NULL) { ++ return; ++ } ++ /* calculate the total no of bytes to read data from buffer */ ++ req = b->requested * v->frame_size; ++ if (req == 0) { ++ req = v->req; ++ } ++ n_bytes = SPA_MIN(req, buf->datas[0].maxsize); ++ ++ /* get no of available bytes to read data from buffer */ ++ avail = spa_ringbuffer_get_read_index(&v->ring, &index); ++ ++ if (avail <= 0) { ++ PWVoiceOut *vo = container_of(data, PWVoiceOut, v); ++ audio_pcm_info_clear_buf(&vo->hw.info, p, n_bytes / v->frame_size); ++ } else { ++ if ((uint32_t) avail < n_bytes) { ++ /* ++ * PipeWire immediately calls this callback again if we provide ++ * less than n_bytes. Then audio_pcm_info_clear_buf() fills the ++ * rest of the buffer with silence. ++ */ ++ n_bytes = avail; ++ } ++ ++ spa_ringbuffer_read_data(&v->ring, ++ v->buffer, RINGBUFFER_SIZE, ++ index & RINGBUFFER_MASK, p, n_bytes); ++ ++ index += n_bytes; ++ spa_ringbuffer_read_update(&v->ring, index); ++ ++ } ++ buf->datas[0].chunk->offset = 0; ++ buf->datas[0].chunk->stride = v->frame_size; ++ buf->datas[0].chunk->size = n_bytes; ++ ++ /* queue the buffer for playback */ ++ pw_stream_queue_buffer(v->stream, b); ++} ++ ++/* output data processing function to generate stuffs in the buffer */ ++static void ++capture_on_process(void *data) ++{ ++ PWVoice *v = (PWVoice *) data; ++ void *p; ++ struct pw_buffer *b; ++ struct spa_buffer *buf; ++ int32_t filled; ++ uint32_t index, offs, n_bytes; ++ ++ assert(v->stream); ++ ++ /* obtain a buffer */ ++ b = pw_stream_dequeue_buffer(v->stream); ++ if (b == NULL) { ++ error_report("out of buffers: %s", strerror(errno)); ++ return; ++ } ++ ++ /* Write data into buffer */ ++ buf = b->buffer; ++ p = buf->datas[0].data; ++ if (p == NULL) { ++ return; ++ } ++ offs = SPA_MIN(buf->datas[0].chunk->offset, buf->datas[0].maxsize); ++ n_bytes = SPA_MIN(buf->datas[0].chunk->size, 
buf->datas[0].maxsize - offs); ++ ++ filled = spa_ringbuffer_get_write_index(&v->ring, &index); ++ ++ ++ if (filled < 0) { ++ error_report("%p: underrun write:%u filled:%d", p, index, filled); ++ } else { ++ if ((uint32_t) filled + n_bytes > RINGBUFFER_SIZE) { ++ error_report("%p: overrun write:%u filled:%d + size:%u > max:%u", ++ p, index, filled, n_bytes, RINGBUFFER_SIZE); ++ } ++ } ++ spa_ringbuffer_write_data(&v->ring, ++ v->buffer, RINGBUFFER_SIZE, ++ index & RINGBUFFER_MASK, ++ SPA_PTROFF(p, offs, void), n_bytes); ++ index += n_bytes; ++ spa_ringbuffer_write_update(&v->ring, index); ++ ++ /* queue the buffer for playback */ ++ pw_stream_queue_buffer(v->stream, b); ++} ++ ++static void ++on_stream_state_changed(void *data, enum pw_stream_state old, ++ enum pw_stream_state state, const char *error) ++{ ++ PWVoice *v = (PWVoice *) data; ++ ++ trace_pw_state_changed(pw_stream_get_node_id(v->stream), ++ pw_stream_state_as_string(state)); ++} ++ ++static const struct pw_stream_events capture_stream_events = { ++ PW_VERSION_STREAM_EVENTS, ++ .destroy = stream_destroy, ++ .state_changed = on_stream_state_changed, ++ .process = capture_on_process ++}; ++ ++static const struct pw_stream_events playback_stream_events = { ++ PW_VERSION_STREAM_EVENTS, ++ .destroy = stream_destroy, ++ .state_changed = on_stream_state_changed, ++ .process = playback_on_process ++}; ++ ++static size_t ++qpw_read(HWVoiceIn *hw, void *data, size_t len) ++{ ++ PWVoiceIn *pw = (PWVoiceIn *) hw; ++ PWVoice *v = &pw->v; ++ pwaudio *c = v->g; ++ const char *error = NULL; ++ size_t l; ++ int32_t avail; ++ uint32_t index; ++ ++ pw_thread_loop_lock(c->thread_loop); ++ if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) { ++ /* wait for stream to become ready */ ++ l = 0; ++ goto done_unlock; ++ } ++ /* get no of available bytes to read data from buffer */ ++ avail = spa_ringbuffer_get_read_index(&v->ring, &index); ++ ++ trace_pw_read(avail, index, len); ++ ++ if (avail < (int32_t) 
len) { ++ len = avail; ++ } ++ ++ spa_ringbuffer_read_data(&v->ring, ++ v->buffer, RINGBUFFER_SIZE, ++ index & RINGBUFFER_MASK, data, len); ++ index += len; ++ spa_ringbuffer_read_update(&v->ring, index); ++ l = len; ++ ++done_unlock: ++ pw_thread_loop_unlock(c->thread_loop); ++ return l; ++} ++ ++static size_t qpw_buffer_get_free(HWVoiceOut *hw) ++{ ++ PWVoiceOut *pw = (PWVoiceOut *)hw; ++ PWVoice *v = &pw->v; ++ pwaudio *c = v->g; ++ const char *error = NULL; ++ int32_t filled, avail; ++ uint32_t index; ++ ++ pw_thread_loop_lock(c->thread_loop); ++ if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) { ++ /* wait for stream to become ready */ ++ avail = 0; ++ goto done_unlock; ++ } ++ ++ filled = spa_ringbuffer_get_write_index(&v->ring, &index); ++ avail = v->highwater_mark - filled; ++ ++done_unlock: ++ pw_thread_loop_unlock(c->thread_loop); ++ return avail; ++} ++ ++static size_t ++qpw_write(HWVoiceOut *hw, void *data, size_t len) ++{ ++ PWVoiceOut *pw = (PWVoiceOut *) hw; ++ PWVoice *v = &pw->v; ++ pwaudio *c = v->g; ++ const char *error = NULL; ++ int32_t filled, avail; ++ uint32_t index; ++ ++ pw_thread_loop_lock(c->thread_loop); ++ if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) { ++ /* wait for stream to become ready */ ++ len = 0; ++ goto done_unlock; ++ } ++ filled = spa_ringbuffer_get_write_index(&v->ring, &index); ++ avail = v->highwater_mark - filled; ++ ++ trace_pw_write(filled, avail, index, len); ++ ++ if (len > avail) { ++ len = avail; ++ } ++ ++ if (filled < 0) { ++ error_report("%p: underrun write:%u filled:%d", pw, index, filled); ++ } else { ++ if ((uint32_t) filled + len > RINGBUFFER_SIZE) { ++ error_report("%p: overrun write:%u filled:%d + size:%zu > max:%u", ++ pw, index, filled, len, RINGBUFFER_SIZE); ++ } ++ } ++ ++ spa_ringbuffer_write_data(&v->ring, ++ v->buffer, RINGBUFFER_SIZE, ++ index & RINGBUFFER_MASK, data, len); ++ index += len; ++ spa_ringbuffer_write_update(&v->ring, index); ++ 
++done_unlock: ++ pw_thread_loop_unlock(c->thread_loop); ++ return len; ++} ++ ++static int ++audfmt_to_pw(AudioFormat fmt, int endianness) ++{ ++ int format; ++ ++ switch (fmt) { ++ case AUDIO_FORMAT_S8: ++ format = SPA_AUDIO_FORMAT_S8; ++ break; ++ case AUDIO_FORMAT_U8: ++ format = SPA_AUDIO_FORMAT_U8; ++ break; ++ case AUDIO_FORMAT_S16: ++ format = endianness ? SPA_AUDIO_FORMAT_S16_BE : SPA_AUDIO_FORMAT_S16_LE; ++ break; ++ case AUDIO_FORMAT_U16: ++ format = endianness ? SPA_AUDIO_FORMAT_U16_BE : SPA_AUDIO_FORMAT_U16_LE; ++ break; ++ case AUDIO_FORMAT_S32: ++ format = endianness ? SPA_AUDIO_FORMAT_S32_BE : SPA_AUDIO_FORMAT_S32_LE; ++ break; ++ case AUDIO_FORMAT_U32: ++ format = endianness ? SPA_AUDIO_FORMAT_U32_BE : SPA_AUDIO_FORMAT_U32_LE; ++ break; ++ case AUDIO_FORMAT_F32: ++ format = endianness ? SPA_AUDIO_FORMAT_F32_BE : SPA_AUDIO_FORMAT_F32_LE; ++ break; ++ default: ++ dolog("Internal logic error: Bad audio format %d\n", fmt); ++ format = SPA_AUDIO_FORMAT_U8; ++ break; ++ } ++ return format; ++} ++ ++static AudioFormat ++pw_to_audfmt(enum spa_audio_format fmt, int *endianness, ++ uint32_t *sample_size) ++{ ++ switch (fmt) { ++ case SPA_AUDIO_FORMAT_S8: ++ *sample_size = 1; ++ return AUDIO_FORMAT_S8; ++ case SPA_AUDIO_FORMAT_U8: ++ *sample_size = 1; ++ return AUDIO_FORMAT_U8; ++ case SPA_AUDIO_FORMAT_S16_BE: ++ *sample_size = 2; ++ *endianness = 1; ++ return AUDIO_FORMAT_S16; ++ case SPA_AUDIO_FORMAT_S16_LE: ++ *sample_size = 2; ++ *endianness = 0; ++ return AUDIO_FORMAT_S16; ++ case SPA_AUDIO_FORMAT_U16_BE: ++ *sample_size = 2; ++ *endianness = 1; ++ return AUDIO_FORMAT_U16; ++ case SPA_AUDIO_FORMAT_U16_LE: ++ *sample_size = 2; ++ *endianness = 0; ++ return AUDIO_FORMAT_U16; ++ case SPA_AUDIO_FORMAT_S32_BE: ++ *sample_size = 4; ++ *endianness = 1; ++ return AUDIO_FORMAT_S32; ++ case SPA_AUDIO_FORMAT_S32_LE: ++ *sample_size = 4; ++ *endianness = 0; ++ return AUDIO_FORMAT_S32; ++ case SPA_AUDIO_FORMAT_U32_BE: ++ *sample_size = 4; ++ *endianness = 1; ++ 
return AUDIO_FORMAT_U32; ++ case SPA_AUDIO_FORMAT_U32_LE: ++ *sample_size = 4; ++ *endianness = 0; ++ return AUDIO_FORMAT_U32; ++ case SPA_AUDIO_FORMAT_F32_BE: ++ *sample_size = 4; ++ *endianness = 1; ++ return AUDIO_FORMAT_F32; ++ case SPA_AUDIO_FORMAT_F32_LE: ++ *sample_size = 4; ++ *endianness = 0; ++ return AUDIO_FORMAT_F32; ++ default: ++ *sample_size = 1; ++ dolog("Internal logic error: Bad spa_audio_format %d\n", fmt); ++ return AUDIO_FORMAT_U8; ++ } ++} ++ ++static int ++qpw_stream_new(pwaudio *c, PWVoice *v, const char *stream_name, ++ const char *name, enum spa_direction dir) ++{ ++ int res; ++ uint32_t n_params; ++ const struct spa_pod *params[2]; ++ uint8_t buffer[1024]; ++ struct spa_pod_builder b; ++ uint64_t buf_samples; ++ struct pw_properties *props; ++ ++ props = pw_properties_new(NULL, NULL); ++ if (!props) { ++ error_report("Failed to create PW properties: %s", g_strerror(errno)); ++ return -1; ++ } ++ ++ /* 75% of the timer period for faster updates */ ++ buf_samples = (uint64_t)v->g->dev->timer_period * v->info.rate ++ * 3 / 4 / 1000000; ++ pw_properties_setf(props, PW_KEY_NODE_LATENCY, "%" PRIu64 "/%u", ++ buf_samples, v->info.rate); ++ ++ trace_pw_period(buf_samples, v->info.rate); ++ if (name) { ++ pw_properties_set(props, PW_KEY_TARGET_OBJECT, name); ++ } ++ v->stream = pw_stream_new(c->core, stream_name, props); ++ if (v->stream == NULL) { ++ error_report("Failed to create PW stream: %s", g_strerror(errno)); ++ return -1; ++ } ++ ++ if (dir == SPA_DIRECTION_INPUT) { ++ pw_stream_add_listener(v->stream, ++ &v->stream_listener, &capture_stream_events, v); ++ } else { ++ pw_stream_add_listener(v->stream, ++ &v->stream_listener, &playback_stream_events, v); ++ } ++ ++ n_params = 0; ++ spa_pod_builder_init(&b, buffer, sizeof(buffer)); ++ params[n_params++] = spa_format_audio_raw_build(&b, ++ SPA_PARAM_EnumFormat, ++ &v->info); ++ ++ /* connect the stream to a sink or source */ ++ res = pw_stream_connect(v->stream, ++ dir == ++ 
SPA_DIRECTION_INPUT ? PW_DIRECTION_INPUT : ++ PW_DIRECTION_OUTPUT, PW_ID_ANY, ++ PW_STREAM_FLAG_AUTOCONNECT | ++ PW_STREAM_FLAG_INACTIVE | ++ PW_STREAM_FLAG_MAP_BUFFERS | ++ PW_STREAM_FLAG_RT_PROCESS, params, n_params); ++ if (res < 0) { ++ error_report("Failed to connect PW stream: %s", g_strerror(errno)); ++ pw_stream_destroy(v->stream); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static void ++qpw_set_position(uint32_t channels, uint32_t position[SPA_AUDIO_MAX_CHANNELS]) ++{ ++ memcpy(position, (uint32_t[SPA_AUDIO_MAX_CHANNELS]) { SPA_AUDIO_CHANNEL_UNKNOWN, }, ++ sizeof(uint32_t) * SPA_AUDIO_MAX_CHANNELS); ++ /* ++ * TODO: This currently expects the only frontend supporting more than 2 ++ * channels is the usb-audio. We will need some means to set channel ++ * order when a new frontend gains multi-channel support. ++ */ ++ switch (channels) { ++ case 8: ++ position[6] = SPA_AUDIO_CHANNEL_SL; ++ position[7] = SPA_AUDIO_CHANNEL_SR; ++ /* fallthrough */ ++ case 6: ++ position[2] = SPA_AUDIO_CHANNEL_FC; ++ position[3] = SPA_AUDIO_CHANNEL_LFE; ++ position[4] = SPA_AUDIO_CHANNEL_RL; ++ position[5] = SPA_AUDIO_CHANNEL_RR; ++ /* fallthrough */ ++ case 2: ++ position[0] = SPA_AUDIO_CHANNEL_FL; ++ position[1] = SPA_AUDIO_CHANNEL_FR; ++ break; ++ case 1: ++ position[0] = SPA_AUDIO_CHANNEL_MONO; ++ break; ++ default: ++ dolog("Internal error: unsupported channel count %d\n", channels); ++ } ++} ++ ++static int ++qpw_init_out(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque) ++{ ++ PWVoiceOut *pw = (PWVoiceOut *) hw; ++ PWVoice *v = &pw->v; ++ struct audsettings obt_as = *as; ++ pwaudio *c = v->g = drv_opaque; ++ AudiodevPipewireOptions *popts = &c->dev->u.pipewire; ++ AudiodevPipewirePerDirectionOptions *ppdo = popts->out; ++ int r; ++ ++ pw_thread_loop_lock(c->thread_loop); ++ ++ v->info.format = audfmt_to_pw(as->fmt, as->endianness); ++ v->info.channels = as->nchannels; ++ qpw_set_position(as->nchannels, v->info.position); ++ v->info.rate = as->freq; ++ ++ 
obt_as.fmt = ++ pw_to_audfmt(v->info.format, &obt_as.endianness, &v->frame_size); ++ v->frame_size *= as->nchannels; ++ ++ v->req = (uint64_t)c->dev->timer_period * v->info.rate ++ * 1 / 2 / 1000000 * v->frame_size; ++ ++ /* call the function that creates a new stream for playback */ ++ r = qpw_stream_new(c, v, ppdo->stream_name ? : c->dev->id, ++ ppdo->name, SPA_DIRECTION_OUTPUT); ++ if (r < 0) { ++ pw_thread_loop_unlock(c->thread_loop); ++ return -1; ++ } ++ ++ /* report the audio format we support */ ++ audio_pcm_init_info(&hw->info, &obt_as); ++ ++ /* report the buffer size to qemu */ ++ hw->samples = audio_buffer_frames( ++ qapi_AudiodevPipewirePerDirectionOptions_base(ppdo), &obt_as, 46440); ++ v->highwater_mark = MIN(RINGBUFFER_SIZE, ++ (ppdo->has_latency ? ppdo->latency : 46440) ++ * (uint64_t)v->info.rate / 1000000 * v->frame_size); ++ ++ pw_thread_loop_unlock(c->thread_loop); ++ return 0; ++} ++ ++static int ++qpw_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque) ++{ ++ PWVoiceIn *pw = (PWVoiceIn *) hw; ++ PWVoice *v = &pw->v; ++ struct audsettings obt_as = *as; ++ pwaudio *c = v->g = drv_opaque; ++ AudiodevPipewireOptions *popts = &c->dev->u.pipewire; ++ AudiodevPipewirePerDirectionOptions *ppdo = popts->in; ++ int r; ++ ++ pw_thread_loop_lock(c->thread_loop); ++ ++ v->info.format = audfmt_to_pw(as->fmt, as->endianness); ++ v->info.channels = as->nchannels; ++ qpw_set_position(as->nchannels, v->info.position); ++ v->info.rate = as->freq; ++ ++ obt_as.fmt = ++ pw_to_audfmt(v->info.format, &obt_as.endianness, &v->frame_size); ++ v->frame_size *= as->nchannels; ++ ++ /* call the function that creates a new stream for recording */ ++ r = qpw_stream_new(c, v, ppdo->stream_name ? 
: c->dev->id, ++ ppdo->name, SPA_DIRECTION_INPUT); ++ if (r < 0) { ++ pw_thread_loop_unlock(c->thread_loop); ++ return -1; ++ } ++ ++ /* report the audio format we support */ ++ audio_pcm_init_info(&hw->info, &obt_as); ++ ++ /* report the buffer size to qemu */ ++ hw->samples = audio_buffer_frames( ++ qapi_AudiodevPipewirePerDirectionOptions_base(ppdo), &obt_as, 46440); ++ ++ pw_thread_loop_unlock(c->thread_loop); ++ return 0; ++} ++ ++static void ++qpw_voice_fini(PWVoice *v) ++{ ++ pwaudio *c = v->g; ++ ++ if (!v->stream) { ++ return; ++ } ++ pw_thread_loop_lock(c->thread_loop); ++ pw_stream_destroy(v->stream); ++ v->stream = NULL; ++ pw_thread_loop_unlock(c->thread_loop); ++} ++ ++static void ++qpw_fini_out(HWVoiceOut *hw) ++{ ++ qpw_voice_fini(&PW_VOICE_OUT(hw)->v); ++} ++ ++static void ++qpw_fini_in(HWVoiceIn *hw) ++{ ++ qpw_voice_fini(&PW_VOICE_IN(hw)->v); ++} ++ ++static void ++qpw_voice_set_enabled(PWVoice *v, bool enable) ++{ ++ pwaudio *c = v->g; ++ pw_thread_loop_lock(c->thread_loop); ++ pw_stream_set_active(v->stream, enable); ++ pw_thread_loop_unlock(c->thread_loop); ++} ++ ++static void ++qpw_enable_out(HWVoiceOut *hw, bool enable) ++{ ++ qpw_voice_set_enabled(&PW_VOICE_OUT(hw)->v, enable); ++} ++ ++static void ++qpw_enable_in(HWVoiceIn *hw, bool enable) ++{ ++ qpw_voice_set_enabled(&PW_VOICE_IN(hw)->v, enable); ++} ++ ++static void ++qpw_voice_set_volume(PWVoice *v, Volume *vol) ++{ ++ pwaudio *c = v->g; ++ int i, ret; ++ ++ pw_thread_loop_lock(c->thread_loop); ++ v->volume.channels = vol->channels; ++ ++ for (i = 0; i < vol->channels; ++i) { ++ v->volume.values[i] = (float)vol->vol[i] / 255; ++ } ++ ++ ret = pw_stream_set_control(v->stream, ++ SPA_PROP_channelVolumes, v->volume.channels, v->volume.values, 0); ++ trace_pw_vol(ret == 0 ? "success" : "failed"); ++ ++ v->muted = vol->mute; ++ float val = v->muted ? 
1.f : 0.f; ++ ret = pw_stream_set_control(v->stream, SPA_PROP_mute, 1, &val, 0); ++ pw_thread_loop_unlock(c->thread_loop); ++} ++ ++static void ++qpw_volume_out(HWVoiceOut *hw, Volume *vol) ++{ ++ qpw_voice_set_volume(&PW_VOICE_OUT(hw)->v, vol); ++} ++ ++static void ++qpw_volume_in(HWVoiceIn *hw, Volume *vol) ++{ ++ qpw_voice_set_volume(&PW_VOICE_IN(hw)->v, vol); ++} ++ ++static int wait_resync(pwaudio *pw) ++{ ++ int res; ++ pw->pending_seq = pw_core_sync(pw->core, PW_ID_CORE, pw->pending_seq); ++ ++ while (true) { ++ pw_thread_loop_wait(pw->thread_loop); ++ ++ res = pw->error; ++ if (res < 0) { ++ pw->error = 0; ++ return res; ++ } ++ if (pw->pending_seq == pw->last_seq) { ++ break; ++ } ++ } ++ return 0; ++} ++ ++static void ++on_core_error(void *data, uint32_t id, int seq, int res, const char *message) ++{ ++ pwaudio *pw = data; ++ ++ error_report("error id:%u seq:%d res:%d (%s): %s", ++ id, seq, res, spa_strerror(res), message); ++ ++ /* stop and exit the thread loop */ ++ pw_thread_loop_signal(pw->thread_loop, FALSE); ++} ++ ++static void ++on_core_done(void *data, uint32_t id, int seq) ++{ ++ pwaudio *pw = data; ++ assert(id == PW_ID_CORE); ++ pw->last_seq = seq; ++ if (pw->pending_seq == seq) { ++ /* stop and exit the thread loop */ ++ pw_thread_loop_signal(pw->thread_loop, FALSE); ++ } ++} ++ ++static const struct pw_core_events core_events = { ++ PW_VERSION_CORE_EVENTS, ++ .done = on_core_done, ++ .error = on_core_error, ++}; ++ ++static void * ++qpw_audio_init(Audiodev *dev, Error **errp) ++{ ++ g_autofree pwaudio *pw = g_new0(pwaudio, 1); ++ ++ assert(dev->driver == AUDIODEV_DRIVER_PIPEWIRE); ++ trace_pw_audio_init(); ++ ++ pw_init(NULL, NULL); ++ ++ pw->dev = dev; ++ pw->thread_loop = pw_thread_loop_new("PipeWire thread loop", NULL); ++ if (pw->thread_loop == NULL) { ++ error_setg_errno(errp, errno, "Could not create PipeWire loop"); ++ goto fail; ++ } ++ ++ pw->context = ++ pw_context_new(pw_thread_loop_get_loop(pw->thread_loop), NULL, 0); ++ if 
(pw->context == NULL) { ++ error_setg_errno(errp, errno, "Could not create PipeWire context"); ++ goto fail; ++ } ++ ++ if (pw_thread_loop_start(pw->thread_loop) < 0) { ++ error_setg_errno(errp, errno, "Could not start PipeWire loop"); ++ goto fail; ++ } ++ ++ pw_thread_loop_lock(pw->thread_loop); ++ ++ pw->core = pw_context_connect(pw->context, NULL, 0); ++ if (pw->core == NULL) { ++ pw_thread_loop_unlock(pw->thread_loop); ++ goto fail_error; ++ } ++ ++ if (pw_core_add_listener(pw->core, &pw->core_listener, ++ &core_events, pw) < 0) { ++ pw_thread_loop_unlock(pw->thread_loop); ++ goto fail_error; ++ } ++ if (wait_resync(pw) < 0) { ++ pw_thread_loop_unlock(pw->thread_loop); ++ } ++ ++ pw_thread_loop_unlock(pw->thread_loop); ++ ++ return g_steal_pointer(&pw); ++ ++fail_error: ++ error_setg(errp, "Failed to initialize PW context"); ++fail: ++ if (pw->thread_loop) { ++ pw_thread_loop_stop(pw->thread_loop); ++ } ++ g_clear_pointer(&pw->context, pw_context_destroy); ++ g_clear_pointer(&pw->thread_loop, pw_thread_loop_destroy); ++ return NULL; ++} ++ ++static void ++qpw_audio_fini(void *opaque) ++{ ++ pwaudio *pw = opaque; ++ ++ if (pw->thread_loop) { ++ pw_thread_loop_stop(pw->thread_loop); ++ } ++ ++ if (pw->core) { ++ spa_hook_remove(&pw->core_listener); ++ spa_zero(pw->core_listener); ++ pw_core_disconnect(pw->core); ++ } ++ ++ if (pw->context) { ++ pw_context_destroy(pw->context); ++ } ++ pw_thread_loop_destroy(pw->thread_loop); ++ ++ g_free(pw); ++} ++ ++static struct audio_pcm_ops qpw_pcm_ops = { ++ .init_out = qpw_init_out, ++ .fini_out = qpw_fini_out, ++ .write = qpw_write, ++ .buffer_get_free = qpw_buffer_get_free, ++ .run_buffer_out = audio_generic_run_buffer_out, ++ .enable_out = qpw_enable_out, ++ .volume_out = qpw_volume_out, ++ .volume_in = qpw_volume_in, ++ ++ .init_in = qpw_init_in, ++ .fini_in = qpw_fini_in, ++ .read = qpw_read, ++ .run_buffer_in = audio_generic_run_buffer_in, ++ .enable_in = qpw_enable_in ++}; ++ ++static struct audio_driver 
pw_audio_driver = { ++ .name = "pipewire", ++ .descr = "http://www.pipewire.org/", ++ .init = qpw_audio_init, ++ .fini = qpw_audio_fini, ++ .pcm_ops = &qpw_pcm_ops, ++ .max_voices_out = INT_MAX, ++ .max_voices_in = INT_MAX, ++ .voice_size_out = sizeof(PWVoiceOut), ++ .voice_size_in = sizeof(PWVoiceIn), ++}; ++ ++static void ++register_audio_pw(void) ++{ ++ audio_driver_register(&pw_audio_driver); ++} ++ ++type_init(register_audio_pw); +-- +2.41.0.windows.1 + diff --git a/backend-iommufd-Report-PASID-capability.patch b/backend-iommufd-Report-PASID-capability.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c38cf0602338aa3411e2423168552dbb43d494d --- /dev/null +++ b/backend-iommufd-Report-PASID-capability.patch @@ -0,0 +1,150 @@ +From 0978556247d968ffc83beff3b2611c93fd9b6b13 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Thu, 12 Sep 2024 00:17:31 -0700 +Subject: [PATCH] backend/iommufd: Report PASID capability + +Signed-off-by: Yi Liu +--- + backends/iommufd.c | 4 +++- + hw/arm/smmu-common.c | 4 ++-- + hw/arm/smmuv3.c | 4 +++- + hw/vfio/iommufd.c | 4 +++- + include/hw/arm/smmu-common.h | 2 +- + include/sysemu/host_iommu_device.h | 1 + + include/sysemu/iommufd.h | 3 ++- + 7 files changed, 15 insertions(+), 7 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index e9ce82297b..4f5df63331 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -326,7 +326,8 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, + + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, +- uint64_t *caps, Error **errp) ++ uint64_t *caps, uint8_t *max_pasid_log2, ++ Error **errp) + { + struct iommu_hw_info info = { + .size = sizeof(info), +@@ -344,6 +345,7 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + *type = info.out_data_type; + g_assert(caps); + *caps = info.out_capabilities; ++ *max_pasid_log2 = info.out_max_pasid_log2; + + return 
true; + } +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index c382fa16e5..e7028bd4ec 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -853,7 +853,7 @@ SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid) + + /* IOMMUFD helpers */ + int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type, +- uint32_t data_len, void *data) ++ uint32_t data_len, uint8_t *pasid, void *data) + { + uint64_t caps; + +@@ -863,7 +863,7 @@ int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type, + + return !iommufd_backend_get_device_info(sdev->idev->iommufd, + sdev->idev->devid, data_type, data, +- data_len, &caps, NULL); ++ data_len, &caps, pasid, NULL); + } + + void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort) +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 30c0ae4c3b..0ca0e96fcc 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -264,6 +264,7 @@ static void smmuv3_nested_init_regs(SMMUv3State *s) + SMMUDevice *sdev; + uint32_t data_type; + uint32_t val; ++ uint8_t pasid; + int ret; + + if (!bs->nested || !bs->viommu) { +@@ -280,7 +281,8 @@ static void smmuv3_nested_init_regs(SMMUv3State *s) + goto out; + } + +- ret = smmu_dev_get_info(sdev, &data_type, sizeof(sdev->info), &sdev->info); ++ ret = smmu_dev_get_info(sdev, &data_type, sizeof(sdev->info), &pasid, ++ &sdev->info); + if (ret) { + error_report("failed to get SMMU device info"); + return; +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index c0eb87c78c..a108beda29 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -871,18 +871,20 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + struct iommu_hw_info_vtd vtd; + } data; + uint64_t hw_caps; ++ uint8_t pasids; + + hiod->agent = opaque; + + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, + &type, &data, sizeof(data), +- &hw_caps, errp)) { ++ &hw_caps, &pasids, errp)) { + return false; + } + + hiod->name = g_strdup(vdev->name); + caps->type = type; + 
caps->hw_caps = hw_caps; ++ caps->max_pasid_log2 = pasids; + + return true; + } +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 087a11efc7..8ae33c3753 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -276,7 +276,7 @@ void smmu_inv_notifiers_all(SMMUState *s); + + /* IOMMUFD helpers */ + int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type, +- uint32_t data_len, void *data); ++ uint32_t data_len, uint8_t *pasid, void *data); + void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort); + int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type, + uint32_t data_len, void *data, +diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h +index 84131f5495..22c76a37a7 100644 +--- a/include/sysemu/host_iommu_device.h ++++ b/include/sysemu/host_iommu_device.h +@@ -26,6 +26,7 @@ + typedef struct HostIOMMUDeviceCaps { + uint32_t type; + uint64_t hw_caps; ++ uint8_t max_pasid_log2; + } HostIOMMUDeviceCaps; + + #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index b279184974..29afaa429d 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -57,7 +57,8 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size); + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, +- uint64_t *caps, Error **errp); ++ uint64_t *caps, uint8_t *max_pasid_log2, ++ Error **errp); + bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + uint32_t pt_id, uint32_t flags, + uint32_t data_type, uint32_t data_len, +-- +2.41.0.windows.1 + diff --git a/backends-Introduce-HostIOMMUDevice-abstract.patch b/backends-Introduce-HostIOMMUDevice-abstract.patch new file mode 100644 index 0000000000000000000000000000000000000000..42a92c8d751528b5e117eac781e7ca17f58e2280 --- /dev/null +++ 
b/backends-Introduce-HostIOMMUDevice-abstract.patch @@ -0,0 +1,162 @@ +From 626698a1e9edff6a1032f496858555e1a4614fbe Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:27 +0800 +Subject: [PATCH] backends: Introduce HostIOMMUDevice abstract +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +A HostIOMMUDevice is an abstraction for an assigned device that is protected +by a physical IOMMU (aka host IOMMU). The userspace interaction with this +physical IOMMU can be done either through the VFIO IOMMU type 1 legacy +backend or the new iommufd backend. The assigned device can be a VFIO device +or a VDPA device. The HostIOMMUDevice is needed to interact with the host +IOMMU that protects the assigned device. It is especially useful when the +device is also protected by a virtual IOMMU as this latter use the translation +services of the physical IOMMU and is constrained by it. In that context the +HostIOMMUDevice can be passed to the virtual IOMMU to collect physical IOMMU +capabilities such as the supported address width. In the future, the virtual +IOMMU will use the HostIOMMUDevice to program the guest page tables in the +first translation stage of the physical IOMMU. + +Introduce .realize() to initialize HostIOMMUDevice further after instance init. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + MAINTAINERS | 2 ++ + backends/host_iommu_device.c | 33 +++++++++++++++++++ + backends/meson.build | 1 + + include/sysemu/host_iommu_device.h | 53 ++++++++++++++++++++++++++++++ + 4 files changed, 89 insertions(+) + create mode 100644 backends/host_iommu_device.c + create mode 100644 include/sysemu/host_iommu_device.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 0ddb20a35f..ada87bfa9e 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan + S: Supported + F: backends/iommufd.c + F: include/sysemu/iommufd.h ++F: backends/host_iommu_device.c ++F: include/sysemu/host_iommu_device.h + F: include/qemu/chardev_open.h + F: util/chardev_open.c + F: docs/devel/vfio-iommufd.rst +diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c +new file mode 100644 +index 0000000000..8f2dda1beb +--- /dev/null ++++ b/backends/host_iommu_device.c +@@ -0,0 +1,33 @@ ++/* ++ * Host IOMMU device abstract ++ * ++ * Copyright (C) 2024 Intel Corporation. ++ * ++ * Authors: Zhenzhong Duan ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. 
++ */ ++ ++#include "qemu/osdep.h" ++#include "sysemu/host_iommu_device.h" ++ ++OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice, ++ host_iommu_device, ++ HOST_IOMMU_DEVICE, ++ OBJECT) ++ ++static void host_iommu_device_class_init(ObjectClass *oc, void *data) ++{ ++} ++ ++static void host_iommu_device_init(Object *obj) ++{ ++} ++ ++static void host_iommu_device_finalize(Object *obj) ++{ ++ HostIOMMUDevice *hiod = HOST_IOMMU_DEVICE(obj); ++ ++ g_free(hiod->name); ++} +diff --git a/backends/meson.build b/backends/meson.build +index 9a5cea480d..68b5e34e04 100644 +--- a/backends/meson.build ++++ b/backends/meson.build +@@ -13,6 +13,7 @@ system_ss.add([files( + system_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c')) + system_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c')) + system_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c')) ++system_ss.add(when: 'CONFIG_LINUX', if_true: files('host_iommu_device.c')) + if keyutils.found() + system_ss.add(keyutils, files('cryptodev-lkcf.c')) + endif +diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h +new file mode 100644 +index 0000000000..db47a16189 +--- /dev/null ++++ b/include/sysemu/host_iommu_device.h +@@ -0,0 +1,53 @@ ++/* ++ * Host IOMMU device abstract declaration ++ * ++ * Copyright (C) 2024 Intel Corporation. ++ * ++ * Authors: Zhenzhong Duan ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ */ ++ ++#ifndef HOST_IOMMU_DEVICE_H ++#define HOST_IOMMU_DEVICE_H ++ ++#include "qom/object.h" ++#include "qapi/error.h" ++ ++#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" ++OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) ++ ++struct HostIOMMUDevice { ++ Object parent_obj; ++ ++ char *name; ++}; ++ ++/** ++ * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices. 
++ * ++ * Different types of host devices (e.g., VFIO or VDPA device) or devices ++ * with different backend (e.g., VFIO legacy container or IOMMUFD backend) ++ * will have different implementations of the HostIOMMUDeviceClass. ++ */ ++struct HostIOMMUDeviceClass { ++ ObjectClass parent_class; ++ ++ /** ++ * @realize: initialize host IOMMU device instance further. ++ * ++ * Mandatory callback. ++ * ++ * @hiod: pointer to a host IOMMU device instance. ++ * ++ * @opaque: pointer to agent device of this host IOMMU device, ++ * e.g., VFIO base device or VDPA device. ++ * ++ * @errp: pass an Error out when realize fails. ++ * ++ * Returns: true on success, false on failure. ++ */ ++ bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); ++}; ++#endif +-- +2.41.0.windows.1 + diff --git a/backends-VirtCCA-cvm_gpa_start-supports-both-1GB-and.patch b/backends-VirtCCA-cvm_gpa_start-supports-both-1GB-and.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7066d9acd899b053190e4e5447e7d8dd3e09e09 --- /dev/null +++ b/backends-VirtCCA-cvm_gpa_start-supports-both-1GB-and.patch @@ -0,0 +1,113 @@ +From bc08940ad3c75da49e05c596f79e9e0164573709 Mon Sep 17 00:00:00 2001 +From: gongchangsui +Date: Mon, 17 Mar 2025 02:56:40 -0400 +Subject: [PATCH] backends: VirtCCA: cvm_gpa_start supports both 1GB and 3GB + +For TMM versions 2.1 and above, `cvm_gpa_start` is 1GB, while for +versions prior to 2.1, `cvm_gpa_start` is 3GB. Shared huge page memory +supports both `cvm_gpa_start` values. 
+ +Signed-off-by: gongchangsui +--- + backends/hostmem-file.c | 17 ++++++++++++++--- + hw/arm/virt.c | 1 + + hw/core/numa.c | 2 +- + include/exec/memory.h | 11 +++++++---- + 4 files changed, 23 insertions(+), 8 deletions(-) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index 891fe4ac4a..ce63a372a3 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -27,6 +27,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE) + + bool virtcca_shared_hugepage_mapped = false; + uint64_t virtcca_cvm_ram_size = 0; ++uint64_t virtcca_cvm_gpa_start = 0; + + struct HostMemoryBackendFile { + HostMemoryBackend parent_obj; +@@ -101,8 +102,16 @@ virtcca_shared_backend_memory_alloc(char *mem_path, uint32_t ram_flags, Error ** + error_report("parse virtcca share memory path failed"); + exit(1); + } +- if (virtcca_cvm_ram_size >= VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE) { +- size = VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE; ++ ++ /* ++ * 1) CVM_GPA_START = 3GB --> fix size = 1GB ++ * 2) CVM_GPA_START = 1GB && ram_size >= 3GB --> size = 3GB ++ * 3) CVM_GPA_START = 1GB && ram_size < 3GB --> size = ram_size ++ */ ++ if (virtcca_cvm_gpa_start != DEFAULT_VM_GPA_START) { ++ size = VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT - virtcca_cvm_gpa_start; ++ } else if (virtcca_cvm_ram_size >= VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT - DEFAULT_VM_GPA_START) { ++ size = VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT - DEFAULT_VM_GPA_START; + } + + virtcca_shared_hugepage = g_new(MemoryRegion, 1); +@@ -172,7 +181,9 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + fb->mem_path, fb->offset, errp); + g_free(name); + +- if (virtcca_cvm_enabled() && backend->share && !virtcca_shared_hugepage_mapped) { ++ if (virtcca_cvm_enabled() && backend->share && ++ (strcmp(fb->mem_path, "/dev/shm") != 0) && ++ !virtcca_shared_hugepage_mapped) { + virtcca_shared_backend_memory_alloc(fb->mem_path, ram_flags, errp); + } + #endif +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 
6c5611826c..3c31d3667e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2063,6 +2063,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + if (tmi_version < MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM) { + vms->memmap[VIRT_MEM].base = 3 * GiB; + } ++ virtcca_cvm_gpa_start = vms->memmap[VIRT_MEM].base; + vms->memmap[VIRT_MEM].size = ms->ram_size; + info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base), + (unsigned long long)(vms->memmap[VIRT_MEM].base + ms->ram_size)); +diff --git a/hw/core/numa.c b/hw/core/numa.c +index c691578ef5..98d896e687 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -655,7 +655,7 @@ static void virtcca_shared_memory_configuration(MachineState *ms) + memory_region_init_alias(alias_mr, NULL, "alias-mr", virtcca_shared_hugepage, + 0, int128_get64(virtcca_shared_hugepage->size)); + memory_region_add_subregion(address_space_virtcca_shared_memory.root, +- VIRTCCA_GPA_START, alias_mr); ++ virtcca_cvm_gpa_start, alias_mr); + } + + void numa_complete_configuration(MachineState *ms) +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 33778f5c64..c14dc69d27 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -243,14 +243,17 @@ typedef struct IOMMUTLBEvent { + /* RAM FD is opened read-only */ + #define RAM_READONLY_FD (1 << 11) + +-/* The GPA range of the VirtCCA bounce buffer is from 1GB to 4GB. */ +-#define VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE 0xc0000000ULL ++/* The address limit of the VirtCCA bounce buffer is 4GB. 
*/ ++#define VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT 0x100000000ULL + + /* The VirtCCA shared hugepage memory granularity is 1GB */ + #define VIRTCCA_SHARED_HUGEPAGE_ALIGN 0x40000000ULL + +-/* The GPA starting address of the VirtCCA CVM is 1GB */ +-#define VIRTCCA_GPA_START 0x40000000ULL ++/* The default GPA starting address of VM is 1GB */ ++#define DEFAULT_VM_GPA_START 0x40000000ULL ++ ++/* The GPA starting address of the VirtCCA CVM is 1GB or 3GB */ ++extern uint64_t virtcca_cvm_gpa_start; + + extern uint64_t virtcca_cvm_ram_size; + +-- +2.41.0.windows.1 + diff --git a/backends-cryptodev-Do-not-abort-for-invalid-session-.patch b/backends-cryptodev-Do-not-abort-for-invalid-session-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d81e171ccdaeb81c5e7be84e8106b4bbb2d8d21 --- /dev/null +++ b/backends-cryptodev-Do-not-abort-for-invalid-session-.patch @@ -0,0 +1,71 @@ +From 29080940b37ce7486a46ab5534383321319fe2c5 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 22 Mar 2025 15:10:32 +0800 +Subject: [PATCH] backends/cryptodev: Do not abort for invalid session ID +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from eaf2bd29538d039df80bb4b1584de33a61312bc6 + +Instead of aborting when a session ID is invalid, +return VIRTIO_CRYPTO_INVSESS ("Invalid session id"). 
+ +Reproduced using: + + $ cat << EOF | qemu-system-i386 -display none \ + -machine q35,accel=qtest -m 512M -nodefaults \ + -object cryptodev-backend-builtin,id=cryptodev0 \ + -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ + -qtest stdio + outl 0xcf8 0x80000804 + outw 0xcfc 0x06 + outl 0xcf8 0x80000820 + outl 0xcfc 0xe0008000 + write 0x10800e 0x1 0x01 + write 0xe0008016 0x1 0x01 + write 0xe0008020 0x4 0x00801000 + write 0xe0008028 0x4 0x00c01000 + write 0xe000801c 0x1 0x01 + write 0x110000 0x1 0x05 + write 0x110001 0x1 0x04 + write 0x108002 0x1 0x11 + write 0x108008 0x1 0x48 + write 0x10800c 0x1 0x01 + write 0x108018 0x1 0x10 + write 0x10801c 0x1 0x02 + write 0x10c002 0x1 0x01 + write 0xe000b005 0x1 0x00 + EOF + Assertion failed: (session_id < MAX_NUM_SESSIONS && builtin->sessions[session_id]), + function cryptodev_builtin_close_session, file cryptodev-builtin.c, line 430. + +Cc: qemu-stable@nongnu.org +Reported-by: Zheyu Ma +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2274 +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: zhenwei pi +Message-Id: <20240409094757.9127-1-philmd@linaro.org> +Signed-off-by: gubin +--- + backends/cryptodev-builtin.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c +index 0822f198d9..940104ee55 100644 +--- a/backends/cryptodev-builtin.c ++++ b/backends/cryptodev-builtin.c +@@ -428,7 +428,9 @@ static int cryptodev_builtin_close_session( + CRYPTODEV_BACKEND_BUILTIN(backend); + CryptoDevBackendBuiltinSession *session; + +- assert(session_id < MAX_NUM_SESSIONS && builtin->sessions[session_id]); ++ if (session_id >= MAX_NUM_SESSIONS || !builtin->sessions[session_id]) { ++ return -VIRTIO_CRYPTO_INVSESS; ++ } + + session = builtin->sessions[session_id]; + if (session->cipher) { +-- +2.41.0.windows.1 + diff --git a/backends-cryptodev-Do-not-ignore-throttle-backends-E.patch b/backends-cryptodev-Do-not-ignore-throttle-backends-E.patch 
new file mode 100644 index 0000000000000000000000000000000000000000..2d7a4d16eb1910a8fe9dd01359168db8275c7771 --- /dev/null +++ b/backends-cryptodev-Do-not-ignore-throttle-backends-E.patch @@ -0,0 +1,65 @@ +From 690812903469db798ebae012248b9231d5ce9f11 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 22 Mar 2025 15:15:08 +0800 +Subject: [PATCH] backends/cryptodev: Do not ignore throttle/backends Errors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 484aecf2d3a75251b63481be2a0c3aef635002af + +Both cryptodev_backend_set_throttle() and CryptoDevBackendClass::init() +can set their Error** argument. Do not ignore them, return early +on failure. Without that, running into another failure trips +error_setv()'s assertion. Use the ERRP_GUARD() macro as suggested +in commit ae7c80a7bd ("error: New macro ERRP_GUARD()"). + +Cc: qemu-stable@nongnu.org +Fixes: e7a775fd9f ("cryptodev: Account statistics") +Fixes: 2580b452ff ("cryptodev: support QoS") +Reviewed-by: zhenwei pi +Reviewed-by: Gonglei +Reviewed-by: Markus Armbruster +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20231120150418.93443-1-philmd@linaro.org> +Signed-off-by: gubin +--- + backends/cryptodev.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/backends/cryptodev.c b/backends/cryptodev.c +index e5006bd215..fff89fd62a 100644 +--- a/backends/cryptodev.c ++++ b/backends/cryptodev.c +@@ -398,6 +398,7 @@ static void cryptodev_backend_set_ops(Object *obj, Visitor *v, + static void + cryptodev_backend_complete(UserCreatable *uc, Error **errp) + { ++ ERRP_GUARD(); + CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc); + CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc); + uint32_t services; +@@ -406,11 +407,20 @@ cryptodev_backend_complete(UserCreatable *uc, Error **errp) + QTAILQ_INIT(&backend->opinfos); + value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg; + cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, 
value, errp); ++ if (*errp) { ++ return; ++ } + value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg; + cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp); ++ if (*errp) { ++ return; ++ } + + if (bc->init) { + bc->init(backend, errp); ++ if (*errp) { ++ return; ++ } + } + + services = backend->conf.crypto_services; +-- +2.41.0.windows.1 + diff --git a/backends-cryptodev-builtin-Fix-local_error-leaks.patch b/backends-cryptodev-builtin-Fix-local_error-leaks.patch new file mode 100644 index 0000000000000000000000000000000000000000..c2ecf63d461a7e0591bd6f30a8eec5da5f9d12e3 --- /dev/null +++ b/backends-cryptodev-builtin-Fix-local_error-leaks.patch @@ -0,0 +1,63 @@ +From 2781f5673cc43d13b73e66fb266e7ea0b945429d Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 20:55:38 +0800 +Subject: [PATCH] backends/cryptodev-builtin: Fix local_error leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry picked from commit 06479dbf3d7d245572c4b3016e5a1d923ff04d66 + +backends/cryptodev-builtin: Fix local_error leaks +It seems that this error does not need to be propagated to the upper, +directly output the error to avoid the leaks + +Closes: https://gitlab.com/qemu-project/qemu/-/issues/2283 +Fixes: 2fda101 ("virtio-crypto: Support asynchronous mode") +Signed-off-by: Li Zhijian +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: zhenwei pi +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +Signed-off-by: Gao Jiazhen +--- + backends/cryptodev-builtin.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c +index 39d0455280..0822f198d9 100644 +--- a/backends/cryptodev-builtin.c ++++ b/backends/cryptodev-builtin.c +@@ -23,6 +23,7 @@ + + #include "qemu/osdep.h" + #include "sysemu/cryptodev.h" ++#include "qemu/error-report.h" + #include "qapi/error.h" + #include "standard-headers/linux/virtio_crypto.h" + 
#include "crypto/cipher.h" +@@ -396,8 +397,8 @@ static int cryptodev_builtin_create_session( + case VIRTIO_CRYPTO_HASH_CREATE_SESSION: + case VIRTIO_CRYPTO_MAC_CREATE_SESSION: + default: +- error_setg(&local_error, "Unsupported opcode :%" PRIu32 "", +- sess_info->op_code); ++ error_report("Unsupported opcode :%" PRIu32 "", ++ sess_info->op_code); + return -VIRTIO_CRYPTO_NOTSUPP; + } + +@@ -552,8 +553,8 @@ static int cryptodev_builtin_operation( + + if (op_info->session_id >= MAX_NUM_SESSIONS || + builtin->sessions[op_info->session_id] == NULL) { +- error_setg(&local_error, "Cannot find a valid session id: %" PRIu64 "", +- op_info->session_id); ++ error_report("Cannot find a valid session id: %" PRIu64 "", ++ op_info->session_id); + return -VIRTIO_CRYPTO_INVSESS; + } + +-- +2.41.0.windows.1 + diff --git a/backends-cryptodev-vhost-user-Fix-local_error-leaks.patch b/backends-cryptodev-vhost-user-Fix-local_error-leaks.patch new file mode 100644 index 0000000000000000000000000000000000000000..c5c1a4c74a9e4cd70ef334afac8135b781ed179a --- /dev/null +++ b/backends-cryptodev-vhost-user-Fix-local_error-leaks.patch @@ -0,0 +1,41 @@ +From c5a859ec02af99574dfac2e5cfab9570345eb2e4 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Wed, 5 Feb 2025 08:04:10 -0500 +Subject: [PATCH] backends/cryptodev-vhost-user: Fix local_error leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 78b0c15a563ac4be5afb0375602ca0a3adc6c442 + +Do not propagate error to the upper, directly output the error +to avoid leaks. 
+ +Fixes: 2fda101de07 ("virtio-crypto: Support asynchronous mode") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2714 +Signed-off-by: Gabriel Barrantes +Reviewed-by: zhenwei pi +Message-Id: +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + backends/cryptodev-vhost-user.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c +index c3283ba84a..b8e95ca8b4 100644 +--- a/backends/cryptodev-vhost-user.c ++++ b/backends/cryptodev-vhost-user.c +@@ -281,8 +281,7 @@ static int cryptodev_vhost_user_create_session( + break; + + default: +- error_setg(&local_error, "Unsupported opcode :%" PRIu32 "", +- sess_info->op_code); ++ error_report("Unsupported opcode :%" PRIu32 "", sess_info->op_code); + return -VIRTIO_CRYPTO_NOTSUPP; + } + +-- +2.41.0.windows.1 + diff --git a/backends-host_iommu_device-Introduce-HostIOMMUDevice.patch b/backends-host_iommu_device-Introduce-HostIOMMUDevice.patch new file mode 100644 index 0000000000000000000000000000000000000000..8bdfb79d0590b798699f8f2df7298b449e019c11 --- /dev/null +++ b/backends-host_iommu_device-Introduce-HostIOMMUDevice.patch @@ -0,0 +1,91 @@ +From ca210a4a8fe97dd56baa184671bb48bff9a54ecb Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:28 +0800 +Subject: [PATCH] backends/host_iommu_device: Introduce HostIOMMUDeviceCaps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +HostIOMMUDeviceCaps's elements map to the host IOMMU's capabilities. +Different platform IOMMU can support different elements. + +Currently only two elements, type and aw_bits, type hints the host +platform IOMMU type, i.e., INTEL vtd, ARM smmu, etc; aw_bits hints +host IOMMU address width. + +Introduce .get_cap() handler to check if HOST_IOMMU_DEVICE_CAP_XXX +is supported. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +--- + include/sysemu/host_iommu_device.h | 38 ++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h +index db47a16189..a57873958b 100644 +--- a/include/sysemu/host_iommu_device.h ++++ b/include/sysemu/host_iommu_device.h +@@ -15,6 +15,18 @@ + #include "qom/object.h" + #include "qapi/error.h" + ++/** ++ * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. ++ * ++ * @type: host platform IOMMU type. ++ * ++ * @aw_bits: host IOMMU address width. 0xff if no limitation. ++ */ ++typedef struct HostIOMMUDeviceCaps { ++ uint32_t type; ++ uint8_t aw_bits; ++} HostIOMMUDeviceCaps; ++ + #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" + OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) + +@@ -22,6 +34,7 @@ struct HostIOMMUDevice { + Object parent_obj; + + char *name; ++ HostIOMMUDeviceCaps caps; + }; + + /** +@@ -49,5 +62,30 @@ struct HostIOMMUDeviceClass { + * Returns: true on success, false on failure. + */ + bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); ++ /** ++ * @get_cap: check if a host IOMMU device capability is supported. ++ * ++ * Optional callback, if not implemented, hint not supporting query ++ * of @cap. ++ * ++ * @hiod: pointer to a host IOMMU device instance. ++ * ++ * @cap: capability to check. ++ * ++ * @errp: pass an Error out when fails to query capability. ++ * ++ * Returns: <0 on failure, 0 if a @cap is unsupported, or else ++ * 1 or some positive value for some special @cap, ++ * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS. ++ */ ++ int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp); + }; ++ ++/* ++ * Host IOMMU device capability list. 
++ */ ++#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0 ++#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1 ++ ++#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64 + #endif +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Add-helpers-for-invalidating-user-m.patch b/backends-iommufd-Add-helpers-for-invalidating-user-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..56eeca4cd3429c597ab835c2adca49f218ee0a4b --- /dev/null +++ b/backends-iommufd-Add-helpers-for-invalidating-user-m.patch @@ -0,0 +1,81 @@ +From cedca4d3635cde049151b5818df2cb66c2b1531f Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Fri, 3 Nov 2023 16:54:01 +0800 +Subject: [PATCH] backends/iommufd: Add helpers for invalidating user-managed + HWPT + +Signed-off-by: Nicolin Chen +Signed-off-by: Zhenzhong Duan +--- + backends/iommufd.c | 30 ++++++++++++++++++++++++++++++ + backends/trace-events | 1 + + include/sysemu/iommufd.h | 3 +++ + 3 files changed, 34 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index c1260766f0..cf24370385 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -330,6 +330,36 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + return true; + } + ++int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id, ++ uint32_t data_type, uint32_t entry_len, ++ uint32_t *entry_num, void *data_ptr) ++{ ++ int ret, fd = be->fd; ++ struct iommu_hwpt_invalidate cache = { ++ .size = sizeof(cache), ++ .hwpt_id = hwpt_id, ++ .data_type = data_type, ++ .entry_len = entry_len, ++ .entry_num = *entry_num, ++ .data_uptr = (uintptr_t)data_ptr, ++ }; ++ ++ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache); ++ ++ trace_iommufd_backend_invalidate_cache(fd, hwpt_id, data_type, entry_len, ++ *entry_num, cache.entry_num, ++ (uintptr_t)data_ptr, ret); ++ if (ret) { ++ *entry_num = cache.entry_num; ++ error_report("IOMMU_HWPT_INVALIDATE failed: %s", strerror(errno)); ++ ret = -errno; ++ } else { ++ g_assert(*entry_num == 
cache.entry_num); ++ } ++ ++ return ret; ++} ++ + static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) + { + HostIOMMUDeviceCaps *caps = &hiod->caps; +diff --git a/backends/trace-events b/backends/trace-events +index b02433710a..ef0ff98921 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_ + iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" + iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" + iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" ++iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 3b28c8a81c..f6596f6338 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -63,6 +63,9 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, + uint64_t iova, ram_addr_t size, + uint64_t page_size, uint64_t *data, + Error **errp); ++int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id, ++ uint32_t data_type, uint32_t entry_len, ++ uint32_t *entry_num, void *data_ptr); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + #endif +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Extend-iommufd_backend_get_device_i.patch b/backends-iommufd-Extend-iommufd_backend_get_device_i.patch new file mode 100644 index 0000000000000000000000000000000000000000..37e9041fdefa4d29f9d862bdbd293000ffa740e0 --- /dev/null +++ 
b/backends-iommufd-Extend-iommufd_backend_get_device_i.patch @@ -0,0 +1,78 @@ +From 7d53d0938921d0faa32e1fef4c7bcc45d21f9bfb Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Fri, 19 Jul 2024 13:04:51 +0100 +Subject: [PATCH] backends/iommufd: Extend iommufd_backend_get_device_info() to + fetch HW capabilities +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The helper will be able to fetch vendor agnostic IOMMU capabilities +supported both by hardware and software. Right now it is only iommu dirty +tracking. + +Signed-off-by: Joao Martins +Reviewed-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +--- + backends/iommufd.c | 4 +++- + hw/vfio/iommufd.c | 4 +++- + include/sysemu/iommufd.h | 2 +- + 3 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 7e805bd664..1ce2a24226 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -225,7 +225,7 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, +- Error **errp) ++ uint64_t *caps, Error **errp) + { + struct iommu_hw_info info = { + .size = sizeof(info), +@@ -241,6 +241,8 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + + g_assert(type); + *type = info.out_data_type; ++ g_assert(caps); ++ *caps = info.out_capabilities; + + return true; + } +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 7cbf0e44f1..d5b923ca83 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -647,9 +647,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + union { + struct iommu_hw_info_vtd vtd; + } data; ++ uint64_t hw_caps; + + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, +- &type, &data, sizeof(data), errp)) { ++ &type, &data, sizeof(data), ++ &hw_caps, errp)) { + return false; + 
} + +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index dfade18e6d..a0a0143856 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -51,7 +51,7 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size); + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, +- Error **errp); ++ uint64_t *caps, Error **errp); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + #endif +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch b/backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch new file mode 100644 index 0000000000000000000000000000000000000000..8685d1200499ea5152bc661f1c842ed0df8efc6c --- /dev/null +++ b/backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch @@ -0,0 +1,59 @@ +From 88006385c8e58b2aa612bf5aa184263f0d4245de Mon Sep 17 00:00:00 2001 +From: Zhao Liu +Date: Mon, 11 Mar 2024 11:37:55 +0800 +Subject: [PATCH] backends/iommufd: Fix missing ERRP_GUARD() for + error_prepend() + +As the comment in qapi/error, passing @errp to error_prepend() requires +ERRP_GUARD(): + +* = Why, when and how to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +... +* - It should not be passed to error_prepend(), error_vprepend() or +* error_append_hint(), because that doesn't work with &error_fatal. +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function. +* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. + +ERRP_GUARD() could avoid the case when @errp is &error_fatal, the user +can't see this additional information, because exit() happens in +error_setg earlier than information is added [1]. 
+ +The iommufd_backend_set_fd() passes @errp to error_prepend(), to avoid +the above issue, add missing ERRP_GUARD() at the beginning of this +function. + +[1]: Issue description in the commit message of commit ae7c80a7bd73 + ("error: New macro ERRP_GUARD()"). + +Cc: Yi Liu +Cc: Eric Auger +Cc: Zhenzhong Duan +Signed-off-by: Zhao Liu +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Message-ID: <20240311033822.3142585-3-zhao1.liu@linux.intel.com> +Signed-off-by: Thomas Huth +--- + backends/iommufd.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 3cbf11fc8b..f061b6869a 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -44,6 +44,7 @@ static void iommufd_backend_finalize(Object *obj) + + static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) + { ++ ERRP_GUARD(); + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + int fd = -1; + +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Get-rid-of-qemu_open_old.patch b/backends-iommufd-Get-rid-of-qemu_open_old.patch new file mode 100644 index 0000000000000000000000000000000000000000..184d44ddd2edf4e28d5b6df53375e913f3b53155 --- /dev/null +++ b/backends-iommufd-Get-rid-of-qemu_open_old.patch @@ -0,0 +1,45 @@ +From 959b91b9b45b3ec649c6de0e268a4dcd603ce8af Mon Sep 17 00:00:00 2001 +From: Zhao Liu +Date: Mon, 15 Jul 2024 16:21:54 +0800 +Subject: [PATCH] backends/iommufd: Get rid of qemu_open_old() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For qemu_open_old(), osdep.h said: + +> Don't introduce new usage of this function, prefer the following +> qemu_open/qemu_create that take an "Error **errp". + +So replace qemu_open_old() with qemu_open(). 
+ +Cc: Yi Liu +Cc: Eric Auger +Cc: Zhenzhong Duan +Signed-off-by: Zhao Liu +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Yi Liu +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +--- + backends/iommufd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index fad580fdcb..62df6e41f0 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -79,9 +79,8 @@ bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + int fd; + + if (be->owned && !be->users) { +- fd = qemu_open_old("/dev/iommu", O_RDWR); ++ fd = qemu_open("/dev/iommu", O_RDWR, errp); + if (fd < 0) { +- error_setg_errno(errp, errno, "/dev/iommu opening failed"); + return false; + } + be->fd = fd; +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Implement-HostIOMMUDeviceClass-get_.patch b/backends-iommufd-Implement-HostIOMMUDeviceClass-get_.patch new file mode 100644 index 0000000000000000000000000000000000000000..e92fe42b532e8a5ced8892799b70ab64a34b4619 --- /dev/null +++ b/backends-iommufd-Implement-HostIOMMUDeviceClass-get_.patch @@ -0,0 +1,61 @@ +From 2f1a2f4b320e70a85cef8392cd5f4b1e54afb9c9 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:36 +0800 +Subject: [PATCH] backends/iommufd: Implement HostIOMMUDeviceClass::get_cap() + handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + backends/iommufd.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 604a8f4e7d..7e805bd664 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -245,6 +245,28 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + return true; + } + ++static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) ++{ ++ HostIOMMUDeviceCaps *caps = &hiod->caps; ++ ++ switch (cap) { ++ case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE: ++ return caps->type; ++ case HOST_IOMMU_DEVICE_CAP_AW_BITS: ++ return caps->aw_bits; ++ default: ++ error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); ++ return -EINVAL; ++ } ++} ++ ++static void hiod_iommufd_class_init(ObjectClass *oc, void *data) ++{ ++ HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); ++ ++ hioc->get_cap = hiod_iommufd_get_cap; ++}; ++ + static const TypeInfo types[] = { + { + .name = TYPE_IOMMUFD_BACKEND, +@@ -261,6 +283,7 @@ static const TypeInfo types[] = { + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .parent = TYPE_HOST_IOMMU_DEVICE, ++ .class_init = hiod_iommufd_class_init, + .abstract = true, + } + }; +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Introduce-TYPE_HOST_IOMMU_DEVICE_IO.patch b/backends-iommufd-Introduce-TYPE_HOST_IOMMU_DEVICE_IO.patch new file mode 100644 index 0000000000000000000000000000000000000000..13e26c512f929e1b6e285bb2799e742c811548cc --- /dev/null +++ b/backends-iommufd-Introduce-TYPE_HOST_IOMMU_DEVICE_IO.patch @@ -0,0 +1,158 @@ +From 50142057ec070a70f3f38ec272ec61cc3ae6e071 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:30 +0800 +Subject: [PATCH] backends/iommufd: Introduce + TYPE_HOST_IOMMU_DEVICE_IOMMUFD[_VFIO] devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +TYPE_HOST_IOMMU_DEVICE_IOMMUFD represents a host IOMMU device under +iommufd backend. 
It is abstract, because it is going to be derived +into VFIO or VDPA type'd device. + +It will have its own .get_cap() implementation. + +TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO is a sub-class of +TYPE_HOST_IOMMU_DEVICE_IOMMUFD, represents a VFIO type'd host IOMMU +device under iommufd backend. It will be created during VFIO device +attaching and passed to vIOMMU. + +It will have its own .realize() implementation. + +Opportunistically, add missed header to include/sysemu/iommufd.h. + +Suggested-by: Cédric Le Goater +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +--- + backends/iommufd.c | 36 +++++++++++++++++------------------ + hw/vfio/iommufd.c | 5 ++++- + include/hw/vfio/vfio-common.h | 3 +++ + include/sysemu/iommufd.h | 16 ++++++++++++++++ + 4 files changed, 41 insertions(+), 19 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index ba58a0eb0d..a2b7f5c3c4 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -223,23 +223,23 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + return ret; + } + +-static const TypeInfo iommufd_backend_info = { +- .name = TYPE_IOMMUFD_BACKEND, +- .parent = TYPE_OBJECT, +- .instance_size = sizeof(IOMMUFDBackend), +- .instance_init = iommufd_backend_init, +- .instance_finalize = iommufd_backend_finalize, +- .class_size = sizeof(IOMMUFDBackendClass), +- .class_init = iommufd_backend_class_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_IOMMUFD_BACKEND, ++ .parent = TYPE_OBJECT, ++ .instance_size = sizeof(IOMMUFDBackend), ++ .instance_init = iommufd_backend_init, ++ .instance_finalize = iommufd_backend_finalize, ++ .class_size = sizeof(IOMMUFDBackendClass), ++ .class_init = iommufd_backend_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++ }, { ++ .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, ++ .parent = 
TYPE_HOST_IOMMU_DEVICE, ++ .abstract = true, + } + }; +- +-static void register_types(void) +-{ +- type_register_static(&iommufd_backend_info); +-} +- +-type_init(register_types); ++DEFINE_TYPES(types) +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index d4c586e842..7a4b818830 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -641,7 +641,10 @@ static const TypeInfo types[] = { + .name = TYPE_VFIO_IOMMU_IOMMUFD, + .parent = TYPE_VFIO_IOMMU, + .class_init = vfio_iommu_iommufd_class_init, +- }, ++ }, { ++ .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, ++ .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, ++ } + }; + + DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 0c807c2806..2cfc8521cd 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -32,6 +32,7 @@ + #include "sysemu/sysemu.h" + #include "hw/vfio/vfio-container-base.h" + #include "sysemu/host_iommu_device.h" ++#include "sysemu/iommufd.h" + + #define VFIO_MSG_PREFIX "vfio %s: " + +@@ -77,6 +78,8 @@ typedef struct VFIOMigration { + struct VFIOGroup; + + #define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" ++#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ ++ TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" + + typedef struct VFIODMARange { + QLIST_ENTRY(VFIODMARange) next; +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 9c5524b0ed..1a75e82f42 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -1,3 +1,16 @@ ++/* ++ * iommufd container backend declaration ++ * ++ * Copyright (C) 2024 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2024 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * Zhenzhong Duan ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ + #ifndef SYSEMU_IOMMUFD_H + #define SYSEMU_IOMMUFD_H + +@@ -5,6 +18,7 @@ + #include "qemu/thread.h" + #include "exec/hwaddr.h" + #include "exec/cpu-common.h" ++#include "sysemu/host_iommu_device.h" + + #define TYPE_IOMMUFD_BACKEND "iommufd" + OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) +@@ -35,4 +49,6 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); + int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size); ++ ++#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + #endif +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Introduce-helper-function-iommufd_b.patch b/backends-iommufd-Introduce-helper-function-iommufd_b.patch new file mode 100644 index 0000000000000000000000000000000000000000..b45a86888b7bc857ea056547446deedf4949ca3a --- /dev/null +++ b/backends-iommufd-Introduce-helper-function-iommufd_b.patch @@ -0,0 +1,69 @@ +From ccd8baf4648e6fd6b69e65ee249609904edc92e1 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:33 +0800 +Subject: [PATCH] backends/iommufd: Introduce helper function + iommufd_backend_get_device_info() + +Introduce a helper function iommufd_backend_get_device_info() to get +host IOMMU related information through iommufd uAPI. + +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + backends/iommufd.c | 22 ++++++++++++++++++++++ + include/sysemu/iommufd.h | 3 +++ + 2 files changed, 25 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index a2b7f5c3c4..604a8f4e7d 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -223,6 +223,28 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + return ret; + } + ++bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, ++ uint32_t *type, void *data, uint32_t len, ++ Error **errp) ++{ ++ struct iommu_hw_info info = { ++ .size = sizeof(info), ++ .dev_id = devid, ++ .data_len = len, ++ .data_uptr = (uintptr_t)data, ++ }; ++ ++ if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) { ++ error_setg_errno(errp, errno, "Failed to get hardware info"); ++ return false; ++ } ++ ++ g_assert(type); ++ *type = info.out_data_type; ++ ++ return true; ++} ++ + static const TypeInfo types[] = { + { + .name = TYPE_IOMMUFD_BACKEND, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 1a75e82f42..dfade18e6d 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -49,6 +49,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); + int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size); ++bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, ++ uint32_t *type, void *data, uint32_t len, ++ Error **errp); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + #endif +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Introduce-iommufd_backend_alloc_vio.patch b/backends-iommufd-Introduce-iommufd_backend_alloc_vio.patch new file mode 100644 index 0000000000000000000000000000000000000000..be79447e026308cef5a70b976d4c2a41b6f9c18f --- /dev/null +++ b/backends-iommufd-Introduce-iommufd_backend_alloc_vio.patch @@ -0,0 +1,100 @@ +From 
207259b8f08e87b4a741a8b7884e699c95641a2e Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Sat, 13 Apr 2024 00:15:17 +0000 +Subject: [PATCH] backends/iommufd: Introduce iommufd_backend_alloc_viommu + +Add a helper to allocate a viommu object. + +Signed-off-by: Nicolin Chen +--- + backends/iommufd.c | 35 +++++++++++++++++++++++++++++++++++ + backends/trace-events | 1 + + include/sysemu/iommufd.h | 10 ++++++++++ + 3 files changed, 46 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index c10aa9b011..82368a3918 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -360,6 +360,41 @@ int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id, + return ret; + } + ++struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be, ++ uint32_t dev_id, ++ uint32_t viommu_type, ++ uint32_t hwpt_id) ++{ ++ int ret, fd = be->fd; ++ struct IOMMUFDViommu *viommu = g_malloc(sizeof(*viommu)); ++ struct iommu_viommu_alloc alloc_viommu = { ++ .size = sizeof(alloc_viommu), ++ .type = viommu_type, ++ .dev_id = dev_id, ++ .hwpt_id = hwpt_id, ++ }; ++ ++ if (!viommu) { ++ error_report("failed to allocate viommu object"); ++ return NULL; ++ } ++ ++ ret = ioctl(fd, IOMMU_VIOMMU_ALLOC, &alloc_viommu); ++ ++ trace_iommufd_backend_alloc_viommu(fd, viommu_type, dev_id, hwpt_id, ++ alloc_viommu.out_viommu_id, ret); ++ if (ret) { ++ error_report("IOMMU_VIOMMU_ALLOC failed: %s", strerror(errno)); ++ g_free(viommu); ++ return NULL; ++ } ++ ++ viommu->viommu_id = alloc_viommu.out_viommu_id; ++ viommu->s2_hwpt_id = hwpt_id; ++ viommu->iommufd = be; ++ return viommu; ++} ++ + bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) + { +diff --git a/backends/trace-events b/backends/trace-events +index ef0ff98921..c24cd378df 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -19,3 +19,4 @@ iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (% + 
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" + iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" + iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)" ++iommufd_backend_alloc_viommu(int iommufd, uint32_t type, uint32_t dev_id, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 3dc6934144..05a08c49c2 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -39,6 +39,12 @@ struct IOMMUFDBackend { + /*< public >*/ + }; + ++typedef struct IOMMUFDViommu { ++ IOMMUFDBackend *iommufd; ++ uint32_t s2_hwpt_id; ++ uint32_t viommu_id; ++} IOMMUFDViommu; ++ + int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); + void iommufd_backend_disconnect(IOMMUFDBackend *be); + +@@ -66,6 +72,10 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, + int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id, + uint32_t data_type, uint32_t entry_len, + uint32_t *entry_num, void *data_ptr); ++struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be, ++ uint32_t dev_id, ++ uint32_t viommu_type, ++ uint32_t hwpt_id); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Introduce-iommufd_vdev_alloc.patch b/backends-iommufd-Introduce-iommufd_vdev_alloc.patch new file mode 100644 index 
0000000000000000000000000000000000000000..91f8ea357fc14ec71698eeab793267366c3ac15a --- /dev/null +++ b/backends-iommufd-Introduce-iommufd_vdev_alloc.patch @@ -0,0 +1,89 @@ +From 005b8f4b6cef11982abcc2c071cbe40b69fb22e7 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Sat, 13 Apr 2024 00:21:22 +0000 +Subject: [PATCH] backends/iommufd: Introduce iommufd_vdev_alloc + +Add a helper to allocate an iommufd device's virtual device (in the user +space) per a viommu instance. + +Signed-off-by: Nicolin Chen +--- + backends/iommufd.c | 31 +++++++++++++++++++++++++++++++ + backends/trace-events | 1 + + include/sysemu/iommufd.h | 11 +++++++++++ + 3 files changed, 43 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 82368a3918..af3376d0bf 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -395,6 +395,37 @@ struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be, + return viommu; + } + ++struct IOMMUFDVdev *iommufd_backend_alloc_vdev(HostIOMMUDeviceIOMMUFD *idev, ++ IOMMUFDViommu *viommu, ++ uint64_t virt_id) ++{ ++ int ret, fd = viommu->iommufd->fd; ++ struct IOMMUFDVdev *vdev = g_malloc(sizeof(*vdev)); ++ struct iommu_vdevice_alloc alloc_vdev = { ++ .size = sizeof(alloc_vdev), ++ .viommu_id = viommu->viommu_id, ++ .dev_id = idev->devid, ++ .virt_id = virt_id, ++ }; ++ ++ ret = ioctl(fd, IOMMU_VDEVICE_ALLOC, &alloc_vdev); ++ ++ trace_iommufd_backend_alloc_vdev(fd, idev->devid, viommu->viommu_id, virt_id, ++ alloc_vdev.out_vdevice_id, ret); ++ ++ if (ret) { ++ error_report("IOMMU_VDEVICE_ALLOC failed: %s", strerror(errno)); ++ g_free(vdev); ++ return NULL; ++ } ++ ++ vdev->idev = idev; ++ vdev->viommu = viommu; ++ vdev->virt_id = virt_id; ++ vdev->vdev_id = alloc_vdev.out_vdevice_id; ++ return vdev; ++} ++ + bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) + { +diff --git a/backends/trace-events b/backends/trace-events +index c24cd378df..e150a37e9a 100644 +--- 
a/backends/trace-events ++++ b/backends/trace-events +@@ -20,3 +20,4 @@ iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " + iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" + iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)" + iommufd_backend_alloc_viommu(int iommufd, uint32_t type, uint32_t dev_id, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)" ++iommufd_backend_alloc_vdev(int iommufd, uint32_t dev_id, uint32_t viommu_id, uint64_t virt_id, uint32_t vdev_id, int ret) " iommufd=%d dev_id=%u viommu_id=%u virt_id=0x%"PRIx64" vdev_id=%u (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 05a08c49c2..0284e95460 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -128,4 +128,15 @@ bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp); + bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp); ++ ++typedef struct IOMMUFDVdev { ++ HostIOMMUDeviceIOMMUFD *idev; ++ IOMMUFDViommu *viommu; ++ uint32_t vdev_id; ++ uint64_t virt_id; ++} IOMMUFDVdev; ++ ++struct IOMMUFDVdev *iommufd_backend_alloc_vdev(HostIOMMUDeviceIOMMUFD *idev, ++ IOMMUFDViommu *viommu, ++ uint64_t virt_id); + #endif +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Introduce-iommufd_viommu_invalidate.patch b/backends-iommufd-Introduce-iommufd_viommu_invalidate.patch new file mode 100644 index 0000000000000000000000000000000000000000..a835fb06dc5c9aa31876bb399112bb9b676eb583 --- /dev/null +++ 
b/backends-iommufd-Introduce-iommufd_viommu_invalidate.patch @@ -0,0 +1,84 @@ +From 2be28f75e4ed2a0a35549dd1a545e0655e63973d Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Fri, 12 Apr 2024 23:27:54 +0000 +Subject: [PATCH] backends/iommufd: Introduce iommufd_viommu_invalidate_cache + +Similar to iommufd_backend_invalidate_cache for iotlb invalidation via +IOMMU_HWPT_INVALIDATE ioctl, add a new helper for viommu specific cache +invalidation via IOMMU_VIOMMU_INVALIDATE ioctl. + +Signed-off-by: Nicolin Chen +--- + backends/iommufd.c | 31 +++++++++++++++++++++++++++++++ + backends/trace-events | 1 + + include/sysemu/iommufd.h | 3 +++ + 3 files changed, 35 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index af3376d0bf..ee6f5bcf65 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -426,6 +426,37 @@ struct IOMMUFDVdev *iommufd_backend_alloc_vdev(HostIOMMUDeviceIOMMUFD *idev, + return vdev; + } + ++int iommufd_viommu_invalidate_cache(IOMMUFDBackend *be, uint32_t viommu_id, ++ uint32_t data_type, uint32_t entry_len, ++ uint32_t *entry_num, void *data_ptr) ++{ ++ int ret, fd = be->fd; ++ struct iommu_hwpt_invalidate cache = { ++ .size = sizeof(cache), ++ .hwpt_id = viommu_id, ++ .data_type = data_type, ++ .entry_len = entry_len, ++ .entry_num = *entry_num, ++ .data_uptr = (uint64_t)data_ptr, ++ }; ++ ++ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache); ++ ++ trace_iommufd_viommu_invalidate_cache(fd, viommu_id, data_type, ++ entry_len, *entry_num, ++ cache.entry_num, ++ (uint64_t)data_ptr, ret); ++ if (ret) { ++ *entry_num = cache.entry_num; ++ error_report("IOMMU_VIOMMU_INVALIDATE failed: %s", strerror(errno)); ++ ret = -errno; ++ } else { ++ g_assert(*entry_num == cache.entry_num); ++ } ++ ++ return ret; ++} ++ + bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) + { +diff --git a/backends/trace-events b/backends/trace-events +index e150a37e9a..f8592a2711 100644 +--- 
a/backends/trace-events ++++ b/backends/trace-events +@@ -21,3 +21,4 @@ iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, u + iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)" + iommufd_backend_alloc_viommu(int iommufd, uint32_t type, uint32_t dev_id, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)" + iommufd_backend_alloc_vdev(int iommufd, uint32_t dev_id, uint32_t viommu_id, uint64_t virt_id, uint32_t vdev_id, int ret) " iommufd=%d dev_id=%u viommu_id=%u virt_id=0x%"PRIx64" vdev_id=%u (%d)" ++iommufd_viommu_invalidate_cache(int iommufd, uint32_t viommu_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d viommu_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 0284e95460..0f2c826036 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -76,6 +76,9 @@ struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be, + uint32_t dev_id, + uint32_t viommu_type, + uint32_t hwpt_id); ++int iommufd_viommu_invalidate_cache(IOMMUFDBackend *be, uint32_t viommu_id, ++ uint32_t data_type, uint32_t entry_len, ++ uint32_t *entry_num, void *data_ptr); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Introduce-the-iommufd-object.patch b/backends-iommufd-Introduce-the-iommufd-object.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6a2a10f5d3ddd84e6f767eb4763983015e7dbae --- /dev/null +++ 
b/backends-iommufd-Introduce-the-iommufd-object.patch @@ -0,0 +1,468 @@ +From 6cb41a55992571dd215fee86ed910bb4d6688bf8 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:37 +0800 +Subject: [PATCH] backends/iommufd: Introduce the iommufd object +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce an iommufd object which allows the interaction +with the host /dev/iommu device. + +The /dev/iommu can have been already pre-opened outside of qemu, +in which case the fd can be passed directly along with the +iommufd object: + +This allows the iommufd object to be shared across several +subsystems (VFIO, VDPA, ...). For example, libvirt would open +the /dev/iommu once. + +If no fd is passed along with the iommufd object, the /dev/iommu +is opened by the qemu code. + +Suggested-by: Alex Williamson +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + MAINTAINERS | 8 ++ + backends/Kconfig | 4 + + backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++ + backends/meson.build | 1 + + backends/trace-events | 10 ++ + include/sysemu/iommufd.h | 38 ++++++ + qapi/qom.json | 19 +++ + qemu-options.hx | 12 ++ + 8 files changed, 337 insertions(+) + create mode 100644 backends/iommufd.c + create mode 100644 include/sysemu/iommufd.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 695e0bd34f..a5a446914a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c + F: docs/system/s390x/vfio-ap.rst + L: qemu-s390x@nongnu.org + ++iommufd ++M: Yi Liu ++M: Eric Auger ++M: Zhenzhong Duan ++S: Supported ++F: backends/iommufd.c ++F: include/sysemu/iommufd.h ++ + vhost + M: Michael S. 
Tsirkin + S: Supported +diff --git a/backends/Kconfig b/backends/Kconfig +index f35abc1609..2cb23f62fa 100644 +--- a/backends/Kconfig ++++ b/backends/Kconfig +@@ -1 +1,5 @@ + source tpm/Kconfig ++ ++config IOMMUFD ++ bool ++ depends on VFIO +diff --git a/backends/iommufd.c b/backends/iommufd.c +new file mode 100644 +index 0000000000..ba58a0eb0d +--- /dev/null ++++ b/backends/iommufd.c +@@ -0,0 +1,245 @@ ++/* ++ * iommufd container backend ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "sysemu/iommufd.h" ++#include "qapi/error.h" ++#include "qapi/qmp/qerror.h" ++#include "qemu/module.h" ++#include "qom/object_interfaces.h" ++#include "qemu/error-report.h" ++#include "monitor/monitor.h" ++#include "trace.h" ++#include ++#include ++ ++static void iommufd_backend_init(Object *obj) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ ++ be->fd = -1; ++ be->users = 0; ++ be->owned = true; ++ qemu_mutex_init(&be->lock); ++} ++ ++static void iommufd_backend_finalize(Object *obj) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ ++ if (be->owned) { ++ close(be->fd); ++ be->fd = -1; ++ } ++} ++ ++static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ int fd = -1; ++ ++ fd = monitor_fd_param(monitor_cur(), str, errp); ++ if (fd == -1) { ++ error_prepend(errp, "Could not parse remote object fd %s:", str); ++ return; ++ } ++ qemu_mutex_lock(&be->lock); ++ be->fd = fd; ++ be->owned = false; ++ qemu_mutex_unlock(&be->lock); ++ trace_iommu_backend_set_fd(be->fd); ++} ++ ++static bool iommufd_backend_can_be_deleted(UserCreatable *uc) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); ++ ++ return !be->users; ++} ++ ++static void iommufd_backend_class_init(ObjectClass *oc, void *data) ++{ ++ UserCreatableClass *ucc = 
USER_CREATABLE_CLASS(oc); ++ ++ ucc->can_be_deleted = iommufd_backend_can_be_deleted; ++ ++ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); ++} ++ ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) ++{ ++ int fd, ret = 0; ++ ++ qemu_mutex_lock(&be->lock); ++ if (be->users == UINT32_MAX) { ++ error_setg(errp, "too many connections"); ++ ret = -E2BIG; ++ goto out; ++ } ++ if (be->owned && !be->users) { ++ fd = qemu_open_old("/dev/iommu", O_RDWR); ++ if (fd < 0) { ++ error_setg_errno(errp, errno, "/dev/iommu opening failed"); ++ ret = fd; ++ goto out; ++ } ++ be->fd = fd; ++ } ++ be->users++; ++out: ++ trace_iommufd_backend_connect(be->fd, be->owned, ++ be->users, ret); ++ qemu_mutex_unlock(&be->lock); ++ return ret; ++} ++ ++void iommufd_backend_disconnect(IOMMUFDBackend *be) ++{ ++ qemu_mutex_lock(&be->lock); ++ if (!be->users) { ++ goto out; ++ } ++ be->users--; ++ if (!be->users && be->owned) { ++ close(be->fd); ++ be->fd = -1; ++ } ++out: ++ trace_iommufd_backend_disconnect(be->fd, be->users); ++ qemu_mutex_unlock(&be->lock); ++} ++ ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_alloc alloc_data = { ++ .size = sizeof(alloc_data), ++ .flags = 0, ++ }; ++ ++ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); ++ if (ret) { ++ error_setg_errno(errp, errno, "Failed to allocate ioas"); ++ return ret; ++ } ++ ++ *ioas_id = alloc_data.out_ioas_id; ++ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); ++ ++ return ret; ++} ++ ++void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) ++{ ++ int ret, fd = be->fd; ++ struct iommu_destroy des = { ++ .size = sizeof(des), ++ .id = id, ++ }; ++ ++ ret = ioctl(fd, IOMMU_DESTROY, &des); ++ trace_iommufd_backend_free_id(fd, id, ret); ++ if (ret) { ++ error_report("Failed to free id: %u %m", id); ++ } ++} ++ ++int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ++ ram_addr_t 
size, void *vaddr, bool readonly) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_map map = { ++ .size = sizeof(map), ++ .flags = IOMMU_IOAS_MAP_READABLE | ++ IOMMU_IOAS_MAP_FIXED_IOVA, ++ .ioas_id = ioas_id, ++ .__reserved = 0, ++ .user_va = (uintptr_t)vaddr, ++ .iova = iova, ++ .length = size, ++ }; ++ ++ if (!readonly) { ++ map.flags |= IOMMU_IOAS_MAP_WRITEABLE; ++ } ++ ++ ret = ioctl(fd, IOMMU_IOAS_MAP, &map); ++ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, ++ vaddr, readonly, ret); ++ if (ret) { ++ ret = -errno; ++ ++ /* TODO: Not support mapping hardware PCI BAR region for now. */ ++ if (errno == EFAULT) { ++ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); ++ } else { ++ error_report("IOMMU_IOAS_MAP failed: %m"); ++ } ++ } ++ return ret; ++} ++ ++int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, ++ hwaddr iova, ram_addr_t size) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_unmap unmap = { ++ .size = sizeof(unmap), ++ .ioas_id = ioas_id, ++ .iova = iova, ++ .length = size, ++ }; ++ ++ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); ++ /* ++ * IOMMUFD takes mapping as some kind of object, unmapping ++ * nonexistent mapping is treated as deleting a nonexistent ++ * object and return ENOENT. This is different from legacy ++ * backend which allows it. vIOMMU may trigger a lot of ++ * redundant unmapping, to avoid flush the log, treat them ++ * as succeess for IOMMUFD just like legacy backend. 
++ */ ++ if (ret && errno == ENOENT) { ++ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); ++ ret = 0; ++ } else { ++ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); ++ } ++ ++ if (ret) { ++ ret = -errno; ++ error_report("IOMMU_IOAS_UNMAP failed: %m"); ++ } ++ return ret; ++} ++ ++static const TypeInfo iommufd_backend_info = { ++ .name = TYPE_IOMMUFD_BACKEND, ++ .parent = TYPE_OBJECT, ++ .instance_size = sizeof(IOMMUFDBackend), ++ .instance_init = iommufd_backend_init, ++ .instance_finalize = iommufd_backend_finalize, ++ .class_size = sizeof(IOMMUFDBackendClass), ++ .class_init = iommufd_backend_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static void register_types(void) ++{ ++ type_register_static(&iommufd_backend_info); ++} ++ ++type_init(register_types); +diff --git a/backends/meson.build b/backends/meson.build +index 914c7c4afb..9a5cea480d 100644 +--- a/backends/meson.build ++++ b/backends/meson.build +@@ -20,6 +20,7 @@ if have_vhost_user + system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) + endif + system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) ++system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) + if have_vhost_user_crypto + system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) + endif +diff --git a/backends/trace-events b/backends/trace-events +index 652eb76a57..d45c6e31a6 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void) + dbus_vmstate_post_load(int version_id) "version_id: %d" + dbus_vmstate_loading(const char *id) "id: %s" + dbus_vmstate_saving(const char *id) "id: %s" ++ ++# iommufd.c ++iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" ++iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" ++iommu_backend_set_fd(int fd) "pre-opened 
/dev/iommu fd=%d" ++iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" ++iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" ++iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" ++iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" ++iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +new file mode 100644 +index 0000000000..9c5524b0ed +--- /dev/null ++++ b/include/sysemu/iommufd.h +@@ -0,0 +1,38 @@ ++#ifndef SYSEMU_IOMMUFD_H ++#define SYSEMU_IOMMUFD_H ++ ++#include "qom/object.h" ++#include "qemu/thread.h" ++#include "exec/hwaddr.h" ++#include "exec/cpu-common.h" ++ ++#define TYPE_IOMMUFD_BACKEND "iommufd" ++OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) ++ ++struct IOMMUFDBackendClass { ++ ObjectClass parent_class; ++}; ++ ++struct IOMMUFDBackend { ++ Object parent; ++ ++ /*< protected >*/ ++ int fd; /* /dev/iommu file descriptor */ ++ bool owned; /* is the /dev/iommu opened internally */ ++ QemuMutex lock; ++ uint32_t users; ++ ++ /*< public >*/ ++}; ++ ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); ++void iommufd_backend_disconnect(IOMMUFDBackend *be); ++ ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp); ++void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); ++int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly); ++int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t 
ioas_id, ++ hwaddr iova, ram_addr_t size); ++#endif +diff --git a/qapi/qom.json b/qapi/qom.json +index a74c7a91f9..a5336e6b11 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -794,6 +794,23 @@ + { 'struct': 'VfioUserServerProperties', + 'data': { 'socket': 'SocketAddress', 'device': 'str' } } + ++## ++# @IOMMUFDProperties: ++# ++# Properties for iommufd objects. ++# ++# @fd: file descriptor name previously passed via 'getfd' command, ++# which represents a pre-opened /dev/iommu. This allows the ++# iommufd object to be shared accross several subsystems ++# (VFIO, VDPA, ...), and the file descriptor to be shared ++# with other process, e.g. DPDK. (default: QEMU opens ++# /dev/iommu by itself) ++# ++# Since: 9.0 ++## ++{ 'struct': 'IOMMUFDProperties', ++ 'data': { '*fd': 'str' } } ++ + ## + # @RngProperties: + # +@@ -969,6 +986,7 @@ + 'input-barrier', + { 'name': 'input-linux', + 'if': 'CONFIG_LINUX' }, ++ 'iommufd', + 'iothread', + 'main-loop', + { 'name': 'memory-backend-epc', +@@ -1039,6 +1057,7 @@ + 'input-barrier': 'InputBarrierProperties', + 'input-linux': { 'type': 'InputLinuxProperties', + 'if': 'CONFIG_LINUX' }, ++ 'iommufd': 'IOMMUFDProperties', + 'iothread': 'IothreadProperties', + 'main-loop': 'MainLoopProperties', + 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', +diff --git a/qemu-options.hx b/qemu-options.hx +index 8516b73206..7fe76c4b1d 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5224,6 +5224,18 @@ SRST + + The ``share`` boolean option is on by default with memfd. + ++ ``-object iommufd,id=id[,fd=fd]`` ++ Creates an iommufd backend which allows control of DMA mapping ++ through the ``/dev/iommu`` device. ++ ++ The ``id`` parameter is a unique ID which frontends (such as ++ vfio-pci of vdpa) will use to connect with the iommufd backend. ++ ++ The ``fd`` parameter is an optional pre-opened file descriptor ++ resulting from ``/dev/iommu`` opening. 
Usually the iommufd is shared ++ across all subsystems, bringing the benefit of centralized ++ reference counting. ++ + ``-object rng-builtin,id=id`` + Creates a random number generator backend which obtains entropy + from QEMU builtin functions. The ``id`` parameter is a unique ID +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Make-iommufd_backend_-return-bool.patch b/backends-iommufd-Make-iommufd_backend_-return-bool.patch new file mode 100644 index 0000000000000000000000000000000000000000..134f5c15f35c886ca23eb3b5ab393f6330a2c92e --- /dev/null +++ b/backends-iommufd-Make-iommufd_backend_-return-bool.patch @@ -0,0 +1,140 @@ +From c9a107b1f73bddb4c9844c12444e3802e5f576b4 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 7 May 2024 14:42:52 +0800 +Subject: [PATCH] backends/iommufd: Make iommufd_backend_*() return bool +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is to follow the coding standard to return bool if 'Error **' +is used to pass error. + +The changed functions include: + +iommufd_backend_connect +iommufd_backend_alloc_ioas + +By this chance, simplify the functions a bit by avoiding duplicate +recordings, e.g., log through either error interface or trace, not +both. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +--- + backends/iommufd.c | 29 +++++++++++++---------------- + backends/trace-events | 4 ++-- + include/sysemu/iommufd.h | 6 +++--- + 3 files changed, 18 insertions(+), 21 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index f061b6869a..fad580fdcb 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -74,24 +74,22 @@ static void iommufd_backend_class_init(ObjectClass *oc, void *data) + object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); + } + +-int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) ++bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + { +- int fd, ret = 0; ++ int fd; + + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "/dev/iommu opening failed"); +- ret = fd; +- goto out; ++ return false; + } + be->fd = fd; + } + be->users++; +-out: +- trace_iommufd_backend_connect(be->fd, be->owned, +- be->users, ret); +- return ret; ++ ++ trace_iommufd_backend_connect(be->fd, be->owned, be->users); ++ return true; + } + + void iommufd_backend_disconnect(IOMMUFDBackend *be) +@@ -108,25 +106,24 @@ out: + trace_iommufd_backend_disconnect(be->fd, be->users); + } + +-int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +- Error **errp) ++bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp) + { +- int ret, fd = be->fd; ++ int fd = be->fd; + struct iommu_ioas_alloc alloc_data = { + .size = sizeof(alloc_data), + .flags = 0, + }; + +- ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); +- if (ret) { ++ if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) { + error_setg_errno(errp, errno, "Failed to allocate ioas"); +- return ret; ++ return false; + } + + *ioas_id = alloc_data.out_ioas_id; +- trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); ++ 
trace_iommufd_backend_alloc_ioas(fd, *ioas_id); + +- return ret; ++ return true; + } + + void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) +diff --git a/backends/trace-events b/backends/trace-events +index f8592a2711..8fe77149b2 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -7,13 +7,13 @@ dbus_vmstate_loading(const char *id) "id: %s" + dbus_vmstate_saving(const char *id) "id: %s" + + # iommufd.c +-iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" ++iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d" + iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" + iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" + iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" + iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" + iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" +-iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" ++iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d" + iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)" + iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" + iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" +diff --git a/include/sysemu/iommufd.h 
b/include/sysemu/iommufd.h +index 908c94d811..0531a4ad98 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -43,11 +43,11 @@ typedef struct IOMMUFDViommu { + uint32_t viommu_id; + } IOMMUFDViommu; + +-int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); ++bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); + void iommufd_backend_disconnect(IOMMUFDBackend *be); + +-int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +- Error **errp); ++bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp); + void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); + int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Remove-check-on-number-of-backend-u.patch b/backends-iommufd-Remove-check-on-number-of-backend-u.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ec96e2aa1d7b51a5dc9646732e9add93f61c2a3 --- /dev/null +++ b/backends-iommufd-Remove-check-on-number-of-backend-u.patch @@ -0,0 +1,37 @@ +From e2bc395c5db34111faf2adcecdb385e5a4e8d23d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 22 Dec 2023 08:55:23 +0100 +Subject: [PATCH] backends/iommufd: Remove check on number of backend users +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +QOM already has a ref count on objects and it will assert much +earlier, when INT_MAX is reached. 
+ +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +--- + backends/iommufd.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 4f5df63331..f17a846aab 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -81,11 +81,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + int fd, ret = 0; + + qemu_mutex_lock(&be->lock); +- if (be->users == UINT32_MAX) { +- error_setg(errp, "too many connections"); +- ret = -E2BIG; +- goto out; +- } + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +-- +2.41.0.windows.1 + diff --git a/backends-iommufd-Remove-mutex.patch b/backends-iommufd-Remove-mutex.patch new file mode 100644 index 0000000000000000000000000000000000000000..db4217e8e10b12d4471c208bf9226ebe8bc9244e --- /dev/null +++ b/backends-iommufd-Remove-mutex.patch @@ -0,0 +1,103 @@ +From 1e6734af14b3223a7d7e304262c96051ddf8637f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Thu, 21 Dec 2023 16:58:41 +0100 +Subject: [PATCH] backends/iommufd: Remove mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Coverity reports a concurrent data access violation because be->users +is being accessed in iommufd_backend_can_be_deleted() without holding +the mutex. + +However, these routines are called from the QEMU main thread when a +device is created. In this case, the code paths should be protected by +the BQL lock and it should be safe to drop the IOMMUFD backend mutex. +Simply remove it. 
+ +Fixes: CID 1531550 +Fixes: CID 1531549 +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +--- + backends/iommufd.c | 7 ------- + include/sysemu/iommufd.h | 2 -- + 2 files changed, 9 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index f17a846aab..3cbf11fc8b 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -30,7 +30,6 @@ static void iommufd_backend_init(Object *obj) + be->fd = -1; + be->users = 0; + be->owned = true; +- qemu_mutex_init(&be->lock); + } + + static void iommufd_backend_finalize(Object *obj) +@@ -53,10 +52,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) + error_prepend(errp, "Could not parse remote object fd %s:", str); + return; + } +- qemu_mutex_lock(&be->lock); + be->fd = fd; + be->owned = false; +- qemu_mutex_unlock(&be->lock); + trace_iommu_backend_set_fd(be->fd); + } + +@@ -80,7 +77,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + { + int fd, ret = 0; + +- qemu_mutex_lock(&be->lock); + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +@@ -94,13 +90,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + out: + trace_iommufd_backend_connect(be->fd, be->owned, + be->users, ret); +- qemu_mutex_unlock(&be->lock); + return ret; + } + + void iommufd_backend_disconnect(IOMMUFDBackend *be) + { +- qemu_mutex_lock(&be->lock); + if (!be->users) { + goto out; + } +@@ -111,7 +105,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) + } + out: + trace_iommufd_backend_disconnect(be->fd, be->users); +- qemu_mutex_unlock(&be->lock); + } + + int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 29afaa429d..908c94d811 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -15,7 +15,6 @@ + #define SYSEMU_IOMMUFD_H + + #include "qom/object.h" +-#include "qemu/thread.h" + 
#include "exec/hwaddr.h" + #include "exec/cpu-common.h" + #include "sysemu/host_iommu_device.h" +@@ -33,7 +32,6 @@ struct IOMMUFDBackend { + /*< protected >*/ + int fd; /* /dev/iommu file descriptor */ + bool owned; /* is the /dev/iommu opened internally */ +- QemuMutex lock; + uint32_t users; + + /*< public >*/ +-- +2.41.0.windows.1 + diff --git a/backends-tpm-Avoid-using-g_alloca.patch b/backends-tpm-Avoid-using-g_alloca.patch new file mode 100644 index 0000000000000000000000000000000000000000..dba5000f0690a02c743cf7bfb788f7850e23bae2 --- /dev/null +++ b/backends-tpm-Avoid-using-g_alloca.patch @@ -0,0 +1,44 @@ +From ab6aa0196a12fa15db9c94212ccea39164417cc8 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 14 Jun 2025 15:43:39 +0800 +Subject: [PATCH] backends/tpm: Avoid using g_alloca() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + cherry picked from commit 0ff9cd9a6af54ccaa293e252aa356fb150788099 + tpm_emulator_ctrlcmd() is not in hot path. + Use the heap instead of the stack, removing + the g_alloca() call. 
+ Signed-off-by: Philippe Mathieu-Daudé + Reviewed-by: Pierrick Bouvier + Reviewed-by: Thomas Huth + Reviewed-by: Stefan Berger + Reviewed-by: Stefan Hajnoczi + Message-Id: <20250605193540.59874-3-philmd@linaro.org> + + Signed-off-by: dinglimin +--- + backends/tpm/tpm_emulator.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c +index f7f1b4ad7a..0d07df216e 100644 +--- a/backends/tpm/tpm_emulator.c ++++ b/backends/tpm/tpm_emulator.c +@@ -128,10 +128,10 @@ static int tpm_emulator_ctrlcmd(TPMEmulator *tpm, unsigned long cmd, void *msg, + CharBackend *dev = &tpm->ctrl_chr; + uint32_t cmd_no = cpu_to_be32(cmd); + ssize_t n = sizeof(uint32_t) + msg_len_in; +- uint8_t *buf = NULL; + + WITH_QEMU_LOCK_GUARD(&tpm->mutex) { +- buf = g_alloca(n); ++ g_autofree uint8_t *buf = g_malloc(n); ++ + memcpy(buf, &cmd_no, sizeof(cmd_no)); + memcpy(buf + sizeof(cmd_no), msg, msg_len_in); + +-- +2.33.0 + diff --git a/backup-Improve-error-for-bdrv_getlength-failure.patch b/backup-Improve-error-for-bdrv_getlength-failure.patch deleted file mode 100644 index df188942c913062b499c1d6579556784661b2985..0000000000000000000000000000000000000000 --- a/backup-Improve-error-for-bdrv_getlength-failure.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 0b66aef5389d622434128fc7db9abd2cd4724b51 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:19 +0100 -Subject: [PATCH] backup: Improve error for bdrv_getlength() failure - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-6-kwolf@redhat.com> -Patchwork-id: 97103 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 05/11] backup: Improve error for bdrv_getlength() failure -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -bdrv_get_device_name() will be an empty string with modern management -tools that don't use -drive. 
Use bdrv_get_device_or_node_name() instead -so that the node name is used if the BlockBackend is anonymous. - -While at it, start with upper case to make the message consistent with -the rest of the function. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Message-Id: <20200430142755.315494-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 58226634c4b02af7b10862f7fbd3610a344bfb7f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/backup.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 8761f1f9a7..88354dcb32 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -613,8 +613,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - - len = bdrv_getlength(bs); - if (len < 0) { -- error_setg_errno(errp, -len, "unable to get length for '%s'", -- bdrv_get_device_name(bs)); -+ error_setg_errno(errp, -len, "Unable to get length for '%s'", -+ bdrv_get_device_or_node_name(bs)); - goto error; - } - --- -2.27.0 - diff --git a/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch b/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a08dc5ebb4da87c7704a272ce63b058a690df42 --- /dev/null +++ b/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch @@ -0,0 +1,101 @@ +From c28455a0bac4bbf171d1f19e162557377a85e96c Mon Sep 17 00:00:00 2001 +From: Ming Yang +Date: Sat, 23 Mar 2024 16:32:46 +0800 +Subject: [PATCH] [backup] memory: bakcup hugepages: hugepages files maybe + leftover + +old info: +commit id: +3cb1b0ce091998532a30793e3272925da4e6f3aa +old messages: +hugepages: hugepages files maybe leftover + +Before qemu uses the hugepage memory directory /dev/hugepages/libvirt/qemu/xxx, +The directory may be deleted because of the destroy virtual machine. 
+Cause qemu to create files directly under /dev/hugepages/libvirt/qemu/. +After the file is created, the file is not cleaned up by unlink, +and when the virtual machine is destroyed, libvirt will only clean up +/dev/hugepages/libvirt/qemu/xxx directory. After creating the hugepage file, +execute unlink to clean up the file to fix the problem. + +Signed-off-by: Jinhua Cao +Signed-off-by: Jiajie Li + +Signed-off-by: Ming Yang +--- + include/qemu/mmap-alloc.h | 4 ++++ + system/physmem.c | 9 ++++++++- + util/mmap-alloc.c | 22 ++++++++++++++++++++++ + 3 files changed, 34 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h +index 8344daaa03..63e4edfd2f 100644 +--- a/include/qemu/mmap-alloc.h ++++ b/include/qemu/mmap-alloc.h +@@ -1,6 +1,10 @@ + #ifndef QEMU_MMAP_ALLOC_H + #define QEMU_MMAP_ALLOC_H + ++#define HUGETLBFS_MAGIC 0x958458f6 ++ ++size_t qemu_fd_getfiletype(int fd); ++ + typedef enum { + QEMU_FS_TYPE_UNKNOWN = 0, + QEMU_FS_TYPE_TMPFS, +diff --git a/system/physmem.c b/system/physmem.c +index a63853a7bc..f14d64819b 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1329,7 +1329,14 @@ static int file_ram_open(const char *path, + /* @path names a file that doesn't exist, create it */ + fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd >= 0) { +- *created = true; ++ info_report("open %s success \n", path); ++ /* if fd file type is HUGETLBFS_MAGIC, unlink it, */ ++ /* in case to prevent residue after qemu killed */ ++ if (qemu_fd_getfiletype(fd) == HUGETLBFS_MAGIC) { ++ unlink(path); ++ } else { ++ *created = true; ++ } + break; + } + } else if (errno == EISDIR) { +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index ed14f9c64d..6890ad676c 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -30,6 +30,28 @@ + #include + #endif + ++size_t qemu_fd_getfiletype(int fd) ++{ ++ struct statfs fs; ++ int ret; ++ ++ if (fd != -1) { ++ do { ++ ret = fstatfs(fd, &fs); ++ } while (ret != 0 && errno == 
EINTR); ++ ++ if (ret != 0) { ++ fprintf(stderr, "Couldn't fstatfs() fd: %s\n", ++ strerror(errno)); ++ return -1; ++ } ++ return fs.f_type; ++ } else { ++ fprintf(stderr, "fd is invalid \n"); ++ return -1; ++ } ++} ++ + QemuFsType qemu_fd_getfs(int fd) + { + #ifdef CONFIG_LINUX +-- +2.27.0 + diff --git a/bakcend-VirtCCA-resolve-hugepage-memory-waste-issue-.patch b/bakcend-VirtCCA-resolve-hugepage-memory-waste-issue-.patch new file mode 100644 index 0000000000000000000000000000000000000000..25ee218f9b2c8aef56ec4a4d88d67d6a53ba5b6e --- /dev/null +++ b/bakcend-VirtCCA-resolve-hugepage-memory-waste-issue-.patch @@ -0,0 +1,320 @@ +From da6ee14de85b4e619eedfbe3a6cac3f09d948589 Mon Sep 17 00:00:00 2001 +From: nonce <2774337358@qq.com> +Date: Thu, 23 Jan 2025 21:03:10 +0800 +Subject: [PATCH] bakcend: VirtCCA:resolve hugepage memory waste issue in + vhost-user scenario + +VirtCCA is based on SWIOTLB to implement virtio and will only allocate +Bounce Buffer in the lower address range below 4GB. Therefore, the +backend hugepages memory allocated above 4GB will not be used, resulting +in significant waste. + +New address space and memory region are added to manage the backend +hugepages memory corresponding to the GPA below 4GB, and there are +shared with the vhostuser backend. 
+ +Signed-off-by: nonce0_0 <2774337358@qq.com> +--- + backends/hostmem-file.c | 85 +++++++++++++++++++++++++++++++++++ + hw/core/numa.c | 20 +++++++++ + hw/virtio/vhost.c | 8 +++- + include/exec/address-spaces.h | 3 ++ + include/exec/cpu-common.h | 1 + + include/exec/memory.h | 11 +++++ + system/physmem.c | 17 +++++++ + system/vl.c | 9 ++++ + 8 files changed, 153 insertions(+), 1 deletion(-) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index 361d4a8103..891fe4ac4a 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -20,9 +20,13 @@ + #include "qom/object.h" + #include "qapi/visitor.h" + #include "qapi/qapi-visit-common.h" ++#include "sysemu/kvm.h" ++#include "exec/address-spaces.h" + + OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE) + ++bool virtcca_shared_hugepage_mapped = false; ++uint64_t virtcca_cvm_ram_size = 0; + + struct HostMemoryBackendFile { + HostMemoryBackend parent_obj; +@@ -36,6 +40,83 @@ struct HostMemoryBackendFile { + OnOffAuto rom; + }; + ++/* Parse the path of the hugepages memory file used for memory sharing */ ++static int virtcca_parse_share_mem_path(char *src, char *dst) ++{ ++ int ret = 0; ++ char src_copy[PATH_MAX]; ++ char *token = NULL; ++ char *last_dir = NULL; ++ char *second_last_dir = NULL; ++ static const char delimiter[] = "/"; ++ ++ if (src == NULL || dst == NULL || ++ strlen(src) == 0 || strlen(src) > PATH_MAX - 1) { ++ error_report("Invalid input: NULL pointer or invalid string length."); ++ return -1; ++ } ++ ++ strcpy(src_copy, src); ++ token = strtok(src_copy, delimiter); ++ ++ /* Iterate over the path segments to find the second-to-last directory */ ++ while (token != NULL) { ++ second_last_dir = last_dir; ++ last_dir = token; ++ token = strtok(NULL, delimiter); ++ } ++ ++ /* Check if the second-to-last directory is found */ ++ if (second_last_dir == NULL) { ++ error_report("Invalid path: second-to-last directory not found."); ++ return -1; ++ } ++ ++ /* ++ * 
Construct the share memory path by appending the extracted domain name ++ * to the hugepages memory filesystem prefix ++ */ ++ ret = snprintf(dst, PATH_MAX, "/dev/hugepages/libvirt/qemu/%s", ++ second_last_dir); ++ ++ if (ret < 0 || ret >= PATH_MAX) { ++ error_report("Error: snprintf failed to construct the share mem path"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Create a hugepage memory region in the virtcca scenario ++ * for sharing with process like vhost-user and others. ++ */ ++static void ++virtcca_shared_backend_memory_alloc(char *mem_path, uint32_t ram_flags, Error **errp) ++{ ++ char dst[PATH_MAX]; ++ uint64_t size = virtcca_cvm_ram_size; ++ ++ if (virtcca_parse_share_mem_path(mem_path, dst)) { ++ error_report("parse virtcca share memory path failed"); ++ exit(1); ++ } ++ if (virtcca_cvm_ram_size >= VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE) { ++ size = VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE; ++ } ++ ++ virtcca_shared_hugepage = g_new(MemoryRegion, 1); ++ memory_region_init_ram_from_file(virtcca_shared_hugepage, NULL, ++ "virtcca_shared_hugepage", size, ++ VIRTCCA_SHARED_HUGEPAGE_ALIGN, ++ ram_flags, dst, 0, errp); ++ if (*errp) { ++ error_reportf_err(*errp, "cannot init RamBlock for virtcca_shared_hugepage: "); ++ exit(1); ++ } ++ virtcca_shared_hugepage_mapped = true; ++} ++ + static void + file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + { +@@ -90,6 +171,10 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + backend->size, fb->align, ram_flags, + fb->mem_path, fb->offset, errp); + g_free(name); ++ ++ if (virtcca_cvm_enabled() && backend->share && !virtcca_shared_hugepage_mapped) { ++ virtcca_shared_backend_memory_alloc(fb->mem_path, ram_flags, errp); ++ } + #endif + } + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index f08956ddb0..e7c48dab61 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -42,6 +42,8 @@ + #include "qemu/option.h" + #include "qemu/config-file.h" + #include "qemu/cutils.h" ++#include 
"exec/address-spaces.h" ++#include "sysemu/kvm.h" + + QemuOptsList qemu_numa_opts = { + .name = "numa", +@@ -641,6 +643,21 @@ static void numa_init_memdev_container(MachineState *ms, MemoryRegion *ram) + } + } + ++/* ++ * Add virtcca_shared_hugepage as a sub-MR to the root MR of address space ++ * address_space_memory and address_space_virtcca_shared_memory. ++ */ ++static void virtcca_shared_memory_configuration(MachineState *ms) ++{ ++ MemoryRegion *alias_mr = g_new(MemoryRegion, 1); ++ ++ memory_region_add_subregion_overlap(ms->ram, 0, virtcca_shared_hugepage, 1); ++ memory_region_init_alias(alias_mr, NULL, "alias-mr", virtcca_shared_hugepage, ++ 0, int128_get64(virtcca_shared_hugepage->size)); ++ memory_region_add_subregion(address_space_virtcca_shared_memory.root, ++ VIRTCCA_GPA_START, alias_mr); ++} ++ + void numa_complete_configuration(MachineState *ms) + { + int i; +@@ -711,6 +728,9 @@ void numa_complete_configuration(MachineState *ms) + memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id, + ms->ram_size); + numa_init_memdev_container(ms, ms->ram); ++ if (virtcca_cvm_enabled() && virtcca_shared_hugepage->ram_block) { ++ virtcca_shared_memory_configuration(ms); ++ } + } + /* QEMU needs at least all unique node pair distances to build + * the whole NUMA distance table. 
QEMU treats the distance table +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index d29075aa04..8b95558013 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -30,6 +30,7 @@ + #include "sysemu/dma.h" + #include "trace.h" + #include "qapi/qapi-commands-migration.h" ++#include "sysemu/kvm.h" + + /* enabled until disconnected backend stabilizes */ + #define _VHOST_DEBUG 1 +@@ -1616,7 +1617,12 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + hdev->log_size = 0; + hdev->log_enabled = false; + hdev->started = false; +- memory_listener_register(&hdev->memory_listener, &address_space_memory); ++ if (virtcca_cvm_enabled()) { ++ memory_listener_register(&hdev->memory_listener, ++ &address_space_virtcca_shared_memory); ++ } else { ++ memory_listener_register(&hdev->memory_listener, &address_space_memory); ++ } + QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); + + /* +diff --git a/include/exec/address-spaces.h b/include/exec/address-spaces.h +index 0d0aa61d68..4518b5da86 100644 +--- a/include/exec/address-spaces.h ++++ b/include/exec/address-spaces.h +@@ -33,6 +33,9 @@ MemoryRegion *get_system_io(void); + + extern AddressSpace address_space_memory; + extern AddressSpace address_space_io; ++extern AddressSpace address_space_virtcca_shared_memory; ++ ++extern MemoryRegion *virtcca_shared_hugepage; + + #endif + +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index c7fd30d5b9..d21d9990ad 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -28,6 +28,7 @@ typedef uint64_t vaddr; + + void cpu_exec_init_all(void); + void cpu_exec_step_atomic(CPUState *cpu); ++void virtcca_shared_memory_address_space_init(void); + + /* Using intptr_t ensures that qemu_*_page_mask is sign-extended even + * when intptr_t is 32-bit and we are aligning a long long. 
+diff --git a/include/exec/memory.h b/include/exec/memory.h +index 542c9da918..33778f5c64 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -243,6 +243,17 @@ typedef struct IOMMUTLBEvent { + /* RAM FD is opened read-only */ + #define RAM_READONLY_FD (1 << 11) + ++/* The GPA range of the VirtCCA bounce buffer is from 1GB to 4GB. */ ++#define VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE 0xc0000000ULL ++ ++/* The VirtCCA shared hugepage memory granularity is 1GB */ ++#define VIRTCCA_SHARED_HUGEPAGE_ALIGN 0x40000000ULL ++ ++/* The GPA starting address of the VirtCCA CVM is 1GB */ ++#define VIRTCCA_GPA_START 0x40000000ULL ++ ++extern uint64_t virtcca_cvm_ram_size; ++ + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end, +diff --git a/system/physmem.c b/system/physmem.c +index 250f315bc8..8f4be2d131 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -89,9 +89,17 @@ RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) }; + + static MemoryRegion *system_memory; + static MemoryRegion *system_io; ++static MemoryRegion *virtcca_shared_memory; ++ ++/* ++ * Serves as the sub-MR of the root MR (virtcca_shared_memory) ++ * and is associated with the RAMBlock. 
++ */ ++MemoryRegion *virtcca_shared_hugepage; + + AddressSpace address_space_io; + AddressSpace address_space_memory; ++AddressSpace address_space_virtcca_shared_memory; + + static MemoryRegion io_mem_unassigned; + +@@ -2586,6 +2594,15 @@ static void memory_map_init(void) + address_space_init(&address_space_io, system_io, "I/O"); + } + ++void virtcca_shared_memory_address_space_init(void) ++{ ++ virtcca_shared_memory = g_malloc(sizeof(*virtcca_shared_memory)); ++ memory_region_init(virtcca_shared_memory, NULL, ++ "virtcca_shared_memory", UINT64_MAX); ++ address_space_init(&address_space_virtcca_shared_memory, ++ virtcca_shared_memory, "virtcca_shared_memory"); ++} ++ + MemoryRegion *get_system_memory(void) + { + return system_memory; +diff --git a/system/vl.c b/system/vl.c +index a1e5e68773..7c10cd1337 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -3784,6 +3784,15 @@ void qemu_init(int argc, char **argv) + configure_accelerators(argv[0]); + phase_advance(PHASE_ACCEL_CREATED); + ++ /* ++ * Must run after kvm_init completes, as virtcca_cvm_enabled() ++ * depends on initialization performed in kvm_init. ++ */ ++ if (virtcca_cvm_enabled()) { ++ virtcca_cvm_ram_size = current_machine->ram_size; ++ virtcca_shared_memory_address_space_init(); ++ } ++ + /* + * Beware, QOM objects created before this point miss global and + * compat properties. 
+-- +2.41.0.windows.1 + diff --git a/bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch b/bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch deleted file mode 100644 index 1dc656892b5f124d3ad732aed9c31b0f71a3363b..0000000000000000000000000000000000000000 --- a/bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 2892a4b1f7dfc75e06d0ce770d44a062b6334eb0 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 15 Apr 2020 17:03:54 +0800 -Subject: [PATCH] bios-tables-test: prepare to change ARM virt ACPI DSDT - -We will change ARM virt ACPI DSDT table in order to add the cpufreq device, -which use ACPI CPPC to show CPU frequency info to guest. - -Signed-off-by: Ying Fang ---- - tests/bios-tables-test-allowed-diff.h | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h -index dfb8523c..32a401ae 100644 ---- a/tests/bios-tables-test-allowed-diff.h -+++ b/tests/bios-tables-test-allowed-diff.h -@@ -1 +1,4 @@ - /* List of comma-separated changed AML files to ignore */ -+"tests/data/acpi/virt/DSDT", -+"tests/data/acpi/virt/DSDT.memhp", -+"tests/data/acpi/virt/DSDT.numamem", --- -2.23.0 diff --git a/blkio-Respect-memory-alignment-for-bounce-buffer-all.patch b/blkio-Respect-memory-alignment-for-bounce-buffer-all.patch new file mode 100644 index 0000000000000000000000000000000000000000..3acc7ea338d5f9b2baa6298881417ec07f2cfcec --- /dev/null +++ b/blkio-Respect-memory-alignment-for-bounce-buffer-all.patch @@ -0,0 +1,47 @@ +From c93d512dddb00e3eed2ce9484c55f5f1fbb54c8b Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 19:02:52 +0800 +Subject: [PATCH] blkio: Respect memory-alignment for bounce buffer allocations + +cheery-pick from 10b2393e5e7f4c1d633f1ac8578465681c333efb + +blkio_alloc_mem_region() requires that the requested buffer size is a +multiple of the memory-alignment property. 
If it isn't, the allocation +fails with a return value of -EINVAL. + +Fix the call in blkio_resize_bounce_pool() to make sure the requested +size is properly aligned. + +I observed this problem with vhost-vdpa, which requires page aligned +memory. As the virtio-blk device behind it still had 512 byte blocks, we +got bs->bl.request_alignment = 512, but actually any request that needed +a bounce buffer and was not aligned to 4k would fail without this fix. + +Suggested-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +Message-ID: <20240131173140.42398-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +Signed-off-by: dinglimin +--- + block/blkio.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/blkio.c b/block/blkio.c +index 0a0a6c0f5f..b989617608 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -89,6 +89,9 @@ static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes) + /* Pad size to reduce frequency of resize calls */ + bytes += 128 * 1024; + ++ /* Align the pool size to avoid blkio_alloc_mem_region() failure */ ++ bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment); ++ + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + int ret; + +-- +2.27.0 + diff --git a/block-Add-bdrv_co_get_self_request.patch b/block-Add-bdrv_co_get_self_request.patch deleted file mode 100644 index 4972f084649f70253978ad8fb1d3842bbf741d81..0000000000000000000000000000000000000000 --- a/block-Add-bdrv_co_get_self_request.patch +++ /dev/null @@ -1,59 +0,0 @@ -From d9b88f7e0d56feb4d7daa2506e2756fc48e975a1 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Fri, 1 Nov 2019 16:25:09 +0100 -Subject: [PATCH] block: Add bdrv_co_get_self_request() - -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20191101152510.11719-3-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit c28107e9e55b11cd35cf3dc2505e3e69d10dcf13) -Signed-off-by: Michael Roth ---- - block/io.c | 18 ++++++++++++++++++ - 
include/block/block_int.h | 1 + - 2 files changed, 19 insertions(+) - -diff --git a/block/io.c b/block/io.c -index d4ceaaa2ce..65b5102714 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -721,6 +721,24 @@ static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req) - (req->bytes == req->overlap_bytes); - } - -+/** -+ * Return the tracked request on @bs for the current coroutine, or -+ * NULL if there is none. -+ */ -+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) -+{ -+ BdrvTrackedRequest *req; -+ Coroutine *self = qemu_coroutine_self(); -+ -+ QLIST_FOREACH(req, &bs->tracked_requests, list) { -+ if (req->co == self) { -+ return req; -+ } -+ } -+ -+ return NULL; -+} -+ - /** - * Round a region to cluster boundaries - */ -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 4465b02242..05ee6b4866 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -964,6 +964,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); - - bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); - void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); -+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); - - int get_tmp_filename(char *filename, int size); - BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, --- -2.23.0 diff --git a/block-Add-error-retry-param-setting.patch b/block-Add-error-retry-param-setting.patch index 72f214b1283635a32308e67ba095ff981368ea2e..6399cd5072dc92a60ba03eacafd83500c7b7efe8 100644 --- a/block-Add-error-retry-param-setting.patch +++ b/block-Add-error-retry-param-setting.patch @@ -1,30 +1,32 @@ -From 3464a135565d718d0fedadd67081a0f76d81a9c6 Mon Sep 17 00:00:00 2001 -From: Jiahui Cen +From d777d1585603aa7599ae8bac4492fafdf1e4b109 Mon Sep 17 00:00:00 2001 +From: yexiao Date: Thu, 21 Jan 2021 15:46:50 +0800 Subject: [PATCH] block: Add error retry param setting Add 
"retry_interval" and "retry_timeout" parameter for drive and device option. These parameter are valid only when werror/rerror=retry. -eg. --drive file=image,rerror=retry,retry_interval=1000,retry_timeout=5000 +eg. -device device_name,drive=drive_id,rerror=retry,retry_interval=1000,retry_timeout=5000 Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- - block/block-backend.c | 13 +++++++-- - blockdev.c | 50 ++++++++++++++++++++++++++++++++++ - hw/block/block.c | 10 +++++++ - include/hw/block/block.h | 7 ++++- - include/sysemu/block-backend.h | 5 ++++ - 5 files changed, 81 insertions(+), 4 deletions(-) + block/block-backend.c | 13 ++++-- + blockdev.c | 50 +++++++++++++++++++++ + hw/block/block.c | 10 +++++ + include/hw/block/block.h | 7 ++- + include/sysemu/block-backend-common.h | 3 ++ + include/sysemu/block-backend-global-state.h | 2 + + 6 files changed, 81 insertions(+), 4 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c -index 0fe99ffe52..2d812e2254 100644 +index 919699bb70..85d732de7e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c -@@ -31,9 +31,6 @@ +@@ -33,9 +33,6 @@ - static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ -/* block backend default retry interval */ -#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 @@ -32,7 +34,7 @@ index 0fe99ffe52..2d812e2254 100644 typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); -@@ -1633,6 +1630,16 @@ void blk_drain_all(void) +@@ -2149,6 +2146,16 @@ void blk_drain_all(void) bdrv_drain_all_end(); } @@ -50,18 +52,18 @@ index 0fe99ffe52..2d812e2254 100644 { /* No timeout set, infinite retries. 
*/ diff --git a/blockdev.c b/blockdev.c -index 0f49fd290e..99c92b96d2 100644 +index 2817f73fad..6a229e77a5 100644 --- a/blockdev.c +++ b/blockdev.c -@@ -470,6 +470,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, +@@ -484,6 +484,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, const char *buf; int bdrv_flags = 0; int on_read_error, on_write_error; + int64_t retry_interval, retry_timeout; - bool account_invalid, account_failed; + OnOffAuto account_invalid, account_failed; bool writethrough, read_only; BlockBackend *blk; -@@ -565,6 +566,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, +@@ -576,6 +577,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, } } @@ -72,7 +74,7 @@ index 0f49fd290e..99c92b96d2 100644 if (snapshot) { bdrv_flags |= BDRV_O_SNAPSHOT; } -@@ -629,6 +634,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, +@@ -639,6 +644,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, blk_set_enable_write_cache(blk, !writethrough); blk_set_on_error(blk, on_read_error, on_write_error); @@ -84,7 +86,7 @@ index 0f49fd290e..99c92b96d2 100644 if (!monitor_add_blk(blk, id, errp)) { blk_unref(blk); -@@ -754,6 +764,14 @@ QemuOptsList qemu_legacy_drive_opts = { +@@ -773,6 +783,14 @@ QemuOptsList qemu_legacy_drive_opts = { .name = "werror", .type = QEMU_OPT_STRING, .help = "write error action", @@ -99,7 +101,7 @@ index 0f49fd290e..99c92b96d2 100644 },{ .name = "copy-on-read", .type = QEMU_OPT_BOOL, -@@ -776,6 +794,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, +@@ -795,6 +813,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, BlockInterfaceType type; int max_devs, bus_id, unit_id, index; const char *werror, *rerror; @@ -107,7 +109,7 @@ index 0f49fd290e..99c92b96d2 100644 bool read_only = false; bool copy_on_read; const char *filename; -@@ -992,6 +1011,29 @@ 
DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, +@@ -1013,6 +1032,29 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, qdict_put_str(bs_opts, "rerror", rerror); } @@ -135,9 +137,9 @@ index 0f49fd290e..99c92b96d2 100644 + } + /* Actual block device init: Functionality shared with blockdev-add */ - blk = blockdev_init(filename, bs_opts, &local_err); + blk = blockdev_init(filename, bs_opts, errp); bs_opts = NULL; -@@ -4593,6 +4635,14 @@ QemuOptsList qemu_common_drive_opts = { +@@ -3794,6 +3836,14 @@ QemuOptsList qemu_common_drive_opts = { .name = "werror", .type = QEMU_OPT_STRING, .help = "write error action", @@ -153,10 +155,10 @@ index 0f49fd290e..99c92b96d2 100644 .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, diff --git a/hw/block/block.c b/hw/block/block.c -index bf56c7612b..56141940ca 100644 +index 9f52ee6e72..6bece87709 100644 --- a/hw/block/block.c +++ b/hw/block/block.c -@@ -134,6 +134,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, +@@ -239,6 +239,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, blk_set_enable_write_cache(blk, wce); blk_set_on_error(blk, rerror, werror); @@ -170,15 +172,15 @@ index bf56c7612b..56141940ca 100644 + } + } + + block_acct_setup(blk_get_stats(blk), conf->account_invalid, + conf->account_failed); return true; - } - diff --git a/include/hw/block/block.h b/include/hw/block/block.h -index 607539057a..d12603aabd 100644 +index 15fff66435..fb8c0df4a5 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h -@@ -30,6 +30,8 @@ typedef struct BlockConf { - bool share_rw; +@@ -34,6 +34,8 @@ typedef struct BlockConf { + OnOffAuto account_invalid, account_failed; BlockdevOnError rerror; BlockdevOnError werror; + int64_t retry_interval; @@ -186,7 +188,7 @@ index 607539057a..d12603aabd 100644 } BlockConf; static inline unsigned int get_physical_block_exp(BlockConf *conf) -@@ -71,7 +73,10 @@ static inline unsigned int 
get_physical_block_exp(BlockConf *conf) +@@ -84,7 +86,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror, \ BLOCKDEV_ON_ERROR_AUTO), \ DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ @@ -198,22 +200,26 @@ index 607539057a..d12603aabd 100644 /* Backend access helpers */ -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index 58dde446ca..dc10e507ae 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -25,6 +25,9 @@ - */ - #include "block/block.h" +diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h +index b76df8834a..5a1cdac9c4 100644 +--- a/include/sysemu/block-backend-common.h ++++ b/include/sysemu/block-backend-common.h +@@ -16,6 +16,9 @@ + #include "qemu/iov.h" + #include "block/throttle-groups.h" +/* block backend default retry interval */ +#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 + - /* Callbacks for block device models */ - typedef struct BlockDevOps { - /* -@@ -184,6 +187,8 @@ void blk_inc_in_flight(BlockBackend *blk); - void blk_dec_in_flight(BlockBackend *blk); + /* + * TODO Have to include block/block.h for a bunch of block layer + * types. 
Unfortunately, this pulls in the whole BlockDriverState +diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h +index 7f59fd411d..d56592c22e 100644 +--- a/include/sysemu/block-backend-global-state.h ++++ b/include/sysemu/block-backend-global-state.h +@@ -84,6 +84,8 @@ int blk_commit_all(void); + bool blk_in_drain(BlockBackend *blk); void blk_drain(BlockBackend *blk); void blk_drain_all(void); +void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval); diff --git a/block-Add-sanity-check-when-setting-retry-parameters.patch b/block-Add-sanity-check-when-setting-retry-parameters.patch index 0af7b6e14e2be698821105d386c4c39893c67a83..27c253e367c4ff62202478dc4b5a3460e11f0ef3 100644 --- a/block-Add-sanity-check-when-setting-retry-parameters.patch +++ b/block-Add-sanity-check-when-setting-retry-parameters.patch @@ -1,4 +1,4 @@ -From 6642b2c6fcad2e1099c61b56f4fe78f3180d005e Mon Sep 17 00:00:00 2001 +From e880fc334edb8d07593679cf0c6a9af810c51d0d Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 18 Mar 2021 19:45:11 +0800 Subject: [PATCH] block: Add sanity check when setting retry parameters @@ -7,18 +7,34 @@ Add sanity check when setting retry parameters to avoid invalid retry configuration. 
Signed-off-by: Jiahui Cen +Signed-off-by: Alex Chen --- - hw/core/qdev-properties.c | 45 ++++++++++++++++++++++++++++++++++++ - include/hw/block/block.h | 7 +++--- - include/hw/qdev-properties.h | 8 +++++++ - 3 files changed, 57 insertions(+), 3 deletions(-) + hw/core/qdev-prop-internal.h | 2 ++ + hw/core/qdev-properties-system.c | 45 +++++++++++++++++++++++++++++ + hw/core/qdev-properties.c | 4 +-- + include/hw/block/block.h | 7 +++-- + include/hw/qdev-properties-system.h | 8 +++++ + 5 files changed, 61 insertions(+), 5 deletions(-) -diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c -index 709f9e0f9d..2601091f8f 100644 ---- a/hw/core/qdev-properties.c -+++ b/hw/core/qdev-properties.c -@@ -628,6 +628,51 @@ const PropertyInfo qdev_prop_blockdev_on_error = { - .set_default_value = set_default_value_enum, +diff --git a/hw/core/qdev-prop-internal.h b/hw/core/qdev-prop-internal.h +index d7b77844fe..68b1b9d10c 100644 +--- a/hw/core/qdev-prop-internal.h ++++ b/hw/core/qdev-prop-internal.h +@@ -22,6 +22,8 @@ void qdev_propinfo_set_default_value_uint(ObjectProperty *op, + + void qdev_propinfo_get_int32(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp); ++void qdev_propinfo_get_int64(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp); + void qdev_propinfo_get_size32(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp); + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 1473ab3d5e..f2e2718c74 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -635,6 +635,51 @@ const PropertyInfo qdev_prop_blockdev_on_error = { + .set_default_value = qdev_propinfo_set_default_value_enum, }; +static void set_retry_time(Object *obj, Visitor *v, const char *name, @@ -26,7 +42,7 @@ index 709f9e0f9d..2601091f8f 100644 +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; -+ int64_t value, *ptr = qdev_get_prop_ptr(dev, prop); ++ 
int64_t value, *ptr = object_field_prop_ptr(obj, prop); + Error *local_err = NULL; + + if (dev->realized) { @@ -53,27 +69,49 @@ index 709f9e0f9d..2601091f8f 100644 +const PropertyInfo qdev_prop_blockdev_retry_interval = { + .name = "BlockdevRetryInterval", + .description = "Interval for retry error handling policy", -+ .get = get_int64, ++ .get = qdev_propinfo_get_int64, + .set = set_retry_time, -+ .set_default_value = set_default_value_int, ++ .set_default_value = qdev_propinfo_set_default_value_int, +}; + +const PropertyInfo qdev_prop_blockdev_retry_timeout = { + .name = "BlockdevRetryTimeout", + .description = "Timeout for retry error handling policy", -+ .get = get_int64, ++ .get = qdev_propinfo_get_int64, + .set = set_retry_time, -+ .set_default_value = set_default_value_int, ++ .set_default_value = qdev_propinfo_set_default_value_int, +}; + /* --- BIOS CHS translation */ QEMU_BUILD_BUG_ON(sizeof(BiosAtaTranslation) != sizeof(int)); +diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c +index 840006e953..19b7450b4d 100644 +--- a/hw/core/qdev-properties.c ++++ b/hw/core/qdev-properties.c +@@ -398,7 +398,7 @@ static void set_uint64(Object *obj, Visitor *v, const char *name, + visit_type_uint64(v, name, ptr, errp); + } + +-static void get_int64(Object *obj, Visitor *v, const char *name, ++void qdev_propinfo_get_int64(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { + Property *prop = opaque; +@@ -425,7 +425,7 @@ const PropertyInfo qdev_prop_uint64 = { + + const PropertyInfo qdev_prop_int64 = { + .name = "int64", +- .get = get_int64, ++ .get = qdev_propinfo_get_int64, + .set = set_int64, + .set_default_value = qdev_propinfo_set_default_value_int, + }; diff --git a/include/hw/block/block.h b/include/hw/block/block.h -index d12603aabd..c5276fec0d 100644 +index fb8c0df4a5..844e87495a 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h -@@ -74,9 +74,10 @@ static inline unsigned int 
get_physical_block_exp(BlockConf *conf) +@@ -87,9 +87,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) BLOCKDEV_ON_ERROR_AUTO), \ DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ BLOCKDEV_ON_ERROR_AUTO), \ @@ -87,12 +125,12 @@ index d12603aabd..c5276fec0d 100644 /* Backend access helpers */ -diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h -index a22a532eb8..d7742be3bc 100644 ---- a/include/hw/qdev-properties.h -+++ b/include/hw/qdev-properties.h -@@ -26,6 +26,8 @@ extern const PropertyInfo qdev_prop_on_off_auto; - extern const PropertyInfo qdev_prop_compress_method; +diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h +index 91f7a2452d..7cf27e51b9 100644 +--- a/include/hw/qdev-properties-system.h ++++ b/include/hw/qdev-properties-system.h +@@ -10,6 +10,8 @@ extern const PropertyInfo qdev_prop_multifd_compression; + extern const PropertyInfo qdev_prop_mig_mode; extern const PropertyInfo qdev_prop_losttickpolicy; extern const PropertyInfo qdev_prop_blockdev_on_error; +extern const PropertyInfo qdev_prop_blockdev_retry_interval; @@ -100,7 +138,7 @@ index a22a532eb8..d7742be3bc 100644 extern const PropertyInfo qdev_prop_bios_chs_trans; extern const PropertyInfo qdev_prop_fdc_drive_type; extern const PropertyInfo qdev_prop_drive; -@@ -215,6 +217,12 @@ extern const PropertyInfo qdev_prop_pcie_link_width; +@@ -52,6 +54,12 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; #define DEFINE_PROP_BLOCKDEV_ON_ERROR(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_on_error, \ BlockdevOnError) diff --git a/block-Avoid-memleak-on-qcow2-image-info-failure.patch b/block-Avoid-memleak-on-qcow2-image-info-failure.patch deleted file mode 100644 index 13917f5b61ed267f584feac9041450e6fe9bbca6..0000000000000000000000000000000000000000 --- a/block-Avoid-memleak-on-qcow2-image-info-failure.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 
6a39af8880c18fb3bcbfb715aef909c64286524e Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 20 Mar 2020 13:36:20 -0500 -Subject: [PATCH 04/14] block: Avoid memleak on qcow2 image info failure - -If we fail to get bitmap info, we must not leak the encryption info. - -Fixes: b8968c875f403 -Fixes: Coverity CID 1421894 -Signed-off-by: Eric Blake -Message-Id: <20200320183620.1112123-1-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Andrey Shinkevich -Tested-by: Andrey Shinkevich -Signed-off-by: Max Reitz -Signed-off-by: Peng Liang ---- - block/qcow2.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 27c54b9905aa..0f4b0940d457 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -4588,6 +4588,7 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs, - if (local_err) { - error_propagate(errp, local_err); - qapi_free_ImageInfoSpecific(spec_info); -+ qapi_free_QCryptoBlockInfo(encrypt_info); - return NULL; - } - *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ --- -2.26.2 - diff --git a/block-Call-attention-to-truncation-of-long-NBD-expor.patch b/block-Call-attention-to-truncation-of-long-NBD-expor.patch deleted file mode 100644 index 91745acf1dd03b8186fd70e5a538014727c7099a..0000000000000000000000000000000000000000 --- a/block-Call-attention-to-truncation-of-long-NBD-expor.patch +++ /dev/null @@ -1,105 +0,0 @@ -From e94c1625c0f8155740b1bb7b2c749df759e04526 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 10 Jun 2020 18:32:02 -0400 -Subject: [PATCH] block: Call attention to truncation of long NBD exports - -RH-Author: Eric Blake -Message-id: <20200610183202.3780750-3-eblake@redhat.com> -Patchwork-id: 97495 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block: Call attention to truncation of long NBD exports -Bugzilla: 1845384 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Commit 93676c88 relaxed our NBD client code to request 
export names up -to the NBD protocol maximum of 4096 bytes without NUL terminator, even -though the block layer can't store anything longer than 4096 bytes -including NUL terminator for display to the user. Since this means -there are some export names where we have to truncate things, we can -at least try to make the truncation a bit more obvious for the user. -Note that in spite of the truncated display name, we can still -communicate with an NBD server using such a long export name; this was -deemed nicer than refusing to even connect to such a server (since the -server may not be under our control, and since determining our actual -length limits gets tricky when nbd://host:port/export and -nbd+unix:///export?socket=/path are themselves variable-length -expansions beyond the export name but count towards the block layer -name length). - -Reported-by: Xueqiang Wei -Fixes: https://bugzilla.redhat.com/1843684 -Signed-off-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200610163741.3745251-3-eblake@redhat.com> -(cherry picked from commit 5c86bdf1208916ece0b87e1151c9b48ee54faa3e) -Signed-off-by: Eric Blake -Signed-off-by: Eduardo Lima (Etrunko) ---- - block.c | 7 +++++-- - block/nbd.c | 21 +++++++++++++-------- - 2 files changed, 18 insertions(+), 10 deletions(-) - -diff --git a/block.c b/block.c -index 38880eabf8..ba36b53a00 100644 ---- a/block.c -+++ b/block.c -@@ -6444,8 +6444,11 @@ void bdrv_refresh_filename(BlockDriverState *bs) - pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); - } else { - QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); -- snprintf(bs->filename, sizeof(bs->filename), "json:%s", -- qstring_get_str(json)); -+ if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", -+ qstring_get_str(json)) >= sizeof(bs->filename)) { -+ /* Give user a hint if we truncated things. 
*/ -+ strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); -+ } - qobject_unref(json); - } - } -diff --git a/block/nbd.c b/block/nbd.c -index 3977b1efc7..63cdd051ab 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -1714,6 +1714,7 @@ static void nbd_refresh_filename(BlockDriverState *bs) - { - BDRVNBDState *s = bs->opaque; - const char *host = NULL, *port = NULL, *path = NULL; -+ size_t len = 0; - - if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) { - const InetSocketAddress *inet = &s->saddr->u.inet; -@@ -1726,17 +1727,21 @@ static void nbd_refresh_filename(BlockDriverState *bs) - } /* else can't represent as pseudo-filename */ - - if (path && s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd+unix:///%s?socket=%s", s->export, path); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd+unix:///%s?socket=%s", s->export, path); - } else if (path && !s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd+unix://?socket=%s", path); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd+unix://?socket=%s", path); - } else if (host && s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd://%s:%s/%s", host, port, s->export); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd://%s:%s/%s", host, port, s->export); - } else if (host && !s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd://%s:%s", host, port); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd://%s:%s", host, port); -+ } -+ if (len > sizeof(bs->exact_filename)) { -+ /* Name is too long to represent exactly, so leave it empty. 
*/ -+ bs->exact_filename[0] = '\0'; - } - } - --- -2.27.0 - diff --git a/block-Fix-cross-AioContext-blockdev-snapshot.patch b/block-Fix-cross-AioContext-blockdev-snapshot.patch deleted file mode 100644 index a4a4d9dbb4c51b74a8258b6368bd9a9ca88b71c6..0000000000000000000000000000000000000000 --- a/block-Fix-cross-AioContext-blockdev-snapshot.patch +++ /dev/null @@ -1,78 +0,0 @@ -From ec96b9f64c239736003413d70dc3999ad0b8271c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 10 Mar 2020 12:38:29 +0100 -Subject: [PATCH] block: Fix cross-AioContext blockdev-snapshot - -external_snapshot_prepare() tries to move the overlay to the AioContext -of the backing file (the snapshotted node). However, it's possible that -this doesn't work, but the backing file can instead be moved to the -overlay's AioContext (e.g. opening the backing chain for a mirror -target). - -bdrv_append() already indirectly uses bdrv_attach_node(), which takes -care to move nodes to make sure they use the same AioContext and which -tries both directions. - -So the problem has a simple fix: Just delete the unnecessary extra -bdrv_try_set_aio_context() call in external_snapshot_prepare() and -instead assert in bdrv_append() that both nodes were indeed moved to the -same AioContext. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-6-kwolf@redhat.com> -Tested-by: Peter Krempa -Signed-off-by: Kevin Wolf ---- - block.c | 1 + - blockdev.c | 16 ---------------- - 2 files changed, 1 insertion(+), 16 deletions(-) - -diff --git a/block.c b/block.c -index ba36b53a00..824025f781 100644 ---- a/block.c -+++ b/block.c -@@ -4165,6 +4165,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, - bdrv_ref(from); - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -+ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); - bdrv_drained_begin(from); - - /* Put all parents into @list and calculate their cumulative permissions */ -diff --git a/blockdev.c b/blockdev.c -index 79112be2e6..d1a3b6a630 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1578,8 +1578,6 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -- AioContext *old_context; -- int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar - * purpose but a different set of parameters */ -@@ -1719,20 +1717,6 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -- /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(state->new_bs); -- aio_context_release(aio_context); -- aio_context_acquire(old_context); -- -- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -- -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- -- if (ret < 0) { -- goto out; -- } -- - /* This removes our old bs and adds the new bs. This is an operation that - * can fail, so we need to do it in .prepare; undoing it for abort is - * always possible. 
*/ --- -2.27.0 - diff --git a/block-Make-wait-mark-serialising-requests-public.patch b/block-Make-wait-mark-serialising-requests-public.patch deleted file mode 100644 index 162463c7769093014562846d9d7c0da4e131b5e3..0000000000000000000000000000000000000000 --- a/block-Make-wait-mark-serialising-requests-public.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 590cff8230749794ba09b38f3ea4eb6b0f2f73b5 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Fri, 1 Nov 2019 16:25:08 +0100 -Subject: [PATCH] block: Make wait/mark serialising requests public - -Make both bdrv_mark_request_serialising() and -bdrv_wait_serialising_requests() public so they can be used from block -drivers. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20191101152510.11719-2-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 304d9d7f034ff7f5e1e66a65b7f720f63a72c57e) - Conflicts: - block/io.c -*drop context dependency on 1acc3466a2 -Signed-off-by: Michael Roth ---- - block/io.c | 24 ++++++++++++------------ - include/block/block_int.h | 3 +++ - 2 files changed, 15 insertions(+), 12 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 07d2d825c3..d4ceaaa2ce 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -694,7 +694,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req, - qemu_co_mutex_unlock(&bs->reqs_lock); - } - --static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) -+void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) - { - int64_t overlap_offset = req->offset & ~(align - 1); - uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) -@@ -784,7 +784,7 @@ void bdrv_dec_in_flight(BlockDriverState *bs) - bdrv_wakeup(bs); - } - --static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) -+bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) - { - BlockDriverState *bs = self->bs; - BdrvTrackedRequest *req; -@@ -1340,14 +1340,14 @@ static int 
coroutine_fn bdrv_aligned_preadv(BdrvChild *child, - * with each other for the same cluster. For example, in copy-on-read - * it ensures that the CoR read and write operations are atomic and - * guest writes cannot interleave between them. */ -- mark_request_serialising(req, bdrv_get_cluster_size(bs)); -+ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); - } - - /* BDRV_REQ_SERIALISING is only for write operation */ - assert(!(flags & BDRV_REQ_SERIALISING)); - - if (!(flags & BDRV_REQ_NO_SERIALISING)) { -- wait_serialising_requests(req); -+ bdrv_wait_serialising_requests(req); - } - - if (flags & BDRV_REQ_COPY_ON_READ) { -@@ -1736,10 +1736,10 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, - assert(!(flags & ~BDRV_REQ_MASK)); - - if (flags & BDRV_REQ_SERIALISING) { -- mark_request_serialising(req, bdrv_get_cluster_size(bs)); -+ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); - } - -- waited = wait_serialising_requests(req); -+ waited = bdrv_wait_serialising_requests(req); - - assert(!waited || !req->serialising || - is_request_serialising_and_aligned(req)); -@@ -1905,8 +1905,8 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, - - padding = bdrv_init_padding(bs, offset, bytes, &pad); - if (padding) { -- mark_request_serialising(req, align); -- wait_serialising_requests(req); -+ bdrv_mark_request_serialising(req, align); -+ bdrv_wait_serialising_requests(req); - - bdrv_padding_rmw_read(child, req, &pad, true); - -@@ -1993,8 +1993,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, - } - - if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { -- mark_request_serialising(&req, align); -- wait_serialising_requests(&req); -+ bdrv_mark_request_serialising(&req, align); -+ bdrv_wait_serialising_requests(&req); - bdrv_padding_rmw_read(child, &req, &pad, false); - } - -@@ -3078,7 +3078,7 @@ static int coroutine_fn bdrv_co_copy_range_internal( - /* BDRV_REQ_SERIALISING is only for write 
operation */ - assert(!(read_flags & BDRV_REQ_SERIALISING)); - if (!(read_flags & BDRV_REQ_NO_SERIALISING)) { -- wait_serialising_requests(&req); -+ bdrv_wait_serialising_requests(&req); - } - - ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, -@@ -3205,7 +3205,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, - * new area, we need to make sure that no write requests are made to it - * concurrently or they might be overwritten by preallocation. */ - if (new_bytes) { -- mark_request_serialising(&req, 1); -+ bdrv_mark_request_serialising(&req, 1); - } - if (bs->read_only) { - error_setg(errp, "Image is read-only"); -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 3aa1e832a8..4465b02242 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -962,6 +962,9 @@ extern unsigned int bdrv_drain_all_count; - void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); - void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); - -+bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); -+void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); -+ - int get_tmp_filename(char *filename, int size); - BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, - const char *filename); --- -2.23.0 diff --git a/block-Parse-filenames-only-when-explicitly-requested.patch b/block-Parse-filenames-only-when-explicitly-requested.patch new file mode 100644 index 0000000000000000000000000000000000000000..54f2fecf501370cda52ed8d3bf358463914c69ea --- /dev/null +++ b/block-Parse-filenames-only-when-explicitly-requested.patch @@ -0,0 +1,252 @@ +From fc74f24988cc2160d6115337330e8549df3aad0d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:56:02 +0200 +Subject: [PATCH] block: Parse filenames only when explicitly requested + (CVE-2024-4467) + +When handling image filenames from legacy options such as -drive or from 
+tools, these filenames are parsed for protocol prefixes, including for +the json:{} pseudo-protocol. + +This behaviour is intended for filenames that come directly from the +command line and for backing files, which may come from the image file +itself. Higher level management tools generally take care to verify that +untrusted images don't contain a bad (or any) backing file reference; +'qemu-img info' is a suitable tool for this. + +However, for other files that can be referenced in images, such as +qcow2 data files or VMDK extents, the string from the image file is +usually not verified by management tools - and 'qemu-img info' wouldn't +be suitable because in contrast to backing files, it already opens these +other referenced files. So here the string should be interpreted as a +literal local filename. More complex configurations need to be specified +explicitly on the command line or in QMP. + +This patch changes bdrv_open_inherit() so that it only parses filenames +if a new parameter parse_filename is true. It is set for the top level +in bdrv_open(), for the file child and for the backing file child. All +other callers pass false and disable filename parsing this way. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Signed-off-by: liuxiangdong +--- + block.c | 98 +++++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 61 insertions(+), 37 deletions(-) + +diff --git a/block.c b/block.c +index 3bfd4be6b4..6a2abfabcb 100644 +--- a/block.c ++++ b/block.c +@@ -89,6 +89,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, ++ bool parse_filename, + Error **errp); + + static bool bdrv_recurse_has_child(BlockDriverState *bs, +@@ -2050,7 +2051,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename, + * block driver has been specified explicitly. + */ + static int bdrv_fill_options(QDict **options, const char *filename, +- int *flags, Error **errp) ++ int *flags, bool allow_parse_filename, ++ Error **errp) + { + const char *drvname; + bool protocol = *flags & BDRV_O_PROTOCOL; +@@ -2092,7 +2094,7 @@ static int bdrv_fill_options(QDict **options, const char *filename, + if (protocol && filename) { + if (!qdict_haskey(*options, "filename")) { + qdict_put_str(*options, "filename", filename); +- parse_filename = true; ++ parse_filename = allow_parse_filename; + } else { + error_setg(errp, "Can't specify 'file' and 'filename' options at " + "the same time"); +@@ -3678,7 +3680,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + } + + backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, +- &child_of_bds, bdrv_backing_role(bs), errp); ++ &child_of_bds, bdrv_backing_role(bs), true, ++ errp); + if (!backing_hd) { + bs->open_flags |= BDRV_O_NO_BACKING; + error_prepend(errp, "Could not open backing file: "); +@@ -3715,7 +3718,8 @@ free_exit: + static BlockDriverState * + bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + 
BlockDriverState *parent, const BdrvChildClass *child_class, +- BdrvChildRole child_role, bool allow_none, Error **errp) ++ BdrvChildRole child_role, bool allow_none, ++ bool parse_filename, Error **errp) + { + BlockDriverState *bs = NULL; + QDict *image_options; +@@ -3746,7 +3750,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + } + + bs = bdrv_open_inherit(filename, reference, image_options, 0, +- parent, child_class, child_role, errp); ++ parent, child_class, child_role, parse_filename, ++ errp); + if (!bs) { + goto done; + } +@@ -3756,6 +3761,37 @@ done: + return bs; + } + ++static BdrvChild *bdrv_open_child_common(const char *filename, ++ QDict *options, const char *bdref_key, ++ BlockDriverState *parent, ++ const BdrvChildClass *child_class, ++ BdrvChildRole child_role, ++ bool allow_none, bool parse_filename, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvChild *child; ++ AioContext *ctx; ++ ++ GLOBAL_STATE_CODE(); ++ ++ bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, ++ child_role, allow_none, parse_filename, errp); ++ if (bs == NULL) { ++ return NULL; ++ } ++ ++ bdrv_graph_wrlock(NULL); ++ ctx = bdrv_get_aio_context(bs); ++ aio_context_acquire(ctx); ++ child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, ++ errp); ++ aio_context_release(ctx); ++ bdrv_graph_wrunlock(NULL); ++ ++ return child; ++} ++ + /* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. 
+@@ -3781,31 +3817,15 @@ BdrvChild *bdrv_open_child(const char *filename, + BdrvChildRole child_role, + bool allow_none, Error **errp) + { +- BlockDriverState *bs; +- BdrvChild *child; +- AioContext *ctx; +- +- GLOBAL_STATE_CODE(); +- +- bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, +- child_role, allow_none, errp); +- if (bs == NULL) { +- return NULL; +- } +- +- bdrv_graph_wrlock(NULL); +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, +- errp); +- aio_context_release(ctx); +- bdrv_graph_wrunlock(NULL); +- +- return child; ++ return bdrv_open_child_common(filename, options, bdref_key, parent, ++ child_class, child_role, allow_none, false, ++ errp); + } + + /* +- * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. ++ * This does mostly the same as bdrv_open_child(), but for opening the primary ++ * child of a node. A notable difference from bdrv_open_child() is that it ++ * enables filename parsing for protocol names (including json:). + * + * The caller must hold the lock of the main AioContext and no other AioContext. + * @parent can move to a different AioContext in this function. Callers must +@@ -3822,8 +3842,8 @@ int bdrv_open_file_child(const char *filename, + role = parent->drv->is_filter ? 
+ (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; + +- if (!bdrv_open_child(filename, options, bdref_key, parent, +- &child_of_bds, role, false, errp)) ++ if (!bdrv_open_child_common(filename, options, bdref_key, parent, ++ &child_of_bds, role, false, true, errp)) + { + return -EINVAL; + } +@@ -3868,7 +3888,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) + + } + +- bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); ++ bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false, ++ errp); + obj = NULL; + qobject_unref(obj); + visit_free(v); +@@ -3965,7 +3986,7 @@ static BlockDriverState * no_coroutine_fn + bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + int flags, BlockDriverState *parent, + const BdrvChildClass *child_class, BdrvChildRole child_role, +- Error **errp) ++ bool parse_filename, Error **errp) + { + int ret; + BlockBackend *file = NULL; +@@ -4014,9 +4035,11 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + } + + /* json: syntax counts as explicit options, as if in the QDict */ +- parse_json_protocol(options, &filename, &local_err); +- if (local_err) { +- goto fail; ++ if (parse_filename) { ++ parse_json_protocol(options, &filename, &local_err); ++ if (local_err) { ++ goto fail; ++ } + } + + bs->explicit_options = qdict_clone_shallow(options); +@@ -4041,7 +4064,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + parent->open_flags, parent->options); + } + +- ret = bdrv_fill_options(&options, filename, &flags, &local_err); ++ ret = bdrv_fill_options(&options, filename, &flags, parse_filename, ++ &local_err); + if (ret < 0) { + goto fail; + } +@@ -4110,7 +4134,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + + file_bs = bdrv_open_child_bs(filename, options, "file", bs, + &child_of_bds, BDRV_CHILD_IMAGE, +- true, &local_err); ++ true, true, 
&local_err); + if (local_err) { + goto fail; + } +@@ -4273,7 +4297,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, + GLOBAL_STATE_CODE(); + + return bdrv_open_inherit(filename, reference, options, flags, NULL, +- NULL, 0, errp); ++ NULL, 0, true, errp); + } + + /* Return true if the NULL-terminated @list contains @str */ +-- +2.41.0.windows.1 + diff --git a/block-Remove-unused-include.patch b/block-Remove-unused-include.patch deleted file mode 100644 index f643ebc66f0f631e949412b706c3210e9d3aead1..0000000000000000000000000000000000000000 --- a/block-Remove-unused-include.patch +++ /dev/null @@ -1,31 +0,0 @@ -From b353d059bddf4b211c2560e7c123f874ed5c8cf6 Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Wed, 21 Oct 2020 17:12:52 +0800 -Subject: [PATCH] block: Remove unused include - -The "qemu-common.h" include is not used, remove it. - -Reported-by: Euler Robot -Signed-off-by: AlexChen -Message-Id: <5F8FFB94.3030209@huawei.com> -Signed-off-by: Max Reitz -(cherry-picked from commit 3d86af858e) ---- - block/dmg-lzfse.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/block/dmg-lzfse.c b/block/dmg-lzfse.c -index 19d25bc646..6798cf4fbf 100644 ---- a/block/dmg-lzfse.c -+++ b/block/dmg-lzfse.c -@@ -22,7 +22,6 @@ - * THE SOFTWARE. 
- */ - #include "qemu/osdep.h" --#include "qemu-common.h" - #include "dmg.h" - #include - --- -2.27.0 - diff --git a/block-backend-Add-device-specific-retry-callback.patch b/block-backend-Add-device-specific-retry-callback.patch index d50d6bc99cdd87e74838b3b2b61a2eabd1df3ed1..c912ea20e8d1f2bcce46d2719232aff75456b9e4 100644 --- a/block-backend-Add-device-specific-retry-callback.patch +++ b/block-backend-Add-device-specific-retry-callback.patch @@ -1,5 +1,5 @@ -From f74edc7c8c85874691daf8801c159874ef45aae0 Mon Sep 17 00:00:00 2001 -From: Jiahui Cen +From 94580294f0fda3c715caa19f4b33718212c9c531 Mon Sep 17 00:00:00 2001 +From: yexiao Date: Thu, 21 Jan 2021 15:46:47 +0800 Subject: [PATCH] block-backend: Add device specific retry callback @@ -9,16 +9,17 @@ on errors and the device supports retry action. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- - block/block-backend.c | 8 ++++++++ - include/sysemu/block-backend.h | 4 ++++ + block/block-backend.c | 8 ++++++++ + include/sysemu/block-backend-common.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c -index a9a43b1440..b8f535a5fd 100644 +index 2f56cc8382..7e25d5a058 100644 --- a/block/block-backend.c +++ b/block/block-backend.c -@@ -958,6 +958,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, +@@ -1123,6 +1123,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, blk->dev_ops = ops; blk->dev_opaque = opaque; @@ -31,23 +32,23 @@ index a9a43b1440..b8f535a5fd 100644 + } + /* Are we currently quiesced? Should we enforce this right now? 
*/ - if (blk->quiesce_counter && ops->drained_begin) { + if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) { ops->drained_begin(opaque); -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index 733c4957eb..b58dc6bde8 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -66,6 +66,10 @@ typedef struct BlockDevOps { - * Runs when the backend's last drain request ends. +diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h +index 780cea7305..b76df8834a 100644 +--- a/include/sysemu/block-backend-common.h ++++ b/include/sysemu/block-backend-common.h +@@ -71,6 +71,10 @@ typedef struct BlockDevOps { + * Is the device still busy? */ - void (*drained_end)(void *opaque); + bool (*drained_poll)(void *opaque); + /* + * Runs when retrying failed requests. + */ + void (*retry_request_cb)(void *opaque); - } BlockDevOps; - /* This struct is embedded in (the private) BlockBackend struct and contains + /* + * I/O API functions. These functions are thread-safe. -- 2.27.0 diff --git a/block-backend-Add-timeout-support-for-retry.patch b/block-backend-Add-timeout-support-for-retry.patch index ac1bc66230f533b542e0f0a31d0035f868f3c06b..8a35cd27449e352d4d4edc0280ae3edee484b2c8 100644 --- a/block-backend-Add-timeout-support-for-retry.patch +++ b/block-backend-Add-timeout-support-for-retry.patch @@ -1,5 +1,5 @@ -From c58269c64af18bc2a22bbef8b92e489214272429 Mon Sep 17 00:00:00 2001 -From: Jiahui Cen +From b4bb154e6587b6d3fef819efcced803e309c4e05 Mon Sep 17 00:00:00 2001 +From: yexiao Date: Thu, 21 Jan 2021 15:46:49 +0800 Subject: [PATCH] block-backend: Add timeout support for retry @@ -9,16 +9,17 @@ successful retry. 
Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- - block/block-backend.c | 25 ++++++++++++++++++++++++- - include/sysemu/block-backend.h | 1 + + block/block-backend.c | 25 ++++++++++++++++++++- + include/sysemu/block-backend-global-state.h | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/block/block-backend.c b/block/block-backend.c -index 11f8ff4301..0fe99ffe52 100644 +index e62808fc03..919699bb70 100644 --- a/block/block-backend.c +++ b/block/block-backend.c -@@ -1633,6 +1633,29 @@ void blk_drain_all(void) +@@ -2149,6 +2149,29 @@ void blk_drain_all(void) bdrv_drain_all_end(); } @@ -48,7 +49,7 @@ index 11f8ff4301..0fe99ffe52 100644 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { -@@ -1661,7 +1684,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, +@@ -2180,7 +2203,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, case BLOCKDEV_ON_ERROR_IGNORE: return BLOCK_ERROR_ACTION_IGNORE; case BLOCKDEV_ON_ERROR_RETRY: @@ -57,18 +58,18 @@ index 11f8ff4301..0fe99ffe52 100644 BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_AUTO: default: -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index b58dc6bde8..58dde446ca 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -184,6 +184,7 @@ void blk_inc_in_flight(BlockBackend *blk); - void blk_dec_in_flight(BlockBackend *blk); +diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h +index 49c12b0fa9..7f59fd411d 100644 +--- a/include/sysemu/block-backend-global-state.h ++++ b/include/sysemu/block-backend-global-state.h +@@ -84,6 +84,7 @@ int blk_commit_all(void); + bool blk_in_drain(BlockBackend *blk); void blk_drain(BlockBackend *blk); void blk_drain_all(void); +void blk_error_retry_reset_timeout(BlockBackend *blk); void 
blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error); - BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); + bool blk_supports_write_perm(BlockBackend *blk); -- 2.27.0 diff --git a/block-backend-Enable-retry-action-on-errors.patch b/block-backend-Enable-retry-action-on-errors.patch index 241f945cff9edae0f464177226b2b3ef33d98ca0..6581ac1b362f71773e5dae8806fd73e00ee2289f 100644 --- a/block-backend-Enable-retry-action-on-errors.patch +++ b/block-backend-Enable-retry-action-on-errors.patch @@ -1,5 +1,5 @@ -From 8df36cddd1e5e2b3c3598c83a70e8cbb81c26cec Mon Sep 17 00:00:00 2001 -From: Jiahui Cen +From 7bcf4385f518580509990ff71c8209505c887abc Mon Sep 17 00:00:00 2001 +From: yexiao Date: Thu, 21 Jan 2021 15:46:48 +0800 Subject: [PATCH] block-backend: Enable retry action on errors @@ -8,15 +8,16 @@ trigger the timer to do device specific retry action. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- block/block-backend.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c -index b8f535a5fd..11f8ff4301 100644 +index 7e25d5a058..e62808fc03 100644 --- a/block/block-backend.c +++ b/block/block-backend.c -@@ -1660,6 +1660,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, +@@ -2179,6 +2179,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, return BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_IGNORE: return BLOCK_ERROR_ACTION_IGNORE; @@ -26,7 +27,7 @@ index b8f535a5fd..11f8ff4301 100644 case BLOCKDEV_ON_ERROR_AUTO: default: abort(); -@@ -1707,6 +1710,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, +@@ -2227,6 +2230,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, qemu_system_vmstop_request_prepare(); send_qmp_error_event(blk, action, is_read, error); qemu_system_vmstop_request(RUN_STATE_IO_ERROR); diff --git 
a/block-backend-Introduce-retry-timer.patch b/block-backend-Introduce-retry-timer.patch index e45aacc149860baf2951e34a5b964cae7cc9aea3..e085a3eb1cbd90656a2ba816a17ae48ed693cefc 100644 --- a/block-backend-Introduce-retry-timer.patch +++ b/block-backend-Introduce-retry-timer.patch @@ -1,5 +1,5 @@ -From 805c2e121e1ad612f63bafec458284554e76d034 Mon Sep 17 00:00:00 2001 -From: Jiahui Cen +From 9567fce96050342f393f546d3c5131118c3cad7c Mon Sep 17 00:00:00 2001 +From: yexiao Date: Thu, 21 Jan 2021 15:46:46 +0800 Subject: [PATCH] block-backend: Introduce retry timer @@ -7,17 +7,18 @@ Add a timer to regularly trigger retry on errors. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- block/block-backend.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c -index 0056b526b8..a9a43b1440 100644 +index ec21148806..2f56cc8382 100644 --- a/block/block-backend.c +++ b/block/block-backend.c -@@ -31,6 +31,9 @@ +@@ -33,6 +33,9 @@ - static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ +/* block backend default retry interval */ +#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 @@ -25,7 +26,7 @@ index 0056b526b8..a9a43b1440 100644 typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); -@@ -88,6 +91,15 @@ struct BlockBackend { +@@ -92,6 +95,15 @@ struct BlockBackend { * Accessed with atomic ops. 
*/ unsigned int in_flight; @@ -41,7 +42,7 @@ index 0056b526b8..a9a43b1440 100644 }; typedef struct BlockBackendAIOCB { -@@ -337,6 +349,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) +@@ -368,6 +380,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; @@ -52,8 +53,8 @@ index 0056b526b8..a9a43b1440 100644 + block_acct_init(&blk->stats); - notifier_list_init(&blk->remove_bs_notifiers); -@@ -423,6 +440,10 @@ static void blk_delete(BlockBackend *blk) + qemu_mutex_init(&blk->queued_requests_lock); +@@ -508,6 +525,10 @@ static void blk_delete(BlockBackend *blk) QTAILQ_REMOVE(&block_backends, blk, link); drive_info_del(blk->legacy_dinfo); block_acct_cleanup(&blk->stats); diff --git a/block-backend-Stop-retrying-when-draining.patch b/block-backend-Stop-retrying-when-draining.patch index 13f3ad64d1c2d2db02d830ca67ee3f90e893e46a..d2996b16ade4968d08bf9ee2cedb690aed5e48ce 100644 --- a/block-backend-Stop-retrying-when-draining.patch +++ b/block-backend-Stop-retrying-when-draining.patch @@ -1,4 +1,4 @@ -From da64af4b1e92c345296d937e66136f86027d1ca2 Mon Sep 17 00:00:00 2001 +From bbac66be575c76216c18d68c558e0dc80a078f68 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 25 Feb 2021 18:03:57 +0800 Subject: [PATCH] block-backend: Stop retrying when draining @@ -9,15 +9,16 @@ virtual devices go back to work, they would retry those queued requests. 
Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- block/block-backend.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c -index 2d812e2254..f6c918f1d9 100644 +index 85d732de7e..bfbbb18af1 100644 --- a/block/block-backend.c +++ b/block/block-backend.c -@@ -1741,9 +1741,11 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, +@@ -2261,9 +2261,11 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, send_qmp_error_event(blk, action, is_read, error); qemu_system_vmstop_request(RUN_STATE_IO_ERROR); } else if (action == BLOCK_ERROR_ACTION_RETRY) { diff --git a/block-backup-Add-mirror-sync-mode-bitmap.patch b/block-backup-Add-mirror-sync-mode-bitmap.patch deleted file mode 100644 index fb111206baa228ef558d0a567f7d88421a66ad84..0000000000000000000000000000000000000000 --- a/block-backup-Add-mirror-sync-mode-bitmap.patch +++ /dev/null @@ -1,252 +0,0 @@ -From e0a0150e671e8129f11aa3df907e444e91711f53 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:52 -0400 -Subject: [PATCH] block/backup: Add mirror sync mode 'bitmap' - -We don't need or want a new sync mode for simple differences in -semantics. Create a new mode simply named "BITMAP" that is designed to -make use of the new Bitmap Sync Mode field. - -Because the only bitmap sync mode is 'on-success', this adds no new -functionality to the backup job (yet). The old incremental backup mode -is maintained as a syntactic sugar for sync=bitmap, mode=on-success. - -Add all of the plumbing necessary to support this new instruction. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190709232550.10724-6-jsnow@redhat.com -Signed-off-by: John Snow ---- - block/backup.c | 20 ++++++++++++-------- - block/mirror.c | 6 ++++-- - block/replication.c | 2 +- - blockdev.c | 25 +++++++++++++++++++++++-- - include/block/block_int.h | 4 +++- - qapi/block-core.json | 21 +++++++++++++++------ - 6 files changed, 58 insertions(+), 20 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 88354dcb32..e37eda80cd 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -38,9 +38,9 @@ typedef struct CowRequest { - typedef struct BackupBlockJob { - BlockJob common; - BlockBackend *target; -- /* bitmap for sync=incremental */ - BdrvDirtyBitmap *sync_bitmap; - MirrorSyncMode sync_mode; -+ BitmapSyncMode bitmap_mode; - BlockdevOnError on_source_error; - BlockdevOnError on_target_error; - CoRwlock flush_rwlock; -@@ -461,7 +461,7 @@ static int coroutine_fn backup_run(Job *job, Error **errp) - - job_progress_set_remaining(job, s->len); - -- if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { -+ if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) { - backup_incremental_init_copy_bitmap(s); - } else { - hbitmap_set(s->copy_bitmap, 0, s->len); -@@ -545,6 +545,7 @@ static int64_t backup_calculate_cluster_size(BlockDriverState *target, - BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, int64_t speed, - MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, -+ BitmapSyncMode bitmap_mode, - bool compress, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, -@@ -592,10 +593,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - return NULL; - } - -- if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { -+ /* QMP interface should have handled translating this to bitmap mode */ -+ assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); -+ -+ if (sync_mode == MIRROR_SYNC_MODE_BITMAP) { - if (!sync_bitmap) { - error_setg(errp, "must 
provide a valid bitmap name for " -- "\"incremental\" sync mode"); -+ "'%s' sync mode", MirrorSyncMode_str(sync_mode)); - return NULL; - } - -@@ -605,8 +609,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - } - } else if (sync_bitmap) { - error_setg(errp, -- "a sync_bitmap was provided to backup_run, " -- "but received an incompatible sync_mode (%s)", -+ "a bitmap was given to backup_job_create, " -+ "but it received an incompatible sync_mode (%s)", - MirrorSyncMode_str(sync_mode)); - return NULL; - } -@@ -648,8 +652,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - job->on_source_error = on_source_error; - job->on_target_error = on_target_error; - job->sync_mode = sync_mode; -- job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ? -- sync_bitmap : NULL; -+ job->sync_bitmap = sync_bitmap; -+ job->bitmap_mode = bitmap_mode; - job->compress = compress; - - /* Detect image-fleecing (and similar) schemes */ -diff --git a/block/mirror.c b/block/mirror.c -index abcf60a961..ccae49a28e 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -1770,8 +1770,10 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - bool is_none_mode; - BlockDriverState *base; - -- if (mode == MIRROR_SYNC_MODE_INCREMENTAL) { -- error_setg(errp, "Sync mode 'incremental' not supported"); -+ if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) || -+ (mode == MIRROR_SYNC_MODE_BITMAP)) { -+ error_setg(errp, "Sync mode '%s' not supported", -+ MirrorSyncMode_str(mode)); - return; - } - is_none_mode = mode == MIRROR_SYNC_MODE_NONE; -diff --git a/block/replication.c b/block/replication.c -index 23b2993d74..936b2f8b5a 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -543,7 +543,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - - s->backup_job = backup_job_create( - NULL, s->secondary_disk->bs, s->hidden_disk->bs, -- 0, MIRROR_SYNC_MODE_NONE, NULL, false, -+ 0, MIRROR_SYNC_MODE_NONE, NULL, 0, 
false, - BLOCKDEV_ON_ERROR_REPORT, - BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL, - backup_job_completed, bs, NULL, &local_err); -diff --git a/blockdev.c b/blockdev.c -index aa15ed1f00..34c8b651e1 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3508,12 +3508,31 @@ static BlockJob *do_backup_common(BackupCommon *backup, - return NULL; - } - -+ if (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL) { -+ if (backup->has_bitmap_mode && -+ backup->bitmap_mode != BITMAP_SYNC_MODE_ON_SUCCESS) { -+ error_setg(errp, "Bitmap sync mode must be '%s' " -+ "when using sync mode '%s'", -+ BitmapSyncMode_str(BITMAP_SYNC_MODE_ON_SUCCESS), -+ MirrorSyncMode_str(backup->sync)); -+ return NULL; -+ } -+ backup->has_bitmap_mode = true; -+ backup->sync = MIRROR_SYNC_MODE_BITMAP; -+ backup->bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS; -+ } -+ - if (backup->has_bitmap) { - bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); - if (!bmap) { - error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); - return NULL; - } -+ if (!backup->has_bitmap_mode) { -+ error_setg(errp, "Bitmap sync mode must be given " -+ "when providing a bitmap"); -+ return NULL; -+ } - if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { - return NULL; - } -@@ -3527,8 +3546,10 @@ static BlockJob *do_backup_common(BackupCommon *backup, - } - - job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, -- backup->sync, bmap, backup->compress, -- backup->on_source_error, backup->on_target_error, -+ backup->sync, bmap, backup->bitmap_mode, -+ backup->compress, -+ backup->on_source_error, -+ backup->on_target_error, - job_flags, NULL, NULL, txn, errp); - return job; - } -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 05ee6b4866..76117a761a 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1152,7 +1152,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - * @target: Block device to write to. 
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @sync_mode: What parts of the disk image should be copied to the destination. -- * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL. -+ * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental' -+ * @bitmap_mode: The bitmap synchronization policy to use. - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. - * @creation_flags: Flags that control the behavior of the Job lifetime. -@@ -1168,6 +1169,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, int64_t speed, - MirrorSyncMode sync_mode, - BdrvDirtyBitmap *sync_bitmap, -+ BitmapSyncMode bitmap_mode, - bool compress, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index b8d12a4951..97baff3a8c 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1127,12 +1127,15 @@ - # - # @none: only copy data written from now on - # --# @incremental: only copy data described by the dirty bitmap. Since: 2.4 -+# @incremental: only copy data described by the dirty bitmap. (since: 2.4) -+# -+# @bitmap: only copy data described by the dirty bitmap. (since: 4.2) -+# Behavior on completion is determined by the BitmapSyncMode. - # - # Since: 1.3 - ## - { 'enum': 'MirrorSyncMode', -- 'data': ['top', 'full', 'none', 'incremental'] } -+ 'data': ['top', 'full', 'none', 'incremental', 'bitmap'] } - - ## - # @BitmapSyncMode: -@@ -1343,9 +1346,14 @@ - # @speed: the maximum speed, in bytes per second. The default is 0, - # for unlimited. - # --# @bitmap: the name of dirty bitmap if sync is "incremental". --# Must be present if sync is "incremental", must NOT be present --# otherwise. 
(Since 2.4 (drive-backup), 3.1 (blockdev-backup)) -+# @bitmap: the name of a dirty bitmap if sync is "bitmap" or "incremental". -+# Must be present if sync is "bitmap" or "incremental". -+# Must not be present otherwise. -+# (Since 2.4 (drive-backup), 3.1 (blockdev-backup)) -+# -+# @bitmap-mode: Specifies the type of data the bitmap should contain after -+# the operation concludes. Must be present if sync is "bitmap". -+# Must NOT be present otherwise. (Since 4.2) - # - # @compress: true to compress data, if the target format supports it. - # (default: false) (since 2.8) -@@ -1380,7 +1388,8 @@ - { 'struct': 'BackupCommon', - 'data': { '*job-id': 'str', 'device': 'str', - 'sync': 'MirrorSyncMode', '*speed': 'int', -- '*bitmap': 'str', '*compress': 'bool', -+ '*bitmap': 'str', '*bitmap-mode': 'BitmapSyncMode', -+ '*compress': 'bool', - '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError', - '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } --- -2.27.0 - diff --git a/block-backup-add-never-policy-to-bitmap-sync-mode.patch b/block-backup-add-never-policy-to-bitmap-sync-mode.patch deleted file mode 100644 index e7a3dc356084623f89f89f9421467233e82b7dbb..0000000000000000000000000000000000000000 --- a/block-backup-add-never-policy-to-bitmap-sync-mode.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 98ed0f915cf3335768ed84ee5dfa54f4e99aaf00 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:53 -0400 -Subject: [PATCH] block/backup: add 'never' policy to bitmap sync mode - -This adds a "never" policy for bitmap synchronization. Regardless of if -the job succeeds or fails, we never update the bitmap. This can be used -to perform differential backups, or simply to avoid the job modifying a -bitmap. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190709232550.10724-7-jsnow@redhat.com -Signed-off-by: John Snow ---- - block/backup.c | 7 +++++-- - qapi/block-core.json | 5 ++++- - 2 files changed, 9 insertions(+), 3 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index e37eda80cd..84a56337ac 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -274,8 +274,11 @@ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) - BdrvDirtyBitmap *bm; - BlockDriverState *bs = blk_bs(job->common.blk); - -- if (ret < 0) { -- /* Merge the successor back into the parent, delete nothing. */ -+ if (ret < 0 || job->bitmap_mode == BITMAP_SYNC_MODE_NEVER) { -+ /* -+ * Failure, or we don't want to synchronize the bitmap. -+ * Merge the successor back into the parent, delete nothing. -+ */ - bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); - assert(bm); - } else { -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 97baff3a8c..48a0bfab63 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1146,10 +1146,13 @@ - # @on-success: The bitmap is only synced when the operation is successful. - # This is the behavior always used for 'INCREMENTAL' backups. - # -+# @never: The bitmap is never synchronized with the operation, and is -+# treated solely as a read-only manifest of blocks to copy. 
-+# - # Since: 4.2 - ## - { 'enum': 'BitmapSyncMode', -- 'data': ['on-success'] } -+ 'data': ['on-success', 'never'] } - - ## - # @MirrorCopyMode: --- -2.27.0 - diff --git a/block-backup-deal-with-zero-detection.patch b/block-backup-deal-with-zero-detection.patch deleted file mode 100644 index 9f111e58efff5bb63808d91441f4028bb743fce9..0000000000000000000000000000000000000000 --- a/block-backup-deal-with-zero-detection.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 3cf14b9a7daf0a40eb2af7a86e67cb05f6d2bea6 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 30 Jul 2019 19:32:49 +0300 -Subject: [PATCH] block/backup: deal with zero detection - -We have detect_zeroes option, so at least for blockdev-backup user -should define it if zero-detection is needed. For drive-backup leave -detection enabled by default but do it through existing option instead -of open-coding. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190730163251.755248-2-vsementsov@virtuozzo.com -Signed-off-by: John Snow ---- - block/backup.c | 15 ++++++--------- - blockdev.c | 8 ++++---- - 2 files changed, 10 insertions(+), 13 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index cc19643b47..6023573299 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -110,7 +110,10 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, - BlockBackend *blk = job->common.blk; - int nbytes; - int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; -- int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0; -+ int write_flags = -+ (job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0) | -+ (job->compress ? 
BDRV_REQ_WRITE_COMPRESSED : 0); -+ - - assert(QEMU_IS_ALIGNED(start, job->cluster_size)); - hbitmap_reset(job->copy_bitmap, start, job->cluster_size); -@@ -128,14 +131,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, - goto fail; - } - -- if (buffer_is_zero(*bounce_buffer, nbytes)) { -- ret = blk_co_pwrite_zeroes(job->target, start, -- nbytes, write_flags | BDRV_REQ_MAY_UNMAP); -- } else { -- ret = blk_co_pwrite(job->target, start, -- nbytes, *bounce_buffer, write_flags | -- (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0)); -- } -+ ret = blk_co_pwrite(job->target, start, nbytes, *bounce_buffer, -+ write_flags); - if (ret < 0) { - trace_backup_do_cow_write_fail(job, start, ret); - if (error_is_read) { -diff --git a/blockdev.c b/blockdev.c -index 0a71a15fa2..94e5aee30b 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3572,7 +3572,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - BlockDriverState *source = NULL; - BlockJob *job = NULL; - AioContext *aio_context; -- QDict *options = NULL; -+ QDict *options; - Error *local_err = NULL; - int flags; - int64_t size; -@@ -3645,10 +3645,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - goto out; - } - -+ options = qdict_new(); -+ qdict_put_str(options, "discard", "unmap"); -+ qdict_put_str(options, "detect-zeroes", "unmap"); - if (backup->format) { -- if (!options) { -- options = qdict_new(); -- } - qdict_put_str(options, "driver", backup->format); - } - --- -2.27.0 - diff --git a/block-backup-fix-backup_cow_with_offload-for-last-cl.patch b/block-backup-fix-backup_cow_with_offload-for-last-cl.patch deleted file mode 100644 index 1dfccaca1200b02b9ff0225bf488804f0e7790ae..0000000000000000000000000000000000000000 --- a/block-backup-fix-backup_cow_with_offload-for-last-cl.patch +++ /dev/null @@ -1,35 +0,0 @@ -From adb934c8d2cfd8b920e69712f07a8fb9399fdc2d Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Fri, 20 Sep 2019 17:20:43 
+0300 -Subject: [PATCH] block/backup: fix backup_cow_with_offload for last cluster - -We shouldn't try to copy bytes beyond EOF. Fix it. - -Fixes: 9ded4a0114968e -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Reviewed-by: John Snow -Message-id: 20190920142056.12778-3-vsementsov@virtuozzo.com -Signed-off-by: Max Reitz -(cherry picked from commit 1048ddf0a32dcdaa952e581bd503d49adad527cc) -Signed-off-by: Michael Roth ---- - block/backup.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/backup.c b/block/backup.c -index 8119d3c..55736ea 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -169,7 +169,7 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, - - assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size)); - assert(QEMU_IS_ALIGNED(start, job->cluster_size)); -- nbytes = MIN(job->copy_range_size, end - start); -+ nbytes = MIN(job->copy_range_size, MIN(end, job->len) - start); - nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size); - hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters); - ret = blk_co_copy_range(blk, start, job->target, start, nbytes, --- -1.8.3.1 - diff --git a/block-backup-fix-max_transfer-handling-for-copy_rang.patch b/block-backup-fix-max_transfer-handling-for-copy_rang.patch deleted file mode 100644 index 2303b5faf858182f9820d6b3497150d481b4879b..0000000000000000000000000000000000000000 --- a/block-backup-fix-max_transfer-handling-for-copy_rang.patch +++ /dev/null @@ -1,51 +0,0 @@ -From bad8a640a29f16b4d333673577b06880894766e1 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Fri, 20 Sep 2019 17:20:42 +0300 -Subject: [PATCH] block/backup: fix max_transfer handling for copy_range - -Of course, QEMU_ALIGN_UP is a typo, it should be QEMU_ALIGN_DOWN, as we -are trying to find aligned size which satisfy both source and target. -Also, don't ignore too small max_transfer. In this case seems safer to -disable copy_range. 
- -Fixes: 9ded4a0114968e -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-id: 20190920142056.12778-2-vsementsov@virtuozzo.com -Signed-off-by: Max Reitz -(cherry picked from commit 981fb5810aa3f68797ee6e261db338bd78857614) -Signed-off-by: Michael Roth ---- - block/backup.c | 15 +++++++++++---- - 1 file changed, 11 insertions(+), 4 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 381659d..8119d3c 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -666,12 +666,19 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - job->cluster_size = cluster_size; - job->copy_bitmap = copy_bitmap; - copy_bitmap = NULL; -- job->use_copy_range = !compress; /* compression isn't supported for it */ - job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk), - blk_get_max_transfer(job->target)); -- job->copy_range_size = MAX(job->cluster_size, -- QEMU_ALIGN_UP(job->copy_range_size, -- job->cluster_size)); -+ job->copy_range_size = QEMU_ALIGN_DOWN(job->copy_range_size, -+ job->cluster_size); -+ /* -+ * Set use_copy_range, consider the following: -+ * 1. Compression is not supported for copy_range. -+ * 2. copy_range does not respect max_transfer (it's a TODO), so we factor -+ * that in here. If max_transfer is smaller than the job->cluster_size, -+ * we do not use copy_range (in that case it's zero after aligning down -+ * above). 
-+ */ -+ job->use_copy_range = !compress && job->copy_range_size > 0; - - /* Required permissions are already taken with target's blk_new() */ - block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, --- -1.8.3.1 - diff --git a/block-backup-hoist-bitmap-check-into-QMP-interface.patch b/block-backup-hoist-bitmap-check-into-QMP-interface.patch deleted file mode 100644 index 51dc67ccbcd131c09200963de192185c0aa97671..0000000000000000000000000000000000000000 --- a/block-backup-hoist-bitmap-check-into-QMP-interface.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 9cc9e9657aad126502183fa4ceb9b962b55471cb Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:55 -0400 -Subject: [PATCH] block/backup: hoist bitmap check into QMP interface - -This is nicer to do in the unified QMP interface that we have now, -because it lets us use the right terminology back at the user. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190716000117.25219-5-jsnow@redhat.com -Signed-off-by: John Snow ---- - block/backup.c | 13 ++++--------- - blockdev.c | 10 ++++++++++ - 2 files changed, 14 insertions(+), 9 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 59ac2c0396..cc19643b47 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -565,6 +565,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - assert(bs); - assert(target); - -+ /* QMP interface protects us from these cases */ -+ assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); -+ assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP); -+ - if (bs == target) { - error_setg(errp, "Source and target cannot be the same"); - return NULL; -@@ -596,16 +600,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - return NULL; - } - -- /* QMP interface should have handled translating this to bitmap mode */ -- assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); -- - if (sync_mode == MIRROR_SYNC_MODE_BITMAP) { -- if (!sync_bitmap) { -- 
error_setg(errp, "must provide a valid bitmap name for " -- "'%s' sync mode", MirrorSyncMode_str(sync_mode)); -- return NULL; -- } -- - /* If we need to write to this bitmap, check that we can: */ - if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && - bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) { -diff --git a/blockdev.c b/blockdev.c -index efb69d343a..0a71a15fa2 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3508,6 +3508,16 @@ static BlockJob *do_backup_common(BackupCommon *backup, - return NULL; - } - -+ if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || -+ (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { -+ /* done before desugaring 'incremental' to print the right message */ -+ if (!backup->has_bitmap) { -+ error_setg(errp, "must provide a valid bitmap name for " -+ "'%s' sync mode", MirrorSyncMode_str(backup->sync)); -+ return NULL; -+ } -+ } -+ - if (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL) { - if (backup->has_bitmap_mode && - backup->bitmap_mode != BITMAP_SYNC_MODE_ON_SUCCESS) { --- -2.27.0 - diff --git a/block-backup-loosen-restriction-on-readonly-bitmaps.patch b/block-backup-loosen-restriction-on-readonly-bitmaps.patch deleted file mode 100644 index ab0617c2b580040a460aad2bd4eb16f3e4687141..0000000000000000000000000000000000000000 --- a/block-backup-loosen-restriction-on-readonly-bitmaps.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 801e9452bc80a38ee26fe12ba42356851acd6a9e Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:54 -0400 -Subject: [PATCH] block/backup: loosen restriction on readonly bitmaps - -With the "never" sync policy, we actually can utilize readonly bitmaps -now. Loosen the check at the QMP level, and tighten it based on -provided arguments down at the job creation level instead. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190709232550.10724-19-jsnow@redhat.com -Signed-off-by: John Snow ---- - block/backup.c | 6 ++++++ - blockdev.c | 2 +- - 2 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/block/backup.c b/block/backup.c -index 84a56337ac..59ac2c0396 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -606,6 +606,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - return NULL; - } - -+ /* If we need to write to this bitmap, check that we can: */ -+ if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && -+ bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) { -+ return NULL; -+ } -+ - /* Create a new bitmap, and freeze/disable this one. */ - if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) { - return NULL; -diff --git a/blockdev.c b/blockdev.c -index 34c8b651e1..efb69d343a 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3533,7 +3533,7 @@ static BlockJob *do_backup_common(BackupCommon *backup, - "when providing a bitmap"); - return NULL; - } -- if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { -+ if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_ALLOW_RO, errp)) { - return NULL; - } - } --- -2.27.0 - diff --git a/block-bdrv_set_backing_bs-fix-use-after-free.patch b/block-bdrv_set_backing_bs-fix-use-after-free.patch deleted file mode 100644 index 93ac72169d8518a8fcadc82c7ee01fcfdfcf94fc..0000000000000000000000000000000000000000 --- a/block-bdrv_set_backing_bs-fix-use-after-free.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 3754525eb383f91869634766ccd041cfe40bbf17 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Mon, 16 Mar 2020 09:06:30 +0300 -Subject: [PATCH 05/14] block: bdrv_set_backing_bs: fix use-after-free -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -There is a use-after-free possible: bdrv_unref_child() leaves -bs->backing freed but not NULL. 
bdrv_attach_child may produce nested -polling loop due to drain, than access of freed pointer is possible. - -I've produced the following crash on 30 iotest with modified code. It -does not reproduce on master, but still seems possible: - - #0 __strcmp_avx2 () at /lib64/libc.so.6 - #1 bdrv_backing_overridden (bs=0x55c9d3cc2060) at block.c:6350 - #2 bdrv_refresh_filename (bs=0x55c9d3cc2060) at block.c:6404 - #3 bdrv_backing_attach (c=0x55c9d48e5520) at block.c:1063 - #4 bdrv_replace_child_noperm - (child=child@entry=0x55c9d48e5520, - new_bs=new_bs@entry=0x55c9d3cc2060) at block.c:2290 - #5 bdrv_replace_child - (child=child@entry=0x55c9d48e5520, - new_bs=new_bs@entry=0x55c9d3cc2060) at block.c:2320 - #6 bdrv_root_attach_child - (child_bs=child_bs@entry=0x55c9d3cc2060, - child_name=child_name@entry=0x55c9d241d478 "backing", - child_role=child_role@entry=0x55c9d26ecee0 , - ctx=, perm=, shared_perm=21, - opaque=0x55c9d3c5a3d0, errp=0x7ffd117108e0) at block.c:2424 - #7 bdrv_attach_child - (parent_bs=parent_bs@entry=0x55c9d3c5a3d0, - child_bs=child_bs@entry=0x55c9d3cc2060, - child_name=child_name@entry=0x55c9d241d478 "backing", - child_role=child_role@entry=0x55c9d26ecee0 , - errp=errp@entry=0x7ffd117108e0) at block.c:5876 - #8 in bdrv_set_backing_hd - (bs=bs@entry=0x55c9d3c5a3d0, - backing_hd=backing_hd@entry=0x55c9d3cc2060, - errp=errp@entry=0x7ffd117108e0) - at block.c:2576 - #9 stream_prepare (job=0x55c9d49d84a0) at block/stream.c:150 - #10 job_prepare (job=0x55c9d49d84a0) at job.c:761 - #11 job_txn_apply (txn=, fn=) at - job.c:145 - #12 job_do_finalize (job=0x55c9d49d84a0) at job.c:778 - #13 job_completed_txn_success (job=0x55c9d49d84a0) at job.c:832 - #14 job_completed (job=0x55c9d49d84a0) at job.c:845 - #15 job_completed (job=0x55c9d49d84a0) at job.c:836 - #16 job_exit (opaque=0x55c9d49d84a0) at job.c:864 - #17 aio_bh_call (bh=0x55c9d471a160) at util/async.c:117 - #18 aio_bh_poll (ctx=ctx@entry=0x55c9d3c46720) at util/async.c:117 - #19 aio_poll 
(ctx=ctx@entry=0x55c9d3c46720, - blocking=blocking@entry=true) - at util/aio-posix.c:728 - #20 bdrv_parent_drained_begin_single (poll=true, c=0x55c9d3d558f0) - at block/io.c:121 - #21 bdrv_parent_drained_begin_single (c=c@entry=0x55c9d3d558f0, - poll=poll@entry=true) - at block/io.c:114 - #22 bdrv_replace_child_noperm - (child=child@entry=0x55c9d3d558f0, - new_bs=new_bs@entry=0x55c9d3d27300) at block.c:2258 - #23 bdrv_replace_child - (child=child@entry=0x55c9d3d558f0, - new_bs=new_bs@entry=0x55c9d3d27300) at block.c:2320 - #24 bdrv_root_attach_child - (child_bs=child_bs@entry=0x55c9d3d27300, - child_name=child_name@entry=0x55c9d241d478 "backing", - child_role=child_role@entry=0x55c9d26ecee0 , - ctx=, perm=, shared_perm=21, - opaque=0x55c9d3cc2060, errp=0x7ffd11710c60) at block.c:2424 - #25 bdrv_attach_child - (parent_bs=parent_bs@entry=0x55c9d3cc2060, - child_bs=child_bs@entry=0x55c9d3d27300, - child_name=child_name@entry=0x55c9d241d478 "backing", - child_role=child_role@entry=0x55c9d26ecee0 , - errp=errp@entry=0x7ffd11710c60) at block.c:5876 - #26 bdrv_set_backing_hd - (bs=bs@entry=0x55c9d3cc2060, - backing_hd=backing_hd@entry=0x55c9d3d27300, - errp=errp@entry=0x7ffd11710c60) - at block.c:2576 - #27 stream_prepare (job=0x55c9d495ead0) at block/stream.c:150 - ... 
- -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200316060631.30052-2-vsementsov@virtuozzo.com> -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: John Snow -Signed-off-by: Max Reitz -Signed-off-by: Peng Liang ---- - block.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block.c b/block.c -index 29e504b86aff..e834102c87f7 100644 ---- a/block.c -+++ b/block.c -@@ -2549,10 +2549,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - - if (bs->backing) { - bdrv_unref_child(bs, bs->backing); -+ bs->backing = NULL; - } - - if (!backing_hd) { -- bs->backing = NULL; - goto out; - } - --- -2.26.2 - diff --git a/block-blkio-Make-s-mem_region_alignment-be-64-bits.patch b/block-blkio-Make-s-mem_region_alignment-be-64-bits.patch new file mode 100644 index 0000000000000000000000000000000000000000..3b95eea9958acf0599c701844ffa1dc9e7c82f0d --- /dev/null +++ b/block-blkio-Make-s-mem_region_alignment-be-64-bits.patch @@ -0,0 +1,48 @@ +From ede25e9b7c5cc8ce1c668f306bfbe5c90564570b Mon Sep 17 00:00:00 2001 +From: gubin +Date: Wed, 25 Jun 2025 17:13:10 +0800 +Subject: [PATCH] block/blkio: Make s->mem_region_alignment be 64 bits +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 615eaeab3d318ba239d54141a4251746782f65c1 + +With GCC 14 the code failed to compile on i686 (and was wrong for any +version of GCC): + +../block/blkio.c: In function ‘blkio_file_open’: +../block/blkio.c:857:28: error: passing argument 3 of ‘blkio_get_uint64’ from incompatible pointer type [-Wincompatible-pointer-types] + 857 | &s->mem_region_alignment); + | ^~~~~~~~~~~~~~~~~~~~~~~~ + | | + | size_t * {aka unsigned int *} +In file included from ../block/blkio.c:12: +/usr/include/blkio.h:49:67: note: expected ‘uint64_t *’ {aka ‘long long unsigned int *’} but argument is of type ‘size_t *’ {aka ‘unsigned int *’} + 49 | int blkio_get_uint64(struct blkio *b, const char *name, uint64_t 
*value); + | ~~~~~~~~~~^~~~~ + +Signed-off-by: Richard W.M. Jones +Message-id: 20240130122006.2977938-1-rjones@redhat.com +Signed-off-by: Stefan Hajnoczi +Signed-off-by: gubin +--- + block/blkio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 027c16ceb6..52ac94527f 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -68,7 +68,7 @@ typedef struct { + CoQueue bounce_available; + + /* The value of the "mem-region-alignment" property */ +- size_t mem_region_alignment; ++ uint64_t mem_region_alignment; + + /* Can we skip adding/deleting blkio_mem_regions? */ + bool needs_mem_regions; +-- +2.33.0 + diff --git a/block-blkio-use-FUA-flag-on-write-zeroes-only-if-sup.patch b/block-blkio-use-FUA-flag-on-write-zeroes-only-if-sup.patch new file mode 100644 index 0000000000000000000000000000000000000000..969fd30f4c5a6219bfb9271774669228b32fdc13 --- /dev/null +++ b/block-blkio-use-FUA-flag-on-write-zeroes-only-if-sup.patch @@ -0,0 +1,61 @@ +From 9daf2b936101d612a295217822791d323e908fc9 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 8 Aug 2024 10:05:45 +0200 +Subject: [PATCH] block/blkio: use FUA flag on write zeroes only if supported +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +libblkio supports BLKIO_REQ_FUA with write zeros requests only since +version 1.4.0, so let's inform the block layer that the blkio driver +supports it only in this case. 
Otherwise we can have runtime errors +as reported in https://issues.redhat.com/browse/RHEL-32878 + +Fixes: fd66dbd424 ("blkio: add libblkio block driver") +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-32878 +Signed-off-by: Stefano Garzarella +Reviewed-by: Eric Blake +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20240808080545.40744-1-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 547c4e50929ec6c091d9c16a7b280e829b12b463) +Signed-off-by: zhujun2 +--- + block/blkio.c | 6 ++++-- + meson.build | 2 ++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index b989617608..027c16ceb6 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -899,8 +899,10 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + } + + bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF; +- bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | +- BDRV_REQ_NO_FALLBACK; ++ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; ++#ifdef CONFIG_BLKIO_WRITE_ZEROS_FUA ++ bs->supported_zero_flags |= BDRV_REQ_FUA; ++#endif + + qemu_mutex_init(&s->blkio_lock); + qemu_co_mutex_init(&s->bounce_lock); +diff --git a/meson.build b/meson.build +index 4024f9a4bb..ce2fd07963 100644 +--- a/meson.build ++++ b/meson.build +@@ -2181,6 +2181,8 @@ config_host_data.set('CONFIG_BLKIO', blkio.found()) + if blkio.found() + config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', + blkio.version().version_compare('>=1.3.0')) ++ config_host_data.set('CONFIG_BLKIO_WRITE_ZEROS_FUA', ++ blkio.version().version_compare('>=1.4.0')) + endif + config_host_data.set('CONFIG_CURL', curl.found()) + config_host_data.set('CONFIG_CURSES', curses.found()) +-- +2.41.0.windows.1 + diff --git a/block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch b/block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch new file mode 100644 index 
0000000000000000000000000000000000000000..d082d283e83399d2d8655fc9c2137f2f329ad56c --- /dev/null +++ b/block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch @@ -0,0 +1,33 @@ +From ea0feb8a262383582416283ad1af1819c1e0e22a Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 16:10:22 +0800 +Subject: [PATCH] block: bugfix: Don't pause vm when NOSPACE EIO happened + +When backend disk is FULL and disk IO type is 'dataplane', +QEMU will pause the vm, and this may cause endless-loop in +QEMU main thread if we do the snapshot merge now. + +When backend disk is FULL, only reporting an error rather +than pausing the virtual machine. + +Signed-off-by: wangjian161 +--- + blockdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index bc2099e9da..455ae8606d 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -557,7 +557,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + qdict_put_str(bs_opts, "driver", buf); + } + +- on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; ++ on_write_error = BLOCKDEV_ON_ERROR_REPORT; + if ((buf = qemu_opt_get(opts, "werror")) != NULL) { + on_write_error = parse_block_error_action(buf, 0, &error); + if (error) { +-- +2.27.0 + diff --git a/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch b/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch deleted file mode 100644 index 73d2b9d0e5e490c3f855304e692ed68fd4029468..0000000000000000000000000000000000000000 --- a/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 088f1e8fd9e790bc5766bd43af134230abcff6dd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 12 Sep 2019 00:08:49 +0200 -Subject: [PATCH] block/create: Do not abort if a block driver is not available -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The 'blockdev-create' QMP command was introduced as experimental -feature in commit 
b0292b851b8, using the assert() debug call. -It got promoted to 'stable' command in 3fb588a0f2c, but the -assert call was not removed. - -Some block drivers are optional, and bdrv_find_format() might -return a NULL value, triggering the assertion. - -Stable code is not expected to abort, so return an error instead. - -This is easily reproducible when libnfs is not installed: - - ./configure - [...] - module support no - Block whitelist (rw) - Block whitelist (ro) - libiscsi support yes - libnfs support no - [...] - -Start QEMU: - - $ qemu-system-x86_64 -S -qmp unix:/tmp/qemu.qmp,server,nowait - -Send the 'blockdev-create' with the 'nfs' driver: - - $ ( cat << 'EOF' - {'execute': 'qmp_capabilities'} - {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} - EOF - ) | socat STDIO UNIX:/tmp/qemu.qmp - {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 4}, "package": "v4.1.0-733-g89ea03a7dc"}, "capabilities": ["oob"]}} - {"return": {}} - -QEMU crashes: - - $ gdb qemu-system-x86_64 core - Program received signal SIGSEGV, Segmentation fault. 
- (gdb) bt - #0 0x00007ffff510957f in raise () at /lib64/libc.so.6 - #1 0x00007ffff50f3895 in abort () at /lib64/libc.so.6 - #2 0x00007ffff50f3769 in _nl_load_domain.cold.0 () at /lib64/libc.so.6 - #3 0x00007ffff5101a26 in .annobin_assert.c_end () at /lib64/libc.so.6 - #4 0x0000555555d7e1f1 in qmp_blockdev_create (job_id=0x555556baee40 "x", options=0x555557666610, errp=0x7fffffffc770) at block/create.c:69 - #5 0x0000555555c96b52 in qmp_marshal_blockdev_create (args=0x7fffdc003830, ret=0x7fffffffc7f8, errp=0x7fffffffc7f0) at qapi/qapi-commands-block-core.c:1314 - #6 0x0000555555deb0a0 in do_qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false, errp=0x7fffffffc898) at qapi/qmp-dispatch.c:131 - #7 0x0000555555deb2a1 in qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false) at qapi/qmp-dispatch.c:174 - -With this patch applied, QEMU returns a QMP error: - - {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} - {"id": "x", "error": {"class": "GenericError", "desc": "Block driver 'nfs' not found or not supported"}} - -Cc: qemu-stable@nongnu.org -Reported-by: Xu Tian -Signed-off-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Blake -Reviewed-by: John Snow -Signed-off-by: Kevin Wolf -(cherry picked from commit d90d5cae2b10efc0e8d0b3cc91ff16201853d3ba) -Signed-off-by: Michael Roth ---- - block/create.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/block/create.c b/block/create.c -index 95341219ef..de5e97bb18 100644 ---- a/block/create.c -+++ b/block/create.c -@@ -63,9 +63,13 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, - const char *fmt = BlockdevDriver_str(options->driver); - BlockDriver *drv = bdrv_find_format(fmt); - -+ if (!drv) { -+ error_setg(errp, "Block driver '%s' not found or not supported", fmt); -+ return; -+ } -+ - /* If the 
driver is in the schema, we know that it exists. But it may not - * be whitelisted. */ -- assert(drv); - if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { - error_setg(errp, "Driver is not whitelisted"); - return; --- -2.23.0 diff --git a/block-curl-HTTP-header-field-names-are-case-insensit.patch b/block-curl-HTTP-header-field-names-are-case-insensit.patch deleted file mode 100644 index 8f1028d75bbb4db6b16eca9718c90d5fe2e5795e..0000000000000000000000000000000000000000 --- a/block-curl-HTTP-header-field-names-are-case-insensit.patch +++ /dev/null @@ -1,54 +0,0 @@ -From ae2c6d13c4ac625a2c6b217a7f6a17506a2b26e5 Mon Sep 17 00:00:00 2001 -From: Richard Jones -Date: Thu, 28 May 2020 14:27:37 +0100 -Subject: [PATCH] block/curl: HTTP header field names are case insensitive -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Richard Jones -Message-id: <20200528142737.17318-3-rjones@redhat.com> -Patchwork-id: 96895 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block/curl: HTTP header field names are case insensitive -Bugzilla: 1841038 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Philippe Mathieu-Daudé - -From: David Edmondson - -RFC 7230 section 3.2 indicates that HTTP header field names are case -insensitive. - -Signed-off-by: David Edmondson -Message-Id: <20200224101310.101169-3-david.edmondson@oracle.com> -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 69032253c33ae1774233c63cedf36d32242a85fc) -Signed-off-by: Danilo C. L. 
de Paula ---- - block/curl.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/curl.c b/block/curl.c -index bfabe7eabd..a298fcc591 100644 ---- a/block/curl.c -+++ b/block/curl.c -@@ -214,11 +214,12 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) - size_t realsize = size * nmemb; - const char *header = (char *)ptr; - const char *end = header + realsize; -- const char *accept_ranges = "Accept-Ranges:"; -+ const char *accept_ranges = "accept-ranges:"; - const char *bytes = "bytes"; - - if (realsize >= strlen(accept_ranges) -- && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { -+ && g_ascii_strncasecmp(header, accept_ranges, -+ strlen(accept_ranges)) == 0) { - - char *p = strchr(header, ':') + 1; - --- -2.27.0 - diff --git a/block-curl-HTTP-header-fields-allow-whitespace-aroun.patch b/block-curl-HTTP-header-fields-allow-whitespace-aroun.patch deleted file mode 100644 index 6f3aade47ce62e588e7ce490a7a3fc8f873c49de..0000000000000000000000000000000000000000 --- a/block-curl-HTTP-header-fields-allow-whitespace-aroun.patch +++ /dev/null @@ -1,75 +0,0 @@ -From c8fd37c06fd24d1242629dda329dd16bea20f319 Mon Sep 17 00:00:00 2001 -From: Richard Jones -Date: Thu, 28 May 2020 14:27:36 +0100 -Subject: [PATCH] block/curl: HTTP header fields allow whitespace around values - -RH-Author: Richard Jones -Message-id: <20200528142737.17318-2-rjones@redhat.com> -Patchwork-id: 96894 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] block/curl: HTTP header fields allow whitespace around values -Bugzilla: 1841038 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Danilo de Paula - -From: David Edmondson - -RFC 7230 section 3.2 indicates that whitespace is permitted between -the field name and field value and after the field value. 
- -Signed-off-by: David Edmondson -Message-Id: <20200224101310.101169-2-david.edmondson@oracle.com> -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 7788a319399f17476ff1dd43164c869e320820a2) -Signed-off-by: Danilo C. L. de Paula ---- - block/curl.c | 31 +++++++++++++++++++++++++++---- - 1 file changed, 27 insertions(+), 4 deletions(-) - -diff --git a/block/curl.c b/block/curl.c -index d4c8e94f3e..bfabe7eabd 100644 ---- a/block/curl.c -+++ b/block/curl.c -@@ -212,11 +212,34 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) - { - BDRVCURLState *s = opaque; - size_t realsize = size * nmemb; -- const char *accept_line = "Accept-Ranges: bytes"; -+ const char *header = (char *)ptr; -+ const char *end = header + realsize; -+ const char *accept_ranges = "Accept-Ranges:"; -+ const char *bytes = "bytes"; - -- if (realsize >= strlen(accept_line) -- && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) { -- s->accept_range = true; -+ if (realsize >= strlen(accept_ranges) -+ && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { -+ -+ char *p = strchr(header, ':') + 1; -+ -+ /* Skip whitespace between the header name and value. */ -+ while (p < end && *p && g_ascii_isspace(*p)) { -+ p++; -+ } -+ -+ if (end - p >= strlen(bytes) -+ && strncmp(p, bytes, strlen(bytes)) == 0) { -+ -+ /* Check that there is nothing but whitespace after the value. 
*/ -+ p += strlen(bytes); -+ while (p < end && *p && g_ascii_isspace(*p)) { -+ p++; -+ } -+ -+ if (p == end || !*p) { -+ s->accept_range = true; -+ } -+ } - } - - return realsize; --- -2.27.0 - diff --git a/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch b/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2f7ad6a14474d6ae4b4558280f78047540e689f --- /dev/null +++ b/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch @@ -0,0 +1,47 @@ +From f9aef3909d23af6a33c604f59dccfcb764090f01 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 11:29:15 +0800 +Subject: [PATCH] block: disallow block jobs when there is a BDRV_O_INACTIVE + flag + +Currently, migration will put a BDRV_O_INACTIVE flag +on bs's open_flags until another resume being called. In that case, +any IO from vm or block jobs will cause a qemu crash with an assert +'assert(!(bs->open_flags & BDRV_O_INACTIVE))' failure in bdrv_co_pwritev +function. we hereby disallow block jobs by faking a blocker. + +Signed-off-by: wangjian161 +--- + block.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/block.c b/block.c +index bfb0861ec6..b7cb963929 100644 +--- a/block.c ++++ b/block.c +@@ -7298,6 +7298,22 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) + bdrv_get_device_or_node_name(bs)); + return true; + } ++ ++ /* ++ * When migration puts a BDRV_O_INACTIVE flag on driver's open_flags, ++ * we fake a blocker that doesn't exist. From now on, block jobs ++ * will not be permitted. 
++ */ ++ if ((op == BLOCK_OP_TYPE_RESIZE || op == BLOCK_OP_TYPE_COMMIT_SOURCE || ++ op == BLOCK_OP_TYPE_MIRROR_SOURCE || op == BLOCK_OP_TYPE_MIRROR_TARGET) && ++ (bs->open_flags & BDRV_O_INACTIVE)) { ++ if (errp) { ++ error_setg(errp, "block device is in use by migration with" ++ " a driver BDRV_O_INACTIVE flag setted"); ++ } ++ return true; ++ } ++ + return false; + } + +-- +2.27.0 + diff --git a/block-enable-cache-mode-of-empty-cdrom.patch b/block-enable-cache-mode-of-empty-cdrom.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7aae757e53e7c9f84a576ccf2dfd1476bab2acb --- /dev/null +++ b/block-enable-cache-mode-of-empty-cdrom.patch @@ -0,0 +1,49 @@ +From 652325f9a04143ffabf5e9a418253a05e927ec37 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 11:18:21 +0800 +Subject: [PATCH] block: enable cache mode of empty cdrom + +enable cache mode even if cdrom is empty + +Signed-off-by: wangjian161 +--- + blockdev.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/blockdev.c b/blockdev.c +index c91f49e7b6..bc2099e9da 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -493,6 +493,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + QDict *interval_dict = NULL; + QList *interval_list = NULL; + const char *id; ++ const char *cache; + BlockdevDetectZeroesOptions detect_zeroes = + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; + const char *throttling_group = NULL; +@@ -580,6 +581,21 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + + read_only = qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false); + ++ if (!file || !*file) { ++ cache = qdict_get_try_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH); ++ if (cache && !strcmp(cache, "on")) { ++ bdrv_flags |= BDRV_O_NO_FLUSH; ++ } ++ ++ cache = qdict_get_try_str(bs_opts, BDRV_OPT_CACHE_DIRECT); ++ if (cache && !strcmp(cache, "on")) { ++ bdrv_flags |= BDRV_O_NOCACHE; ++ } ++ ++ qdict_del(bs_opts, BDRV_OPT_CACHE_NO_FLUSH); ++ qdict_del(bs_opts, 
BDRV_OPT_CACHE_DIRECT); ++ } ++ + /* init */ + if ((!file || !*file) && !qdict_size(bs_opts)) { + BlockBackendRootState *blk_rs; +-- +2.27.0 + diff --git a/block-file-posix-Let-post-EOF-fallocate-serialize.patch b/block-file-posix-Let-post-EOF-fallocate-serialize.patch deleted file mode 100644 index bf7d34a3e54ffe24356a16185d1dfcaef603c455..0000000000000000000000000000000000000000 --- a/block-file-posix-Let-post-EOF-fallocate-serialize.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 7db05c8a732fbdc986a40aadf0de6dd23057d044 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Fri, 1 Nov 2019 16:25:10 +0100 -Subject: [PATCH] block/file-posix: Let post-EOF fallocate serialize - -The XFS kernel driver has a bug that may cause data corruption for qcow2 -images as of qemu commit c8bb23cbdbe32f. We can work around it by -treating post-EOF fallocates as serializing up until infinity (INT64_MAX -in practice). - -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20191101152510.11719-4-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 292d06b925b2787ee6f2430996b95651cae42fce) -Signed-off-by: Michael Roth ---- - block/file-posix.c | 36 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 36 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 992eb4a798..c5df61b477 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2623,6 +2623,42 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, - RawPosixAIOData acb; - ThreadPoolFunc *handler; - -+#ifdef CONFIG_FALLOCATE -+ if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { -+ BdrvTrackedRequest *req; -+ uint64_t end; -+ -+ /* -+ * This is a workaround for a bug in the Linux XFS driver, -+ * where writes submitted through the AIO interface will be -+ * discarded if they happen beyond a concurrently running -+ * fallocate() that increases the file length (i.e., both the -+ * write and the fallocate() happen beyond the EOF). 
-+ * -+ * To work around it, we extend the tracked request for this -+ * zero write until INT64_MAX (effectively infinity), and mark -+ * it as serializing. -+ * -+ * We have to enable this workaround for all filesystems and -+ * AIO modes (not just XFS with aio=native), because for -+ * remote filesystems we do not know the host configuration. -+ */ -+ -+ req = bdrv_co_get_self_request(bs); -+ assert(req); -+ assert(req->type == BDRV_TRACKED_WRITE); -+ assert(req->offset <= offset); -+ assert(req->offset + req->bytes >= offset + bytes); -+ -+ end = INT64_MAX & -(uint64_t)bs->bl.request_alignment; -+ req->bytes = end - req->offset; -+ req->overlap_bytes = req->bytes; -+ -+ bdrv_mark_request_serialising(req, bs->bl.request_alignment); -+ bdrv_wait_serialising_requests(req); -+ } -+#endif -+ - acb = (RawPosixAIOData) { - .bs = bs, - .aio_fildes = s->fd, --- -2.23.0 diff --git a/block-file-posix-Reduce-xfsctl-use.patch b/block-file-posix-Reduce-xfsctl-use.patch deleted file mode 100644 index 69ceb453efac39a1fcfcc26488e04a7bb8eee0df..0000000000000000000000000000000000000000 --- a/block-file-posix-Reduce-xfsctl-use.patch +++ /dev/null @@ -1,165 +0,0 @@ -From 6f1a94035b02d3676a897ea5fa4cda4c62128228 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Fri, 23 Aug 2019 15:03:40 +0200 -Subject: [PATCH] block/file-posix: Reduce xfsctl() use -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch removes xfs_write_zeroes() and xfs_discard(). Both functions -have been added just before the same feature was present through -fallocate(): - -- fallocate() has supported PUNCH_HOLE for XFS since Linux 2.6.38 (March - 2011); xfs_discard() was added in December 2010. - -- fallocate() has supported ZERO_RANGE for XFS since Linux 3.15 (June - 2014); xfs_write_zeroes() was added in November 2013. - -Nowadays, all systems that qemu runs on should support both fallocate() -features (RHEL 7's kernel does). 
- -xfsctl() is still useful for getting the request alignment for O_DIRECT, -so this patch does not remove our dependency on it completely. - -Note that xfs_write_zeroes() had a bug: It calls ftruncate() when the -file is shorter than the specified range (because ZERO_RANGE does not -increase the file length). ftruncate() may yield and then discard data -that parallel write requests have written past the EOF in the meantime. -Dropping the function altogether fixes the bug. - -Suggested-by: Paolo Bonzini -Fixes: 50ba5b2d994853b38fed10e0841b119da0f8b8e5 -Reported-by: Lukáš Doktor -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Reviewed-by: Stefano Garzarella -Reviewed-by: John Snow -Tested-by: Stefano Garzarella -Tested-by: John Snow -Signed-off-by: Kevin Wolf -(cherry picked from commit b2c6f23f4a9f6d8f1b648705cd46d3713b78d6a2) -Signed-off-by: Michael Roth ---- - block/file-posix.c | 77 +--------------------------------------------- - 1 file changed, 1 insertion(+), 76 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 4479cc7ab4..992eb4a798 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1445,59 +1445,6 @@ out: - } - } - --#ifdef CONFIG_XFS --static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) --{ -- int64_t len; -- struct xfs_flock64 fl; -- int err; -- -- len = lseek(s->fd, 0, SEEK_END); -- if (len < 0) { -- return -errno; -- } -- -- if (offset + bytes > len) { -- /* XFS_IOC_ZERO_RANGE does not increase the file length */ -- if (ftruncate(s->fd, offset + bytes) < 0) { -- return -errno; -- } -- } -- -- memset(&fl, 0, sizeof(fl)); -- fl.l_whence = SEEK_SET; -- fl.l_start = offset; -- fl.l_len = bytes; -- -- if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) { -- err = errno; -- trace_file_xfs_write_zeroes(strerror(errno)); -- return -err; -- } -- -- return 0; --} -- --static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) --{ -- struct xfs_flock64 fl; -- int err; -- -- 
memset(&fl, 0, sizeof(fl)); -- fl.l_whence = SEEK_SET; -- fl.l_start = offset; -- fl.l_len = bytes; -- -- if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { -- err = errno; -- trace_file_xfs_discard(strerror(errno)); -- return -err; -- } -- -- return 0; --} --#endif -- - static int translate_err(int err) - { - if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || -@@ -1553,10 +1500,8 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) - static int handle_aiocb_write_zeroes(void *opaque) - { - RawPosixAIOData *aiocb = opaque; --#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS) -- BDRVRawState *s = aiocb->bs->opaque; --#endif - #ifdef CONFIG_FALLOCATE -+ BDRVRawState *s = aiocb->bs->opaque; - int64_t len; - #endif - -@@ -1564,12 +1509,6 @@ static int handle_aiocb_write_zeroes(void *opaque) - return handle_aiocb_write_zeroes_block(aiocb); - } - --#ifdef CONFIG_XFS -- if (s->is_xfs) { -- return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes); -- } --#endif -- - #ifdef CONFIG_FALLOCATE_ZERO_RANGE - if (s->has_write_zeroes) { - int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, -@@ -1632,14 +1571,6 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque) - } - #endif - --#ifdef CONFIG_XFS -- if (s->is_xfs) { -- /* xfs_discard() guarantees that the discarded area reads as all-zero -- * afterwards, so we can use it here. 
*/ -- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); -- } --#endif -- - /* If we couldn't manage to unmap while guaranteed that the area reads as - * all-zero afterwards, just write zeroes without unmapping */ - ret = handle_aiocb_write_zeroes(aiocb); -@@ -1716,12 +1647,6 @@ static int handle_aiocb_discard(void *opaque) - ret = -errno; - #endif - } else { --#ifdef CONFIG_XFS -- if (s->is_xfs) { -- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); -- } --#endif -- - #ifdef CONFIG_FALLOCATE_PUNCH_HOLE - ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - aiocb->aio_offset, aiocb->aio_nbytes); --- -2.23.0 diff --git a/block-fix-Werror-maybe-uninitialized-false-positive.patch b/block-fix-Werror-maybe-uninitialized-false-positive.patch new file mode 100644 index 0000000000000000000000000000000000000000..13ba9cebbd4cd4c53521464b3e2e84cd83e2adfa --- /dev/null +++ b/block-fix-Werror-maybe-uninitialized-false-positive.patch @@ -0,0 +1,36 @@ +From 2edda423f2bca2348595e99a4ef9f5c73e262e77 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Wed, 9 Oct 2024 07:25:22 -0400 +Subject: [PATCH] block: fix -Werror=maybe-uninitialized false-positive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from eb5d28c783078ad2d7fb42349e146190cd98678b + +../block/file-posix.c:1405:17: error: ‘zoned’ may be used uninitialized [-Werror=maybe-uninitialized] + 1405 | if (ret < 0 || zoned == BLK_Z_NONE) { + +Signed-off-by: Marc-André Lureau +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: qihao_yewu +--- + block/file-posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 4ac8f684f1..787f613d52 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1423,7 +1423,7 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, + Error **errp) + { + BDRVRawState *s = bs->opaque; +- BlockZoneModel 
zoned; ++ BlockZoneModel zoned = BLK_Z_NONE; + int ret; + + ret = get_sysfs_zoned_model(st, &zoned); +-- +2.41.0.windows.1 + diff --git a/block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch b/block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch deleted file mode 100644 index d901f1062659223d2899ab5520759a6a5065545a..0000000000000000000000000000000000000000 --- a/block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 5060ef71fa4621061101a30fa9e0d1690696c5c1 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 24 Mar 2020 18:59:21 +0300 -Subject: [PATCH 10/14] block: fix bdrv_root_attach_child forget to unref - child_bs - -bdrv_root_attach_child promises to drop child_bs reference on failure. -It does it on first handled failure path, but not on the second. Fix -that. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200324155921.23822-1-vsementsov@virtuozzo.com> -Signed-off-by: Kevin Wolf -Signed-off-by: Peng Liang ---- - block.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block.c b/block.c -index e834102c87f7..38880eabf801 100644 ---- a/block.c -+++ b/block.c -@@ -2399,6 +2399,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - error_propagate(errp, local_err); - g_free(child); - bdrv_abort_perm_update(child_bs); -+ bdrv_unref(child_bs); - return NULL; - } - } --- -2.26.2 - diff --git a/block-fix-memleaks-in-bdrv_refresh_filename.patch b/block-fix-memleaks-in-bdrv_refresh_filename.patch deleted file mode 100644 index 48682054e6d7db02048c7538f8f1963447020c85..0000000000000000000000000000000000000000 --- a/block-fix-memleaks-in-bdrv_refresh_filename.patch +++ /dev/null @@ -1,56 +0,0 @@ -From d09b8364d9f89c9d5f36dc983c4d4a36bb7388b9 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Thu, 16 Jan 2020 17:29:29 +0800 -Subject: [PATCH] block: fix memleaks in bdrv_refresh_filename - -If we call the qmp 'query-block' while qemu is working on 'block-commit', 
-it will cause memleaks. The memory leak stack is as follow: - -Indirect leak of 12360 byte(s) in 3 object(s) allocated from: - #0 0x7f80f0b6d970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) - #1 0x7f80ee86049d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) - #2 0x55ea95b5bb67 in qdict_new /mnt/sdb/qemu/qobject/qdict.c - #3 0x55ea956cd043 in bdrv_refresh_filename /mnt/sdb/qemu/block.c - #4 0x55ea956cc950 in bdrv_refresh_filename /mnt/sdb/qemu/block.c - #5 0x55ea956cc950 in bdrv_refresh_filename /mnt/sdb/qemu/block.c - #6 0x55ea956cc950 in bdrv_refresh_filename /mnt/sdb/qemu/block.c - #7 0x55ea958818ea in bdrv_block_device_info /mnt/sdb/qemu/block/qapi.c - #8 0x55ea958879de in bdrv_query_info /mnt/sdb/qemu/block/qapi.c - #9 0x55ea9588b58f in qmp_query_block /mnt/sdb/qemu/block/qapi.c - #10 0x55ea95567392 in qmp_marshal_query_block qapi/qapi-commands-block-core.c - -Indirect leak of 4120 byte(s) in 1 object(s) allocated from: - #0 0x7f80f0b6d970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) - #1 0x7f80ee86049d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) - #2 0x55ea95b5bb67 in qdict_new /mnt/sdb/qemu/qobject/qdict.c - #3 0x55ea956cd043 in bdrv_refresh_filename /mnt/sdb/qemu/block.c - #4 0x55ea956cc950 in bdrv_refresh_filename /mnt/sdb/qemu/block.c - #5 0x55ea956cc950 in bdrv_refresh_filename /mnt/sdb/qemu/block.c - #6 0x55ea9569f301 in bdrv_backing_attach /mnt/sdb/qemu/block.c - #7 0x55ea956a99dd in bdrv_replace_child_noperm /mnt/sdb/qemu/block.c - #8 0x55ea956b9b53 in bdrv_replace_node /mnt/sdb/qemu/block.c - #9 0x55ea956b9e49 in bdrv_append /mnt/sdb/qemu/block.c - #10 0x55ea958c3472 in commit_start /mnt/sdb/qemu/block/commit.c - #11 0x55ea94b68ab0 in qmp_block_commit /mnt/sdb/qemu/blockdev.c - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan ---- - block.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block.c b/block.c -index 9ae5c0e..52bad05 100644 ---- a/block.c -+++ b/block.c -@@ -6048,6 +6048,7 @@ void 
bdrv_refresh_filename(BlockDriverState *bs) - child->bs->exact_filename); - pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename); - -+ qobject_unref(bs->full_open_options); - bs->full_open_options = qobject_ref(child->bs->full_open_options); - - return; --- -1.8.3.1 - diff --git a/block-io-accept-NULL-qiov-in-bdrv_pad_request.patch b/block-io-accept-NULL-qiov-in-bdrv_pad_request.patch new file mode 100644 index 0000000000000000000000000000000000000000..0235d649316cc93a5c17cfb0d561df6709e3d4be --- /dev/null +++ b/block-io-accept-NULL-qiov-in-bdrv_pad_request.patch @@ -0,0 +1,80 @@ +From a01e9f722d8e187493cda6acf645012793bc95fe Mon Sep 17 00:00:00 2001 +From: gubin +Date: Wed, 25 Jun 2025 17:18:04 +0800 +Subject: [PATCH] block/io: accept NULL qiov in bdrv_pad_request + +cherry-pick from 3f934817c82c2f1bf1c238f8d1065a3be10a3c9e + +Some operations, e.g. block-stream, perform reads while discarding the +results (only copy-on-read matters). In this case, they will pass NULL +as the target QEMUIOVector, which will however trip bdrv_pad_request, +since it wants to extend its passed vector. In particular, this is the +case for the blk_co_preadv() call in stream_populate(). + +If there is no qiov, no operation can be done with it, but the bytes +and offset still need to be updated, so the subsequent aligned read +will actually be aligned and not run into an assertion failure. 
+ +Originally-by: Stefan Reiter +Signed-off-by: Thomas Lamprecht +Signed-off-by: Fiona Ebner +Message-ID: <20240322095009.346989-2-f.ebner@proxmox.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +Signed-off-by: gubin +Signed-off-by: gubin +--- + block/io.c | 33 ++++++++++++++++++++------------- + 1 file changed, 20 insertions(+), 13 deletions(-) + +diff --git a/block/io.c b/block/io.c +index a280a5a4c9..27d6a1a04b 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1756,22 +1756,29 @@ static int bdrv_pad_request(BlockDriverState *bs, + return 0; + } + +- sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, +- &sliced_head, &sliced_tail, +- &sliced_niov); +- +- /* Guaranteed by bdrv_check_request32() */ +- assert(*bytes <= SIZE_MAX); +- ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, +- sliced_head, *bytes); +- if (ret < 0) { +- bdrv_padding_finalize(pad); +- return ret; ++ /* ++ * For prefetching in stream_populate(), no qiov is passed along, because ++ * only copy-on-read matters. 
++ */ ++ if (qiov && *qiov) { ++ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, ++ &sliced_head, &sliced_tail, ++ &sliced_niov); ++ ++ /* Guaranteed by bdrv_check_request32() */ ++ assert(*bytes <= SIZE_MAX); ++ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, ++ sliced_head, *bytes); ++ if (ret < 0) { ++ bdrv_padding_finalize(pad); ++ return ret; ++ } ++ *qiov = &pad->local_qiov; ++ *qiov_offset = 0; + } ++ + *bytes += pad->head + pad->tail; + *offset -= pad->head; +- *qiov = &pad->local_qiov; +- *qiov_offset = 0; + if (padded) { + *padded = true; + } +-- +2.33.0 + diff --git a/block-io-refactor-padding.patch b/block-io-refactor-padding.patch deleted file mode 100644 index 7a267147f5b5fbfa908edf342c02bd17481b3d70..0000000000000000000000000000000000000000 --- a/block-io-refactor-padding.patch +++ /dev/null @@ -1,481 +0,0 @@ -From 2e2ad02f2cecf419eaad0df982ceb5b41170cc7e Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 4 Jun 2019 19:15:05 +0300 -Subject: [PATCH] block/io: refactor padding - -We have similar padding code in bdrv_co_pwritev, -bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify -it. 
- -[Squashed in Vladimir's qemu-iotests 077 fix ---Stefan] - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Acked-by: Stefan Hajnoczi -Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com -Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 7a3f542fbdfd799be4fa6f8b96dc8c1e6933fce4) -*prereq for 292d06b9 -Signed-off-by: Michael Roth ---- - block/io.c | 365 +++++++++++++++++++++++++++++------------------------ - 1 file changed, 200 insertions(+), 165 deletions(-) - -diff --git a/block/io.c b/block/io.c -index dccf687acc..07d2d825c3 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1408,28 +1408,177 @@ out: - } - - /* -- * Handle a read request in coroutine context -+ * Request padding -+ * -+ * |<---- align ----->| |<----- align ---->| -+ * |<- head ->|<------------- bytes ------------->|<-- tail -->| -+ * | | | | | | -+ * -*----------$-------*-------- ... --------*-----$------------*--- -+ * | | | | | | -+ * | offset | | end | -+ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end) -+ * [buf ... ) [tail_buf ) -+ * -+ * @buf is an aligned allocation needed to store @head and @tail paddings. @head -+ * is placed at the beginning of @buf and @tail at the @end. -+ * -+ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk -+ * around tail, if tail exists. -+ * -+ * @merge_reads is true for small requests, -+ * if @buf_len == @head + bytes + @tail. In this case it is possible that both -+ * head and tail exist but @buf_len == align and @tail_buf == @buf. 
-+ */ -+typedef struct BdrvRequestPadding { -+ uint8_t *buf; -+ size_t buf_len; -+ uint8_t *tail_buf; -+ size_t head; -+ size_t tail; -+ bool merge_reads; -+ QEMUIOVector local_qiov; -+} BdrvRequestPadding; -+ -+static bool bdrv_init_padding(BlockDriverState *bs, -+ int64_t offset, int64_t bytes, -+ BdrvRequestPadding *pad) -+{ -+ uint64_t align = bs->bl.request_alignment; -+ size_t sum; -+ -+ memset(pad, 0, sizeof(*pad)); -+ -+ pad->head = offset & (align - 1); -+ pad->tail = ((offset + bytes) & (align - 1)); -+ if (pad->tail) { -+ pad->tail = align - pad->tail; -+ } -+ -+ if ((!pad->head && !pad->tail) || !bytes) { -+ return false; -+ } -+ -+ sum = pad->head + bytes + pad->tail; -+ pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align; -+ pad->buf = qemu_blockalign(bs, pad->buf_len); -+ pad->merge_reads = sum == pad->buf_len; -+ if (pad->tail) { -+ pad->tail_buf = pad->buf + pad->buf_len - align; -+ } -+ -+ return true; -+} -+ -+static int bdrv_padding_rmw_read(BdrvChild *child, -+ BdrvTrackedRequest *req, -+ BdrvRequestPadding *pad, -+ bool zero_middle) -+{ -+ QEMUIOVector local_qiov; -+ BlockDriverState *bs = child->bs; -+ uint64_t align = bs->bl.request_alignment; -+ int ret; -+ -+ assert(req->serialising && pad->buf); -+ -+ if (pad->head || pad->merge_reads) { -+ uint64_t bytes = pad->merge_reads ? 
pad->buf_len : align; -+ -+ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes); -+ -+ if (pad->head) { -+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); -+ } -+ if (pad->merge_reads && pad->tail) { -+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); -+ } -+ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, -+ align, &local_qiov, 0); -+ if (ret < 0) { -+ return ret; -+ } -+ if (pad->head) { -+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); -+ } -+ if (pad->merge_reads && pad->tail) { -+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); -+ } -+ -+ if (pad->merge_reads) { -+ goto zero_mem; -+ } -+ } -+ -+ if (pad->tail) { -+ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align); -+ -+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); -+ ret = bdrv_aligned_preadv( -+ child, req, -+ req->overlap_offset + req->overlap_bytes - align, -+ align, align, &local_qiov, 0); -+ if (ret < 0) { -+ return ret; -+ } -+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); -+ } -+ -+zero_mem: -+ if (zero_middle) { -+ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail); -+ } -+ -+ return 0; -+} -+ -+static void bdrv_padding_destroy(BdrvRequestPadding *pad) -+{ -+ if (pad->buf) { -+ qemu_vfree(pad->buf); -+ qemu_iovec_destroy(&pad->local_qiov); -+ } -+} -+ -+/* -+ * bdrv_pad_request -+ * -+ * Exchange request parameters with padded request if needed. Don't include RMW -+ * read of padding, bdrv_padding_rmw_read() should be called separately if -+ * needed. -+ * -+ * All parameters except @bs are in-out: they represent original request at -+ * function call and padded (if padding needed) at function finish. -+ * -+ * Function always succeeds. 
- */ -+static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, -+ int64_t *offset, unsigned int *bytes, -+ BdrvRequestPadding *pad) -+{ -+ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { -+ return false; -+ } -+ -+ qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, -+ *qiov, 0, *bytes, -+ pad->buf + pad->buf_len - pad->tail, pad->tail); -+ *bytes += pad->head + pad->tail; -+ *offset -= pad->head; -+ *qiov = &pad->local_qiov; -+ -+ return true; -+} -+ - int coroutine_fn bdrv_co_preadv(BdrvChild *child, - int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) - { - BlockDriverState *bs = child->bs; -- BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; -- -- uint64_t align = bs->bl.request_alignment; -- uint8_t *head_buf = NULL; -- uint8_t *tail_buf = NULL; -- QEMUIOVector local_qiov; -- bool use_local_qiov = false; -+ BdrvRequestPadding pad; - int ret; - -- trace_bdrv_co_preadv(child->bs, offset, bytes, flags); -- -- if (!drv) { -- return -ENOMEDIUM; -- } -+ trace_bdrv_co_preadv(bs, offset, bytes, flags); - - ret = bdrv_check_byte_request(bs, offset, bytes); - if (ret < 0) { -@@ -1443,43 +1592,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, - flags |= BDRV_REQ_COPY_ON_READ; - } - -- /* Align read if necessary by padding qiov */ -- if (offset & (align - 1)) { -- head_buf = qemu_blockalign(bs, align); -- qemu_iovec_init(&local_qiov, qiov->niov + 2); -- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); -- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); -- use_local_qiov = true; -- -- bytes += offset & (align - 1); -- offset = offset & ~(align - 1); -- } -- -- if ((offset + bytes) & (align - 1)) { -- if (!use_local_qiov) { -- qemu_iovec_init(&local_qiov, qiov->niov + 1); -- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); -- use_local_qiov = true; -- } -- tail_buf = qemu_blockalign(bs, align); -- qemu_iovec_add(&local_qiov, tail_buf, -- align - ((offset + bytes) & (align - 1))); -- 
-- bytes = ROUND_UP(bytes, align); -- } -+ bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad); - - tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); -- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, -- use_local_qiov ? &local_qiov : qiov, -- flags); -+ ret = bdrv_aligned_preadv(child, &req, offset, bytes, -+ bs->bl.request_alignment, -+ qiov, flags); - tracked_request_end(&req); - bdrv_dec_in_flight(bs); - -- if (use_local_qiov) { -- qemu_iovec_destroy(&local_qiov); -- qemu_vfree(head_buf); -- qemu_vfree(tail_buf); -- } -+ bdrv_padding_destroy(&pad); - - return ret; - } -@@ -1775,44 +1897,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, - BdrvTrackedRequest *req) - { - BlockDriverState *bs = child->bs; -- uint8_t *buf = NULL; - QEMUIOVector local_qiov; - uint64_t align = bs->bl.request_alignment; -- unsigned int head_padding_bytes, tail_padding_bytes; - int ret = 0; -+ bool padding; -+ BdrvRequestPadding pad; - -- head_padding_bytes = offset & (align - 1); -- tail_padding_bytes = (align - (offset + bytes)) & (align - 1); -- -- -- assert(flags & BDRV_REQ_ZERO_WRITE); -- if (head_padding_bytes || tail_padding_bytes) { -- buf = qemu_blockalign(bs, align); -- qemu_iovec_init_buf(&local_qiov, buf, align); -- } -- if (head_padding_bytes) { -- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); -- -- /* RMW the unaligned part before head. 
*/ -+ padding = bdrv_init_padding(bs, offset, bytes, &pad); -+ if (padding) { - mark_request_serialising(req, align); - wait_serialising_requests(req); -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); -- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, -- align, &local_qiov, 0); -- if (ret < 0) { -- goto fail; -- } -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); - -- memset(buf + head_padding_bytes, 0, zero_bytes); -- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, -- align, &local_qiov, -- flags & ~BDRV_REQ_ZERO_WRITE); -- if (ret < 0) { -- goto fail; -+ bdrv_padding_rmw_read(child, req, &pad, true); -+ -+ if (pad.head || pad.merge_reads) { -+ int64_t aligned_offset = offset & ~(align - 1); -+ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align; -+ -+ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); -+ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, -+ align, &local_qiov, -+ flags & ~BDRV_REQ_ZERO_WRITE); -+ if (ret < 0 || pad.merge_reads) { -+ /* Error or all work is done */ -+ goto out; -+ } -+ offset += write_bytes - pad.head; -+ bytes -= write_bytes - pad.head; - } -- offset += zero_bytes; -- bytes -= zero_bytes; - } - - assert(!bytes || (offset & (align - 1)) == 0); -@@ -1822,7 +1934,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, - ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, - NULL, flags); - if (ret < 0) { -- goto fail; -+ goto out; - } - bytes -= aligned_bytes; - offset += aligned_bytes; -@@ -1830,26 +1942,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, - - assert(!bytes || (offset & (align - 1)) == 0); - if (bytes) { -- assert(align == tail_padding_bytes + bytes); -- /* RMW the unaligned part after tail. 
*/ -- mark_request_serialising(req, align); -- wait_serialising_requests(req); -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); -- ret = bdrv_aligned_preadv(child, req, offset, align, -- align, &local_qiov, 0); -- if (ret < 0) { -- goto fail; -- } -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); -+ assert(align == pad.tail + bytes); - -- memset(buf, 0, bytes); -+ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); - ret = bdrv_aligned_pwritev(child, req, offset, align, align, - &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); - } --fail: -- qemu_vfree(buf); -- return ret; - -+out: -+ bdrv_padding_destroy(&pad); -+ -+ return ret; - } - - /* -@@ -1862,10 +1965,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, - BlockDriverState *bs = child->bs; - BdrvTrackedRequest req; - uint64_t align = bs->bl.request_alignment; -- uint8_t *head_buf = NULL; -- uint8_t *tail_buf = NULL; -- QEMUIOVector local_qiov; -- bool use_local_qiov = false; -+ BdrvRequestPadding pad; - int ret; - - trace_bdrv_co_pwritev(child->bs, offset, bytes, flags); -@@ -1892,86 +1992,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, - goto out; - } - -- if (offset & (align - 1)) { -- QEMUIOVector head_qiov; -- -+ if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { - mark_request_serialising(&req, align); - wait_serialising_requests(&req); -- -- head_buf = qemu_blockalign(bs, align); -- qemu_iovec_init_buf(&head_qiov, head_buf, align); -- -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); -- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, -- align, &head_qiov, 0); -- if (ret < 0) { -- goto fail; -- } -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); -- -- qemu_iovec_init(&local_qiov, qiov->niov + 2); -- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); -- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); -- use_local_qiov = true; -- -- bytes += offset & (align - 1); -- offset = offset & ~(align - 1); -- -- /* We have read the tail already if 
the request is smaller -- * than one aligned block. -- */ -- if (bytes < align) { -- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes); -- bytes = align; -- } -- } -- -- if ((offset + bytes) & (align - 1)) { -- QEMUIOVector tail_qiov; -- size_t tail_bytes; -- bool waited; -- -- mark_request_serialising(&req, align); -- waited = wait_serialising_requests(&req); -- assert(!waited || !use_local_qiov); -- -- tail_buf = qemu_blockalign(bs, align); -- qemu_iovec_init_buf(&tail_qiov, tail_buf, align); -- -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); -- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), -- align, align, &tail_qiov, 0); -- if (ret < 0) { -- goto fail; -- } -- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); -- -- if (!use_local_qiov) { -- qemu_iovec_init(&local_qiov, qiov->niov + 1); -- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); -- use_local_qiov = true; -- } -- -- tail_bytes = (offset + bytes) & (align - 1); -- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); -- -- bytes = ROUND_UP(bytes, align); -+ bdrv_padding_rmw_read(child, &req, &pad, false); - } - - ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, -- use_local_qiov ? 
&local_qiov : qiov, -- flags); -+ qiov, flags); - --fail: -+ bdrv_padding_destroy(&pad); - -- if (use_local_qiov) { -- qemu_iovec_destroy(&local_qiov); -- } -- qemu_vfree(head_buf); -- qemu_vfree(tail_buf); - out: - tracked_request_end(&req); - bdrv_dec_in_flight(bs); -+ - return ret; - } - --- -2.23.0 diff --git a/block-iscsi-use-MIN-between-mx_sb_len-and-sb_len_wr.patch b/block-iscsi-use-MIN-between-mx_sb_len-and-sb_len_wr.patch deleted file mode 100644 index ba53c1ddb4381562aa1fe486ad7f0ce56923d1d7..0000000000000000000000000000000000000000 --- a/block-iscsi-use-MIN-between-mx_sb_len-and-sb_len_wr.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 547b06bb04287eb97ffb02e213aa8466c15cce65 Mon Sep 17 00:00:00 2001 -From: Chen Qun -Date: Mon, 16 Mar 2020 14:35:34 +0800 -Subject: [PATCH] block/iscsi: use MIN() between mx_sb_len and sb_len_wr - -Use MIN() macro between mx_sb_len and sb_len_wr the len for sbp copy data. - -Reported-by: Euler Robot -Signed-off-by: Chen Qun ---- - block/iscsi.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index 3f86aaf..5c3c598 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -989,8 +989,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, - acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE; - - acb->ioh->sb_len_wr = acb->task->datain.size - 2; -- ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ? 
-- acb->ioh->mx_sb_len : acb->ioh->sb_len_wr; -+ ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr); - memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss); - } - --- -1.8.3.1 - diff --git a/block-mirror-fix-file-system-went-to-read-only-after.patch b/block-mirror-fix-file-system-went-to-read-only-after.patch new file mode 100644 index 0000000000000000000000000000000000000000..b36f8cd870663ea2bb2c4a84d4d70f0527915ee5 --- /dev/null +++ b/block-mirror-fix-file-system-went-to-read-only-after.patch @@ -0,0 +1,32 @@ +From 6203b11d2a900c60d2ee3c3a980d2c385050eb62 Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 10 Feb 2022 21:37:49 +0800 +Subject: [PATCH] block/mirror: fix file-system went to read-only after + block-mirror + +config vm disk with prdm, keep the disk writing data continuously +during block-mirror, the file-system will went to read-only after +block-mirror, fix it. + +Signed-off-by: caojinhua +Signed-off-by: jiangdongxu +--- + block/mirror.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index cd9d3ad4a8..20b3e8e5d8 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1774,7 +1774,7 @@ static BlockJob *mirror_start_job( + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. 
*/ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, +- BDRV_O_RDWR, errp); ++ BDRV_O_RDWR | BDRV_O_NOCACHE, errp); + if (mirror_top_bs == NULL) { + return NULL; + } +-- +2.27.0 + diff --git a/block-mirror-fix-use-after-free-of-local_err.patch b/block-mirror-fix-use-after-free-of-local_err.patch deleted file mode 100644 index ea2f739410164f7df43f020192cd60653a3b8cf0..0000000000000000000000000000000000000000 --- a/block-mirror-fix-use-after-free-of-local_err.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 682d23829adf0a872d5a3ca6eb4b31c424f558fc Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 24 Mar 2020 18:36:26 +0300 -Subject: [PATCH 09/14] block/mirror: fix use after free of local_err - -local_err is used again in mirror_exit_common() after -bdrv_set_backing_hd(), so we must zero it. Otherwise try to set -non-NULL local_err will crash. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200324153630.11882-3-vsementsov@virtuozzo.com> -Reviewed-by: Eric Blake -Reviewed-by: John Snow -Signed-off-by: Max Reitz -Signed-off-by: Peng Liang ---- - block/mirror.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/mirror.c b/block/mirror.c -index 681b305de650..ef6c958ff9b3 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -674,6 +674,7 @@ static int mirror_exit_common(Job *job) - bdrv_set_backing_hd(target_bs, backing, &local_err); - if (local_err) { - error_report_err(local_err); -+ local_err = NULL; - ret = -EPERM; - } - } --- -2.26.2 - diff --git a/block-nbd-extract-the-common-cleanup-code.patch b/block-nbd-extract-the-common-cleanup-code.patch deleted file mode 100644 index 4cc24818b7c96cc8c85a0c8ac97dc2452c234610..0000000000000000000000000000000000000000 --- a/block-nbd-extract-the-common-cleanup-code.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 1196a2079a558cbb673e06142fa67a401c5e6c30 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Thu, 5 Dec 2019 11:45:27 +0800 -Subject: [PATCH 6/9] block/nbd: 
extract the common cleanup code - -The BDRVNBDState cleanup code is common in two places, add -nbd_clear_bdrvstate() function to do these cleanups. - -Suggested-by: Stefano Garzarella -Signed-off-by: Pan Nengyuan -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <1575517528-44312-2-git-send-email-pannengyuan@huawei.com> -Reviewed-by: Eric Blake -[eblake: fix compilation error and commit message] -Signed-off-by: Eric Blake -Signed-off-by: AlexChen ---- - block/nbd.c | 19 ++++++++++++------- - 1 file changed, 12 insertions(+), 7 deletions(-) - -diff --git a/block/nbd.c b/block/nbd.c -index 57c1a20..3977b1e 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -73,6 +73,16 @@ typedef struct BDRVNBDState { - char *export, *tlscredsid; - } BDRVNBDState; - -+static void nbd_clear_bdrvstate(BDRVNBDState *s) -+{ -+ qapi_free_SocketAddress(s->saddr); -+ s->saddr = NULL; -+ g_free(s->export); -+ s->export = NULL; -+ g_free(s->tlscredsid); -+ s->tlscredsid = NULL; -+} -+ - static void nbd_recv_coroutines_wake_all(BDRVNBDState *s) - { - int i; -@@ -1640,9 +1650,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, - object_unref(OBJECT(tlscreds)); - } - if (ret < 0) { -- qapi_free_SocketAddress(s->saddr); -- g_free(s->export); -- g_free(s->tlscredsid); -+ nbd_clear_bdrvstate(s); - } - qemu_opts_del(opts); - return ret; -@@ -1692,10 +1700,7 @@ static void nbd_close(BlockDriverState *bs) - BDRVNBDState *s = bs->opaque; - - nbd_client_close(bs); -- -- qapi_free_SocketAddress(s->saddr); -- g_free(s->export); -- g_free(s->tlscredsid); -+ nbd_clear_bdrvstate(s); - } - - static int64_t nbd_getlength(BlockDriverState *bs) --- -1.8.3.1 - diff --git a/block-nfs-tear-down-aio-before-nfs_close.patch b/block-nfs-tear-down-aio-before-nfs_close.patch deleted file mode 100644 index ea116d0a381c18521da88b94d3ea914f0357939d..0000000000000000000000000000000000000000 --- a/block-nfs-tear-down-aio-before-nfs_close.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 
0694c489cd240620fee5675e8d24c7ce02d1d67d Mon Sep 17 00:00:00 2001 -From: Peter Lieven -Date: Tue, 10 Sep 2019 17:41:09 +0200 -Subject: [PATCH] block/nfs: tear down aio before nfs_close - -nfs_close is a sync call from libnfs and has its own event -handler polling on the nfs FD. Avoid that both QEMU and libnfs -are intefering here. - -CC: qemu-stable@nongnu.org -Signed-off-by: Peter Lieven -Signed-off-by: Kevin Wolf -(cherry picked from commit 601dc6559725f7a614b6f893611e17ff0908e914) -Signed-off-by: Michael Roth ---- - block/nfs.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/block/nfs.c b/block/nfs.c -index d93241b3bb..2b7a078241 100644 ---- a/block/nfs.c -+++ b/block/nfs.c -@@ -390,12 +390,14 @@ static void nfs_attach_aio_context(BlockDriverState *bs, - static void nfs_client_close(NFSClient *client) - { - if (client->context) { -+ qemu_mutex_lock(&client->mutex); -+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), -+ false, NULL, NULL, NULL, NULL); -+ qemu_mutex_unlock(&client->mutex); - if (client->fh) { - nfs_close(client->context, client->fh); - client->fh = NULL; - } -- aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), -- false, NULL, NULL, NULL, NULL); - nfs_destroy_context(client->context); - client->context = NULL; - } --- -2.23.0 diff --git a/block-posix-Always-allocate-the-first-block.patch b/block-posix-Always-allocate-the-first-block.patch deleted file mode 100644 index 166d73957ce0569929276d446a1934be17cad612..0000000000000000000000000000000000000000 --- a/block-posix-Always-allocate-the-first-block.patch +++ /dev/null @@ -1,343 +0,0 @@ -From 3d018ff3bdd8aec260254036b600cfa8d694ced4 Mon Sep 17 00:00:00 2001 -From: Nir Soffer -Date: Tue, 27 Aug 2019 04:05:27 +0300 -Subject: [PATCH] block: posix: Always allocate the first block - -When creating an image with preallocation "off" or "falloc", the first -block of the image is typically not allocated. 
When using Gluster -storage backed by XFS filesystem, reading this block using direct I/O -succeeds regardless of request length, fooling alignment detection. - -In this case we fallback to a safe value (4096) instead of the optimal -value (512), which may lead to unneeded data copying when aligning -requests. Allocating the first block avoids the fallback. - -Since we allocate the first block even with preallocation=off, we no -longer create images with zero disk size: - - $ ./qemu-img create -f raw test.raw 1g - Formatting 'test.raw', fmt=raw size=1073741824 - - $ ls -lhs test.raw - 4.0K -rw-r--r--. 1 nsoffer nsoffer 1.0G Aug 16 23:48 test.raw - -And converting the image requires additional cluster: - - $ ./qemu-img measure -f raw -O qcow2 test.raw - required size: 458752 - fully allocated size: 1074135040 - -When using format like vmdk with multiple files per image, we allocate -one block per file: - - $ ./qemu-img create -f vmdk -o subformat=twoGbMaxExtentFlat test.vmdk 4g - Formatting 'test.vmdk', fmt=vmdk size=4294967296 compat6=off hwversion=undefined subformat=twoGbMaxExtentFlat - - $ ls -lhs test*.vmdk - 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f001.vmdk - 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f002.vmdk - 4.0K -rw-r--r--. 1 nsoffer nsoffer 353 Aug 27 03:23 test.vmdk - -I did quick performance test for copying disks with qemu-img convert to -new raw target image to Gluster storage with sector size of 512 bytes: - - for i in $(seq 10); do - rm -f dst.raw - sleep 10 - time ./qemu-img convert -f raw -O raw -t none -T none src.raw dst.raw - done - -Here is a table comparing the total time spent: - -Type Before(s) After(s) Diff(%) ---------------------------------------- -real 530.028 469.123 -11.4 -user 17.204 10.768 -37.4 -sys 17.881 7.011 -60.7 - -We can see very clear improvement in CPU usage. 
- -Signed-off-by: Nir Soffer -Message-id: 20190827010528.8818-2-nsoffer@redhat.com -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz - -(cherry picked from commit 3a20013fbb26d2a1bd11ef148eefdb1508783787) - -Signed-off-by: Michael Roth ---- - block/file-posix.c | 51 +++++++++++++++++++ - tests/qemu-iotests/059.out | 2 +- - tests/qemu-iotests/{150.out => 150.out.qcow2} | 0 - tests/qemu-iotests/150.out.raw | 12 +++++ - tests/qemu-iotests/175 | 19 ++++--- - tests/qemu-iotests/175.out | 8 +-- - tests/qemu-iotests/178.out.qcow2 | 4 +- - tests/qemu-iotests/221.out | 12 +++-- - tests/qemu-iotests/253.out | 12 +++-- - 9 files changed, 99 insertions(+), 21 deletions(-) - rename tests/qemu-iotests/{150.out => 150.out.qcow2} (100%) - create mode 100644 tests/qemu-iotests/150.out.raw - -diff --git a/block/file-posix.c b/block/file-posix.c -index be32dd8c51..2184aa980c 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1674,6 +1674,43 @@ static int handle_aiocb_discard(void *opaque) - return ret; - } - -+/* -+ * Help alignment probing by allocating the first block. -+ * -+ * When reading with direct I/O from unallocated area on Gluster backed by XFS, -+ * reading succeeds regardless of request length. In this case we fallback to -+ * safe alignment which is not optimal. Allocating the first block avoids this -+ * fallback. -+ * -+ * fd may be opened with O_DIRECT, but we don't know the buffer alignment or -+ * request alignment, so we use safe values. -+ * -+ * Returns: 0 on success, -errno on failure. Since this is an optimization, -+ * caller may ignore failures. -+ */ -+static int allocate_first_block(int fd, size_t max_size) -+{ -+ size_t write_size = (max_size < MAX_BLOCKSIZE) -+ ? 
BDRV_SECTOR_SIZE -+ : MAX_BLOCKSIZE; -+ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); -+ void *buf; -+ ssize_t n; -+ int ret; -+ -+ buf = qemu_memalign(max_align, write_size); -+ memset(buf, 0, write_size); -+ -+ do { -+ n = pwrite(fd, buf, write_size, 0); -+ } while (n == -1 && errno == EINTR); -+ -+ ret = (n == -1) ? -errno : 0; -+ -+ qemu_vfree(buf); -+ return ret; -+} -+ - static int handle_aiocb_truncate(void *opaque) - { - RawPosixAIOData *aiocb = opaque; -@@ -1713,6 +1750,17 @@ static int handle_aiocb_truncate(void *opaque) - /* posix_fallocate() doesn't set errno. */ - error_setg_errno(errp, -result, - "Could not preallocate new data"); -+ } else if (current_length == 0) { -+ /* -+ * posix_fallocate() uses fallocate() if the filesystem -+ * supports it, or fallback to manually writing zeroes. If -+ * fallocate() was used, unaligned reads from the fallocated -+ * area in raw_probe_alignment() will succeed, hence we need to -+ * allocate the first block. -+ * -+ * Optimize future alignment probing; ignore failures. -+ */ -+ allocate_first_block(fd, offset); - } - } else { - result = 0; -@@ -1774,6 +1822,9 @@ static int handle_aiocb_truncate(void *opaque) - if (ftruncate(fd, offset) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); -+ } else if (current_length == 0 && offset > current_length) { -+ /* Optimize future alignment probing; ignore failures. 
*/ -+ allocate_first_block(fd, offset); - } - return result; - default: -diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out -index 4fab42a28c..fe3f861f3c 100644 ---- a/tests/qemu-iotests/059.out -+++ b/tests/qemu-iotests/059.out -@@ -27,7 +27,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000 subformat=twoGbMax - image: TEST_DIR/t.vmdk - file format: vmdk - virtual size: 0.977 TiB (1073741824000 bytes) --disk size: 16 KiB -+disk size: 1.97 MiB - Format specific information: - cid: XXXXXXXX - parent cid: XXXXXXXX -diff --git a/tests/qemu-iotests/150.out b/tests/qemu-iotests/150.out.qcow2 -similarity index 100% -rename from tests/qemu-iotests/150.out -rename to tests/qemu-iotests/150.out.qcow2 -diff --git a/tests/qemu-iotests/150.out.raw b/tests/qemu-iotests/150.out.raw -new file mode 100644 -index 0000000000..3cdc7727a5 ---- /dev/null -+++ b/tests/qemu-iotests/150.out.raw -@@ -0,0 +1,12 @@ -+QA output created by 150 -+ -+=== Mapping sparse conversion === -+ -+Offset Length File -+0 0x1000 TEST_DIR/t.IMGFMT -+ -+=== Mapping non-sparse conversion === -+ -+Offset Length File -+0 0x100000 TEST_DIR/t.IMGFMT -+*** done -diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 -index 51e62c8276..7ba28b3c1b 100755 ---- a/tests/qemu-iotests/175 -+++ b/tests/qemu-iotests/175 -@@ -37,14 +37,16 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 - # the file size. This function hides the resulting difference in the - # stat -c '%b' output. 
- # Parameter 1: Number of blocks an empty file occupies --# Parameter 2: Image size in bytes -+# Parameter 2: Minimal number of blocks in an image -+# Parameter 3: Image size in bytes - _filter_blocks() - { - extra_blocks=$1 -- img_size=$2 -+ min_blocks=$2 -+ img_size=$3 - -- sed -e "s/blocks=$extra_blocks\\(\$\\|[^0-9]\\)/nothing allocated/" \ -- -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/everything allocated/" -+ sed -e "s/blocks=$min_blocks\\(\$\\|[^0-9]\\)/min allocation/" \ -+ -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/" - } - - # get standard environment, filters and checks -@@ -60,16 +62,21 @@ size=$((1 * 1024 * 1024)) - touch "$TEST_DIR/empty" - extra_blocks=$(stat -c '%b' "$TEST_DIR/empty") - -+# We always write the first byte; check how many blocks this filesystem -+# allocates to match empty image alloation. -+printf "\0" > "$TEST_DIR/empty" -+min_blocks=$(stat -c '%b' "$TEST_DIR/empty") -+ - echo - echo "== creating image with default preallocation ==" - _make_test_img $size | _filter_imgfmt --stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size -+stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size - - for mode in off full falloc; do - echo - echo "== creating image with preallocation $mode ==" - IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt -- stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size -+ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size - done - - # success, all done -diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out -index 6d9a5ed84e..263e521262 100644 ---- a/tests/qemu-iotests/175.out -+++ b/tests/qemu-iotests/175.out -@@ -2,17 +2,17 @@ QA output created by 175 - - == creating image with default preallocation == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 --size=1048576, nothing allocated -+size=1048576, min 
allocation - - == creating image with preallocation off == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=off --size=1048576, nothing allocated -+size=1048576, min allocation - - == creating image with preallocation full == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=full --size=1048576, everything allocated -+size=1048576, max allocation - - == creating image with preallocation falloc == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc --size=1048576, everything allocated -+size=1048576, max allocation - *** done -diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 -index 55a8dc926f..9e7d8c44df 100644 ---- a/tests/qemu-iotests/178.out.qcow2 -+++ b/tests/qemu-iotests/178.out.qcow2 -@@ -101,7 +101,7 @@ converted image file size in bytes: 196608 - == raw input image with data (human) == - - Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 --required size: 393216 -+required size: 458752 - fully allocated size: 1074135040 - wrote 512/512 bytes at offset 512 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -@@ -257,7 +257,7 @@ converted image file size in bytes: 196608 - - Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 - { -- "required": 393216, -+ "required": 458752, - "fully-allocated": 1074135040 - } - wrote 512/512 bytes at offset 512 -diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out -index 9f9dd52bb0..dca024a0c3 100644 ---- a/tests/qemu-iotests/221.out -+++ b/tests/qemu-iotests/221.out -@@ -3,14 +3,18 @@ QA output created by 221 - === Check mapping of unaligned raw image === - - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65537 --[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] --[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 
OFFSET}, -+{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] - wrote 1/1 bytes at offset 65536 - 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, - { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] --[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, - { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] - *** done -diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out -index 607c0baa0b..3d08b305d7 100644 ---- a/tests/qemu-iotests/253.out -+++ b/tests/qemu-iotests/253.out -@@ -3,12 +3,16 @@ QA output created by 253 - === Check mapping of unaligned raw image === - - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 --[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] --[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 1044480, "depth": 
0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] - wrote 65535/65535 bytes at offset 983040 - 63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] --[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] - *** done --- -2.23.0 diff --git a/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch b/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch deleted file mode 100644 index f77cc06c60dd36ccd84a5ad5c5e9748bb2126c08..0000000000000000000000000000000000000000 --- a/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 84f22c728520792f1010074e0d5ac2ec8e2e372c Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Sun, 15 Sep 2019 23:36:53 +0300 -Subject: [PATCH] block/qcow2: Fix corruption introduced by commit 8ac0f15f335 - -This fixes subtle corruption introduced by luks threaded encryption -in commit 8ac0f15f335 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1745922 - -The corruption happens when we do a write that - * writes to two or more unallocated clusters at once - * doesn't fully cover the first sector - * 
doesn't fully cover the last sector - * uses luks encryption - -In this case, when allocating the new clusters we COW both areas -prior to the write and after the write, and we encrypt them. - -The above mentioned commit accidentally made it so we encrypt the -second COW area using the physical cluster offset of the first area. - -The problem is that offset_in_cluster in do_perform_cow_encrypt -can be larger that the cluster size, thus cluster_offset -will no longer point to the start of the cluster at which encrypted -area starts. - -Next patch in this series will refactor the code to avoid all these -assumptions. - -In the bugreport that was triggered by rebasing a luks image to new, -zero filled base, which lot of such writes, and causes some files -with zero areas to contain garbage there instead. -But as described above it can happen elsewhere as well - -Signed-off-by: Maxim Levitsky -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-id: 20190915203655.21638-2-mlevitsk@redhat.com -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 38e7d54bdc518b5a05a922467304bcace2396945) -Signed-off-by: Michael Roth ---- - block/qcow2-cluster.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index cc5609e27a..760564c8fb 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -473,9 +473,10 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, - assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); - assert((bytes & ~BDRV_SECTOR_MASK) == 0); - assert(s->crypto); -- if (qcow2_co_encrypt(bs, cluster_offset, -- src_cluster_offset + offset_in_cluster, -- buffer, bytes) < 0) { -+ if (qcow2_co_encrypt(bs, -+ start_of_cluster(s, cluster_offset + offset_in_cluster), -+ src_cluster_offset + offset_in_cluster, -+ buffer, bytes) < 0) { - return false; - } - } --- -2.23.0 diff --git a/block-qcow2-do-free-crypto_opts-in-qcow2_close.patch 
b/block-qcow2-do-free-crypto_opts-in-qcow2_close.patch deleted file mode 100644 index 44b0ea19e95b95bbd583034e9c830e3dd6d647e6..0000000000000000000000000000000000000000 --- a/block-qcow2-do-free-crypto_opts-in-qcow2_close.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 88ef4e1862987227f8b87228cff94be3af66d054 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Thu, 27 Feb 2020 09:29:49 +0800 -Subject: [PATCH 01/14] block/qcow2: do free crypto_opts in qcow2_close() - -'crypto_opts' forgot to free in qcow2_close(), this patch fix the bellow leak stack: - -Direct leak of 24 byte(s) in 1 object(s) allocated from: - #0 0x7f0edd81f970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) - #1 0x7f0edc6d149d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) - #2 0x55d7eaede63d in qobject_input_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qobject-input-visitor.c:295 - #3 0x55d7eaed78b8 in visit_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qapi-visit-core.c:49 - #4 0x55d7eaf5140b in visit_type_QCryptoBlockOpenOptions qapi/qapi-visit-crypto.c:290 - #5 0x55d7eae43af3 in block_crypto_open_opts_init /mnt/sdb/qemu-new/qemu_test/qemu/block/crypto.c:163 - #6 0x55d7eacd2924 in qcow2_update_options_prepare /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1148 - #7 0x55d7eacd33f7 in qcow2_update_options /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1232 - #8 0x55d7eacd9680 in qcow2_do_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1512 - #9 0x55d7eacdc55e in qcow2_open_entry /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1792 - #10 0x55d7eacdc8fe in qcow2_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1819 - #11 0x55d7eac3742d in bdrv_open_driver /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1317 - #12 0x55d7eac3e990 in bdrv_open_common /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1575 - #13 0x55d7eac4442c in bdrv_open_inherit /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3126 - #14 0x55d7eac45c3f in bdrv_open /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3219 - #15 0x55d7ead8e8a4 in 
blk_new_open /mnt/sdb/qemu-new/qemu_test/qemu/block/block-backend.c:397 - #16 0x55d7eacde74c in qcow2_co_create /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3534 - #17 0x55d7eacdfa6d in qcow2_co_create_opts /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3668 - #18 0x55d7eac1c678 in bdrv_create_co_entry /mnt/sdb/qemu-new/qemu_test/qemu/block.c:485 - #19 0x55d7eb0024d2 in coroutine_trampoline /mnt/sdb/qemu-new/qemu_test/qemu/util/coroutine-ucontext.c:115 - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Reviewed-by: Max Reitz -Message-Id: <20200227012950.12256-2-pannengyuan@huawei.com> -Signed-off-by: Max Reitz -Signed-off-by: Peng Liang ---- - block/qcow2.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 1909df6e1d24..27c54b9905aa 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -2408,6 +2408,7 @@ static void qcow2_close(BlockDriverState *bs) - - qcrypto_block_free(s->crypto); - s->crypto = NULL; -+ qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); - - g_free(s->unknown_header_fields); - cleanup_unknown_header_ext(bs); --- -2.26.2 - diff --git a/block-qcow2-threads-fix-qcow2_decompress.patch b/block-qcow2-threads-fix-qcow2_decompress.patch deleted file mode 100644 index d2fd9ee74fc5b57fb57ee1f655763895f1fe4356..0000000000000000000000000000000000000000 --- a/block-qcow2-threads-fix-qcow2_decompress.patch +++ /dev/null @@ -1,75 +0,0 @@ -From a583b6b616b086d3fdce93e255d24ab2c865efd3 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Mon, 2 Mar 2020 18:09:30 +0300 -Subject: [PATCH 03/14] block/qcow2-threads: fix qcow2_decompress -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -On success path we return what inflate() returns instead of 0. And it -most probably works for Z_STREAM_END as it is positive, but is -definitely broken for Z_BUF_ERROR. 
- -While being here, switch to errno return code, to be closer to -qcow2_compress API (and usual expectations). - -Revert condition in if to be more positive. Drop dead initialization of -ret. - -Cc: qemu-stable@nongnu.org # v4.0 -Fixes: 341926ab83e2b -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200302150930.16218-1-vsementsov@virtuozzo.com> -Reviewed-by: Alberto Garcia -Reviewed-by: Ján Tomko -Signed-off-by: Max Reitz -Signed-off-by: Peng Liang ---- - block/qcow2-threads.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c -index 3b1e63fe414d..449cd3c0a1f4 100644 ---- a/block/qcow2-threads.c -+++ b/block/qcow2-threads.c -@@ -128,12 +128,12 @@ static ssize_t qcow2_compress(void *dest, size_t dest_size, - * @src - source buffer, @src_size bytes - * - * Returns: 0 on success -- * -1 on fail -+ * -EIO on fail - */ - static ssize_t qcow2_decompress(void *dest, size_t dest_size, - const void *src, size_t src_size) - { -- int ret = 0; -+ int ret; - z_stream strm; - - memset(&strm, 0, sizeof(strm)); -@@ -144,17 +144,19 @@ static ssize_t qcow2_decompress(void *dest, size_t dest_size, - - ret = inflateInit2(&strm, -12); - if (ret != Z_OK) { -- return -1; -+ return -EIO; - } - - ret = inflate(&strm, Z_FINISH); -- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) { -+ if ((ret == Z_STREAM_END || ret == Z_BUF_ERROR) && strm.avail_out == 0) { - /* - * We approve Z_BUF_ERROR because we need @dest buffer to be filled, but - * @src buffer may be processed partly (because in qcow2 we know size of - * compressed data with precision of one sector) - */ -- ret = -1; -+ ret = 0; -+ } else { -+ ret = -EIO; - } - - inflateEnd(&strm); --- -2.26.2 - diff --git a/block-snapshot-Restrict-set-of-snapshot-nodes.patch b/block-snapshot-Restrict-set-of-snapshot-nodes.patch deleted file mode 100644 index 
c29f30adc897f5b60bf8004b7f317b6e6257bf3a..0000000000000000000000000000000000000000 --- a/block-snapshot-Restrict-set-of-snapshot-nodes.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 7a8aa6c734bb1c2927ad0cc1d10bcacb53cf4ae3 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 17 Sep 2019 12:26:23 +0200 -Subject: [PATCH] block/snapshot: Restrict set of snapshot nodes - -Nodes involved in internal snapshots were those that were returned by -bdrv_next(), inserted and not read-only. bdrv_next() in turn returns all -nodes that are either the root node of a BlockBackend or monitor-owned -nodes. - -With the typical -drive use, this worked well enough. However, in the -typical -blockdev case, the user defines one node per option, making all -nodes monitor-owned nodes. This includes protocol nodes etc. which often -are not snapshottable, so "savevm" only returns an error. - -Change the conditions so that internal snapshot still include all nodes -that have a BlockBackend attached (we definitely want to snapshot -anything attached to a guest device and probably also the built-in NBD -server; snapshotting block job BlockBackends is more of an accident, but -a preexisting one), but other monitor-owned nodes are only included if -they have no parents. - -This makes internal snapshots usable again with typical -blockdev -configurations. 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Peter Krempa -Tested-by: Peter Krempa -(cherry picked from commit 05f4aced658a02b02d3e89a6c7a2281008fcf26c) -Signed-off-by: Michael Roth ---- - block/snapshot.c | 26 +++++++++++++++++++------- - 1 file changed, 19 insertions(+), 7 deletions(-) - -diff --git a/block/snapshot.c b/block/snapshot.c -index f2f48f926a..8081616ae9 100644 ---- a/block/snapshot.c -+++ b/block/snapshot.c -@@ -31,6 +31,7 @@ - #include "qapi/qmp/qerror.h" - #include "qapi/qmp/qstring.h" - #include "qemu/option.h" -+#include "sysemu/block-backend.h" - - QemuOptsList internal_snapshot_opts = { - .name = "snapshot", -@@ -384,6 +385,16 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, - return ret; - } - -+static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs) -+{ -+ if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { -+ return false; -+ } -+ -+ /* Include all nodes that are either in use by a BlockBackend, or that -+ * aren't attached to any node, but owned by the monitor. */ -+ return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); -+} - - /* Group operations. All block drivers are involved. 
- * These functions will properly handle dataplane (take aio_context_acquire -@@ -399,7 +410,7 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) - AioContext *ctx = bdrv_get_aio_context(bs); - - aio_context_acquire(ctx); -- if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) { -+ if (bdrv_all_snapshots_includes_bs(bs)) { - ok = bdrv_can_snapshot(bs); - } - aio_context_release(ctx); -@@ -426,8 +437,9 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, - AioContext *ctx = bdrv_get_aio_context(bs); - - aio_context_acquire(ctx); -- if (bdrv_can_snapshot(bs) && -- bdrv_snapshot_find(bs, snapshot, name) >= 0) { -+ if (bdrv_all_snapshots_includes_bs(bs) && -+ bdrv_snapshot_find(bs, snapshot, name) >= 0) -+ { - ret = bdrv_snapshot_delete(bs, snapshot->id_str, - snapshot->name, err); - } -@@ -455,7 +467,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, - AioContext *ctx = bdrv_get_aio_context(bs); - - aio_context_acquire(ctx); -- if (bdrv_can_snapshot(bs)) { -+ if (bdrv_all_snapshots_includes_bs(bs)) { - ret = bdrv_snapshot_goto(bs, name, errp); - } - aio_context_release(ctx); -@@ -481,7 +493,7 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) - AioContext *ctx = bdrv_get_aio_context(bs); - - aio_context_acquire(ctx); -- if (bdrv_can_snapshot(bs)) { -+ if (bdrv_all_snapshots_includes_bs(bs)) { - err = bdrv_snapshot_find(bs, &sn, name); - } - aio_context_release(ctx); -@@ -512,7 +524,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, - if (bs == vm_state_bs) { - sn->vm_state_size = vm_state_size; - err = bdrv_snapshot_create(bs, sn); -- } else if (bdrv_can_snapshot(bs)) { -+ } else if (bdrv_all_snapshots_includes_bs(bs)) { - sn->vm_state_size = 0; - err = bdrv_snapshot_create(bs, sn); - } -@@ -538,7 +550,7 @@ BlockDriverState *bdrv_all_find_vmstate_bs(void) - bool found; - - aio_context_acquire(ctx); -- found = bdrv_can_snapshot(bs); -+ found = 
bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs); - aio_context_release(ctx); - - if (found) { --- -2.23.0 diff --git a/block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch b/block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch new file mode 100644 index 0000000000000000000000000000000000000000..8acce11890dc45ec4be509e207a1b65c6f0111f3 --- /dev/null +++ b/block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch @@ -0,0 +1,49 @@ +From b54d853396820150735294107e2e3d060724de04 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Mon, 8 Apr 2024 14:39:43 +0800 +Subject: [PATCH] block/virtio-blk: Fix memory leak from virtio_blk_zone_report + +cheery-pick from bbdf9023665f409113cb07b463732861af63fb47 + +This modification ensures that in scenarios where the buffer size is +insufficient for a zone report, the function will now properly set an +error status and proceed to a cleanup label, instead of merely +returning. + +The following ASAN log reveals it: + +==1767400==ERROR: LeakSanitizer: detected memory leaks +Direct leak of 312 byte(s) in 1 object(s) allocated from: + #0 0x64ac7b3280cd in malloc llvm/compiler-rt/lib/asan/asan_malloc_linux.cpp:129:3 + #1 0x735b02fb9738 in g_malloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x5e738) + #2 0x64ac7d23be96 in virtqueue_split_pop hw/virtio/virtio.c:1612:12 + #3 0x64ac7d23728a in virtqueue_pop hw/virtio/virtio.c:1783:16 + #4 0x64ac7cfcaacd in virtio_blk_get_request hw/block/virtio-blk.c:228:27 + #5 0x64ac7cfca7c7 in virtio_blk_handle_vq hw/block/virtio-blk.c:1123:23 + #6 0x64ac7cfecb95 in virtio_blk_handle_output hw/block/virtio-blk.c:1157:5 + +Signed-off-by: Zheyu Ma +Message-id: 20240404120040.1951466-1-zheyuma97@gmail.com +Signed-off-by: Stefan Hajnoczi +Signed-off-by: qihao_yewu +--- + hw/block/virtio-blk.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 1ebc9188c0..2eb096a6dc 100644 +--- a/hw/block/virtio-blk.c ++++ 
b/hw/block/virtio-blk.c +@@ -790,7 +790,8 @@ static void virtio_blk_handle_zone_report(VirtIOBlockReq *req, + sizeof(struct virtio_blk_zone_report) + + sizeof(struct virtio_blk_zone_descriptor)) { + virtio_error(vdev, "in buffer too small for zone report"); +- return; ++ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; ++ goto out; + } + + /* start byte offset of the zone report */ +-- +2.27.0 + diff --git a/block-vvfat-Fix-bad-printf-format-specifiers.patch b/block-vvfat-Fix-bad-printf-format-specifiers.patch deleted file mode 100644 index 597b9782683778c799e3192fd66d27eb230eee8e..0000000000000000000000000000000000000000 --- a/block-vvfat-Fix-bad-printf-format-specifiers.patch +++ /dev/null @@ -1,77 +0,0 @@ -From c9a4e85610bffe1803648c431e4cff4539a42323 Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Tue, 3 Nov 2020 17:42:56 +0800 -Subject: [PATCH] block/vvfat: Fix bad printf format specifiers - -We should use printf format specifier "%u" instead of "%d" for -argument of type "unsigned int". -In addition, fix two error format problems found by checkpatch.pl: -ERROR: space required after that ',' (ctx:VxV) -+ fprintf(stderr,"%s attributes=0x%02x begin=%u size=%d\n", - ^ -ERROR: line over 90 characters -+ fprintf(stderr, "%d, %s (%u, %d)\n", i, commit->path ? 
commit->path : "(null)", commit->param.rename.cluster, commit->action); - -Reported-by: Euler Robot -Signed-off-by: Alex Chen -Message-Id: <5FA12620.6030705@huawei.com> -Signed-off-by: Kevin Wolf -(cherry-picked from commit c9eb2f3e38) ---- - block/vvfat.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/block/vvfat.c b/block/vvfat.c -index f6c28805dd..5dc8d6eb4c 100644 ---- a/block/vvfat.c -+++ b/block/vvfat.c -@@ -1453,7 +1453,7 @@ static void print_direntry(const direntry_t* direntry) - for(i=0;i<11;i++) - ADD_CHAR(direntry->name[i]); - buffer[j] = 0; -- fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n", -+ fprintf(stderr, "%s attributes=0x%02x begin=%u size=%u\n", - buffer, - direntry->attributes, - begin_of_direntry(direntry),le32_to_cpu(direntry->size)); -@@ -1462,7 +1462,7 @@ static void print_direntry(const direntry_t* direntry) - - static void print_mapping(const mapping_t* mapping) - { -- fprintf(stderr, "mapping (%p): begin, end = %d, %d, dir_index = %d, " -+ fprintf(stderr, "mapping (%p): begin, end = %u, %u, dir_index = %u, " - "first_mapping_index = %d, name = %s, mode = 0x%x, " , - mapping, mapping->begin, mapping->end, mapping->dir_index, - mapping->first_mapping_index, mapping->path, mapping->mode); -@@ -1470,7 +1470,7 @@ static void print_mapping(const mapping_t* mapping) - if (mapping->mode & MODE_DIRECTORY) - fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); - else -- fprintf(stderr, "offset = %d\n", mapping->info.file.offset); -+ fprintf(stderr, "offset = %u\n", mapping->info.file.offset); - } - #endif - -@@ -1604,7 +1604,7 @@ typedef struct commit_t { - static void clear_commits(BDRVVVFATState* s) - { - int i; --DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next)); -+DLOG(fprintf(stderr, "clear_commits (%u commits)\n", s->commits.next)); - for (i = 0; i < s->commits.next; i++) { - commit_t* 
commit = array_get(&(s->commits), i); - assert(commit->path || commit->action == ACTION_WRITEOUT); -@@ -2660,7 +2660,9 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s) - fprintf(stderr, "handle_renames\n"); - for (i = 0; i < s->commits.next; i++) { - commit_t* commit = array_get(&(s->commits), i); -- fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action); -+ fprintf(stderr, "%d, %s (%u, %d)\n", i, -+ commit->path ? commit->path : "(null)", -+ commit->param.rename.cluster, commit->action); - } - #endif - --- -2.27.0 - diff --git a/blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/blockdev-Return-bs-to-the-proper-context-on-snapshot.patch deleted file mode 100644 index a232c7450689ae13c12e215803a7d1a2bca4c158..0000000000000000000000000000000000000000 --- a/blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +++ /dev/null @@ -1,93 +0,0 @@ -From dc6b61f12750b3ab5a3965af2ec758750389233d Mon Sep 17 00:00:00 2001 -From: Sergio Lopez -Date: Wed, 8 Jan 2020 15:31:37 +0100 -Subject: [PATCH] blockdev: Return bs to the proper context on snapshot abort - -external_snapshot_abort() calls to bdrv_set_backing_hd(), which -returns state->old_bs to the main AioContext, as it's intended to be -used then the BDS is going to be released. As that's not the case when -aborting an external snapshot, return it to the AioContext it was -before the call. - -This issue can be triggered by issuing a transaction with two actions, -a proper blockdev-snapshot-sync and a bogus one, so the second will -trigger a transaction abort. 
This results in a crash with an stack -trace like this one: - - #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 - #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 - #2 0x00007fa10489cbc9 in __assert_fail_base - (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 - #3 0x00007fa1048aae96 in __GI___assert_fail - (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 - #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 - #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 - #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 - #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 - #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 - #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 - #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 - #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 - #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 - #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 - #14 0x0000557223f2f366 in aio_bh_call 
(bh=0x557225b9dc60) at util/async.c:117 - #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 - #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 - #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 - #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 - #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 - #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 - #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 - #23 0x0000557223d13201 in main_loop () at vl.c:1828 - #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf ---- - blockdev.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/blockdev.c b/blockdev.c -index 5088541591..79112be2e6 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1774,6 +1774,8 @@ static void external_snapshot_abort(BlkActionState *common) - if (state->new_bs) { - if (state->overlay_appended) { - AioContext *aio_context; -+ AioContext *tmp_context; -+ int ret; - - aio_context = bdrv_get_aio_context(state->old_bs); - aio_context_acquire(aio_context); -@@ -1781,6 +1783,25 @@ static void external_snapshot_abort(BlkActionState *common) - bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() - close state->old_bs; we need it */ - bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); -+ -+ /* -+ * The call to bdrv_set_backing_hd() above returns state->old_bs to -+ * the main AioContext. As we're still going to be using it, return -+ * it to the AioContext it was before. 
-+ */ -+ tmp_context = bdrv_get_aio_context(state->old_bs); -+ if (aio_context != tmp_context) { -+ aio_context_release(aio_context); -+ aio_context_acquire(tmp_context); -+ -+ ret = bdrv_try_set_aio_context(state->old_bs, -+ aio_context, NULL); -+ assert(ret == 0); -+ -+ aio_context_release(tmp_context); -+ aio_context_acquire(aio_context); -+ } -+ - bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ - --- -2.27.0 - diff --git a/blockdev-backup-utilize-do_backup_common.patch b/blockdev-backup-utilize-do_backup_common.patch deleted file mode 100644 index 6827b221978c2646d97f15e62b42b8eb34282bea..0000000000000000000000000000000000000000 --- a/blockdev-backup-utilize-do_backup_common.patch +++ /dev/null @@ -1,105 +0,0 @@ -From e5456acf2332efd0ed6106eb13cf24e6bca1ee64 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:52 -0400 -Subject: [PATCH] blockdev-backup: utilize do_backup_common - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190709232550.10724-4-jsnow@redhat.com -Signed-off-by: John Snow ---- - blockdev.c | 65 +++++------------------------------------------------- - 1 file changed, 6 insertions(+), 59 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index a29838a1c8..aa15ed1f00 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3668,78 +3668,25 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, - { - BlockDriverState *bs; - BlockDriverState *target_bs; -- Error *local_err = NULL; -- BdrvDirtyBitmap *bmap = NULL; - AioContext *aio_context; -- BlockJob *job = NULL; -- int job_flags = JOB_DEFAULT; -- int ret; -- -- if (!backup->has_speed) { -- backup->speed = 0; -- } -- if (!backup->has_on_source_error) { -- backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT; -- } -- if (!backup->has_on_target_error) { -- backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT; -- } -- if (!backup->has_job_id) { -- backup->job_id = NULL; -- } -- 
if (!backup->has_auto_finalize) { -- backup->auto_finalize = true; -- } -- if (!backup->has_auto_dismiss) { -- backup->auto_dismiss = true; -- } -- if (!backup->has_compress) { -- backup->compress = false; -- } -+ BlockJob *job; - - bs = bdrv_lookup_bs(backup->device, backup->device, errp); - if (!bs) { - return NULL; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); - if (!target_bs) { -- goto out; -+ return NULL; - } - -- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -- if (ret < 0) { -- goto out; -- } -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); - -- if (backup->has_bitmap) { -- bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); -- if (!bmap) { -- error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); -- goto out; -- } -- if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { -- goto out; -- } -- } -+ job = do_backup_common(qapi_BlockdevBackup_base(backup), -+ bs, target_bs, aio_context, txn, errp); - -- if (!backup->auto_finalize) { -- job_flags |= JOB_MANUAL_FINALIZE; -- } -- if (!backup->auto_dismiss) { -- job_flags |= JOB_MANUAL_DISMISS; -- } -- job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, -- backup->sync, bmap, backup->compress, -- backup->on_source_error, backup->on_target_error, -- job_flags, NULL, NULL, txn, &local_err); -- if (local_err != NULL) { -- error_propagate(errp, local_err); -- } --out: - aio_context_release(aio_context); - return job; - } --- -2.27.0 - diff --git a/blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/blockdev-fix-coding-style-issues-in-drive_backup_pre.patch deleted file mode 100644 index e915b05a415c2e2fc76ac0e58be7cc819f457533..0000000000000000000000000000000000000000 --- a/blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +++ /dev/null @@ -1,44 +0,0 @@ -From ffbf1e237d0311512c411e195278e69d710fb9cf Mon Sep 
17 00:00:00 2001 -From: Sergio Lopez -Date: Wed, 8 Jan 2020 15:31:31 +0100 -Subject: [PATCH] blockdev: fix coding style issues in drive_backup_prepare - -Fix a couple of minor coding style issues in drive_backup_prepare. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - blockdev.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 4435795b6d..99b1cafb8f 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3597,7 +3597,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - if (!backup->has_format) { - backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char*) bs->drv->format_name; -+ NULL : (char *) bs->drv->format_name; - } - - /* Early check to avoid creating target */ -@@ -3607,8 +3607,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - flags = bs->open_flags | BDRV_O_RDWR; - -- /* See if we have a backing HD we can use to create our new image -- * on top of. */ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. -+ */ - if (backup->sync == MIRROR_SYNC_MODE_TOP) { - source = backing_bs(bs); - if (!source) { --- -2.27.0 - diff --git a/blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/blockdev-honor-bdrv_try_set_aio_context-context-requ.patch deleted file mode 100644 index 970057179e504e1961c766d0a68107816c750721..0000000000000000000000000000000000000000 --- a/blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +++ /dev/null @@ -1,191 +0,0 @@ -From 64c6b3b911f65c19f3a235c8394f5db894c1ee6a Mon Sep 17 00:00:00 2001 -From: Sergio Lopez -Date: Wed, 8 Jan 2020 15:31:34 +0100 -Subject: [PATCH] blockdev: honor bdrv_try_set_aio_context() context - requirements - -bdrv_try_set_aio_context() requires that the old context is held, and -the new context is not held. Fix all the occurrences where it's not -done this way. 
- -Suggested-by: Max Reitz -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf ---- - blockdev.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++------- - 1 file changed, 60 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index d3309c205a..5088541591 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1578,6 +1578,7 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -+ AioContext *old_context; - int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar -@@ -1718,7 +1719,16 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(state->new_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } -@@ -1818,11 +1828,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *target_bs; - BlockDriverState *source = NULL; - AioContext *aio_context; -+ AioContext *old_context; - QDict *options; - Error *local_err = NULL; - int flags; - int64_t size; - bool set_backing_hd = false; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; -@@ -1911,6 +1923,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ bdrv_unref(target_bs); -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (set_backing_hd) { - bdrv_set_backing_hd(target_bs, source, &local_err); - if (local_err) { -@@ -1990,6 +2017,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -2004,7 +2033,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ - aio_context = bdrv_get_aio_context(bs); -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); - aio_context_acquire(aio_context); - state->bs = bs; - -@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - BlockJob *job = NULL; - BdrvDirtyBitmap *bmap = NULL; - int job_flags = JOB_DEFAULT; -- int ret; - - if (!backup->has_speed) { - backup->speed = 0; -@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - backup->compress = false; - } - -- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -- if (ret < 0) { -- return NULL; -- } -- - if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || - (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { - /* done before desugaring 'incremental' to print the right message */ -@@ -3802,6 +3836,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - BlockDriverState *bs; - BlockDriverState *source, *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode; - Error *local_err = NULL; - QDict *options = NULL; -@@ -3914,12 +3949,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - (arg->mode == NEW_IMAGE_MODE_EXISTING || - !bdrv_has_zero_init(target_bs))); - -+ -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- goto out; -+ aio_context_release(old_context); -+ return; - } - -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, - arg->has_replaces, arg->replaces, arg->sync, - backing_mode, zero_target, -@@ -3961,6 +4006,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - Error *local_err = NULL; - bool zero_target; -@@ -3978,10 +4024,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - - zero_target = (sync == MIRROR_SYNC_MODE_FULL); - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -+ aio_context_acquire(old_context); - - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } --- -2.27.0 - diff --git a/blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch deleted file mode 100644 index 84e29fffabed38135f24a1e83cbe12538481dbac..0000000000000000000000000000000000000000 --- a/blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 6d89e4923e9c341975dbfdd2bae153ba367a1b79 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez -Date: Wed, 8 Jan 2020 15:31:33 +0100 -Subject: [PATCH] blockdev: unify qmp_blockdev_backup and blockdev-backup - transaction paths - -Issuing a blockdev-backup from qmp_blockdev_backup takes a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_blockdev_backup() and -blockdev_backup_prepare(). 
- -This change unifies both paths, merging do_blockdev_backup() and -blockdev_backup_prepare(), and changing qmp_blockdev_backup() to -create a transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_blockdev_backup() is executed inside a -drained section, as it happens when creating a blockdev-backup -transaction. This change is visible from the user's perspective, as -the job gets paused and immediately resumed before starting the actual -work. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - blockdev.c | 60 ++++++++++++------------------------------------------ - 1 file changed, 13 insertions(+), 47 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 7016054688..d3309c205a 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1983,16 +1983,13 @@ typedef struct BlockdevBackupState { - BlockJob *job; - } BlockdevBackupState; - --static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp); -- - static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); - BlockdevBackup *backup; -- BlockDriverState *bs, *target; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; - AioContext *aio_context; -- Error *local_err = NULL; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -2002,8 +1999,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -- target = bdrv_lookup_bs(backup->target, backup->target, errp); -- if (!target) { -+ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -+ if (!target_bs) { - return; - } - -@@ -2014,13 +2011,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - /* Paired with .clean() */ - bdrv_drained_begin(state->bs); - -- state->job = 
do_blockdev_backup(backup, common->block_job_txn, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -+ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); - --out: - aio_context_release(aio_context); - } - -@@ -3672,41 +3666,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) - return bdrv_get_xdbg_block_graph(errp); - } - --BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp) -+void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) - { -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- AioContext *aio_context; -- BlockJob *job; -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -- if (!target_bs) { -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- job = do_backup_common(qapi_BlockdevBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- -- aio_context_release(aio_context); -- return job; --} -- --void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) --{ -- BlockJob *job; -- job = do_blockdev_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, -+ .u.blockdev_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - /* Parameter check and block job starting for drive mirroring. 
--- -2.27.0 - diff --git a/blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch deleted file mode 100644 index aefa05e921b7d66a09995716f8176367817d434c..0000000000000000000000000000000000000000 --- a/blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +++ /dev/null @@ -1,406 +0,0 @@ -From 952f7f53cdd4320d1a0328481fa578dd199eb1ce Mon Sep 17 00:00:00 2001 -From: Sergio Lopez -Date: Wed, 8 Jan 2020 15:31:32 +0100 -Subject: [PATCH] blockdev: unify qmp_drive_backup and drive-backup transaction - paths - -Issuing a drive-backup from qmp_drive_backup takes a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_drive_backup() and -drive_backup_prepare(). - -This change unifies both paths, merging do_drive_backup() and -drive_backup_prepare(), and changing qmp_drive_backup() to create a -transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_drive_backup() is executed inside a drained -section, as it happens when creating a drive-backup transaction. This -change is visible from the user's perspective, as the job gets paused -and immediately resumed before starting the actual work. - -Also fix tests 141, 185 and 219 to cope with the extra -JOB_STATUS_CHANGE lines. 
- -Signed-off-by: Sergio Lopez -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - blockdev.c | 224 +++++++++++++++++-------------------- - tests/qemu-iotests/141.out | 2 + - tests/qemu-iotests/185.out | 2 + - tests/qemu-iotests/219 | 7 +- - tests/qemu-iotests/219.out | 8 ++ - 5 files changed, 117 insertions(+), 126 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 99b1cafb8f..7016054688 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1804,39 +1804,128 @@ typedef struct DriveBackupState { - BlockJob *job; - } DriveBackupState; - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp); -+static BlockJob *do_backup_common(BackupCommon *backup, -+ BlockDriverState *bs, -+ BlockDriverState *target_bs, -+ AioContext *aio_context, -+ JobTxn *txn, Error **errp); - - static void drive_backup_prepare(BlkActionState *common, Error **errp) - { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); -- BlockDriverState *bs; - DriveBackup *backup; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; -+ BlockDriverState *source = NULL; - AioContext *aio_context; -+ QDict *options; - Error *local_err = NULL; -+ int flags; -+ int64_t size; -+ bool set_backing_hd = false; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; - -+ if (!backup->has_mode) { -+ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -+ } -+ - bs = bdrv_lookup_bs(backup->device, backup->device, errp); - if (!bs) { - return; - } - -+ if (!bs->drv) { -+ error_setg(errp, "Device has no medium"); -+ return; -+ } -+ - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - - /* Paired with .clean() */ - bdrv_drained_begin(bs); - -- state->bs = bs; -+ if (!backup->has_format) { -+ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
-+ NULL : (char *) bs->drv->format_name; -+ } -+ -+ /* Early check to avoid creating target */ -+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -+ goto out; -+ } -+ -+ flags = bs->open_flags | BDRV_O_RDWR; -+ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. -+ */ -+ if (backup->sync == MIRROR_SYNC_MODE_TOP) { -+ source = backing_bs(bs); -+ if (!source) { -+ backup->sync = MIRROR_SYNC_MODE_FULL; -+ } -+ } -+ if (backup->sync == MIRROR_SYNC_MODE_NONE) { -+ source = bs; -+ flags |= BDRV_O_NO_BACKING; -+ set_backing_hd = true; -+ } -+ -+ size = bdrv_getlength(bs); -+ if (size < 0) { -+ error_setg_errno(errp, -size, "bdrv_getlength failed"); -+ goto out; -+ } -+ -+ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -+ assert(backup->format); -+ if (source) { -+ bdrv_refresh_filename(source); -+ bdrv_img_create(backup->target, backup->format, source->filename, -+ source->drv->format_name, NULL, -+ size, flags, false, &local_err); -+ } else { -+ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -+ size, flags, false, &local_err); -+ } -+ } - -- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); - if (local_err) { - error_propagate(errp, local_err); - goto out; - } - -+ options = qdict_new(); -+ qdict_put_str(options, "discard", "unmap"); -+ qdict_put_str(options, "detect-zeroes", "unmap"); -+ if (backup->format) { -+ qdict_put_str(options, "driver", backup->format); -+ } -+ -+ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -+ if (!target_bs) { -+ goto out; -+ } -+ -+ if (set_backing_hd) { -+ bdrv_set_backing_hd(target_bs, source, &local_err); -+ if (local_err) { -+ goto unref; -+ } -+ } -+ -+ state->bs = bs; -+ -+ state->job = do_backup_common(qapi_DriveBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); -+ -+unref: -+ bdrv_unref(target_bs); - out: - aio_context_release(aio_context); - } -@@ -3564,126 +3653,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, - return job; - } - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp) --{ -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- BlockDriverState *source = NULL; -- BlockJob *job = NULL; -- AioContext *aio_context; -- QDict *options; -- Error *local_err = NULL; -- int flags; -- int64_t size; -- bool set_backing_hd = false; -- -- if (!backup->has_mode) { -- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -- } -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- if (!bs->drv) { -- error_setg(errp, "Device has no medium"); -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- if (!backup->has_format) { -- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char *) bs->drv->format_name; -- } -- -- /* Early check to avoid creating target */ -- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -- goto out; -- } -- -- flags = bs->open_flags | BDRV_O_RDWR; -- -- /* -- * See if we have a backing HD we can use to create our new image -- * on top of. 
-- */ -- if (backup->sync == MIRROR_SYNC_MODE_TOP) { -- source = backing_bs(bs); -- if (!source) { -- backup->sync = MIRROR_SYNC_MODE_FULL; -- } -- } -- if (backup->sync == MIRROR_SYNC_MODE_NONE) { -- source = bs; -- flags |= BDRV_O_NO_BACKING; -- set_backing_hd = true; -- } -- -- size = bdrv_getlength(bs); -- if (size < 0) { -- error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -- } -- -- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -- assert(backup->format); -- if (source) { -- bdrv_refresh_filename(source); -- bdrv_img_create(backup->target, backup->format, source->filename, -- source->drv->format_name, NULL, -- size, flags, false, &local_err); -- } else { -- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -- size, flags, false, &local_err); -- } -- } -- -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -- -- options = qdict_new(); -- qdict_put_str(options, "discard", "unmap"); -- qdict_put_str(options, "detect-zeroes", "unmap"); -- if (backup->format) { -- qdict_put_str(options, "driver", backup->format); -- } -- -- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -- if (!target_bs) { -- goto out; -- } -- -- if (set_backing_hd) { -- bdrv_set_backing_hd(target_bs, source, &local_err); -- if (local_err) { -- goto unref; -- } -- } -- -- job = do_backup_common(qapi_DriveBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- --unref: -- bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); -- return job; --} -- --void qmp_drive_backup(DriveBackup *arg, Error **errp) -+void qmp_drive_backup(DriveBackup *backup, Error **errp) - { -- -- BlockJob *job; -- job = do_drive_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, -+ .u.drive_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
-diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out -index 4d71d9dcae..07e0ec66d7 100644 ---- a/tests/qemu-iotests/141.out -+++ b/tests/qemu-iotests/141.out -@@ -10,6 +10,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. - Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} - {"return": {}} -diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out -index ddfbf3c765..a233be7f58 100644 ---- a/tests/qemu-iotests/185.out -+++ b/tests/qemu-iotests/185.out -@@ -51,6 +51,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l - Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", 
"id": "disk"}} - {"return": {}} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 -index e0c51662c0..655f54d881 100755 ---- a/tests/qemu-iotests/219 -+++ b/tests/qemu-iotests/219 -@@ -63,7 +63,7 @@ def test_pause_resume(vm): - # logged immediately - iotests.log(vm.qmp('query-jobs')) - --def test_job_lifecycle(vm, job, job_args, has_ready=False): -+def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): - global img_size - - iotests.log('') -@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): - iotests.log('Waiting for PENDING state...') - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ if is_mirror: -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - - if not job_args.get('auto-finalize', True): - # PENDING state: -@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ - - for auto_finalize in [True, False]: - for auto_dismiss in [True, False]: -- test_job_lifecycle(vm, 'drive-backup', job_args={ -+ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ - 'device': 'drive0-node', - 'target': copy_path, - 'sync': 'full', -diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out -index 8ebd3fee60..0ea5d0b9d5 100644 ---- a/tests/qemu-iotests/219.out -+++ b/tests/qemu-iotests/219.out -@@ -135,6 +135,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -186,6 +188,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -245,6 +249,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} -@@ -304,6 +310,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} --- -2.27.0 - diff --git a/blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch b/blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch deleted file mode 100644 index 2efef7276e0463b863265039dccdc0afd3aee834..0000000000000000000000000000000000000000 --- a/blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch +++ /dev/null @@ -1,114 +0,0 @@ -From e37cda3452309d147f1f7aec3c74249001e3db0c Mon Sep 17 00:00:00 2001 -From: Michael Qiu -Date: Wed, 12 May 2021 21:54:37 +0800 -Subject: [PATCH] blockjob: Fix crash with IOthread when block commit 
after - snapshot - -Currently, if guest has workloads, IO thread will acquire aio_context -lock before do io_submit, it leads to segmentfault when do block commit -after snapshot. Just like below: - -Program received signal SIGSEGV, Segmentation fault. - -[Switching to Thread 0x7f7c7d91f700 (LWP 99907)] -0x00005576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437 -1437 ../block/mirror.c: No such file or directory. -(gdb) p s->job -$17 = (MirrorBlockJob *) 0x0 -(gdb) p s->stop -$18 = false - -Call trace of IO thread: -0 0x00005576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437 -1 0x00005576d0f7f3ab in bdrv_driver_pwritev at ../block/io.c:1174 -2 0x00005576d0f8139d in bdrv_aligned_pwritev at ../block/io.c:1988 -3 0x00005576d0f81b65 in bdrv_co_pwritev_part at ../block/io.c:2156 -4 0x00005576d0f8e6b7 in blk_do_pwritev_part at ../block/block-backend.c:1260 -5 0x00005576d0f8e84d in blk_aio_write_entry at ../block/block-backend.c:1476 -... - -Switch to qemu main thread: -0 0x00007f903be704ed in __lll_lock_wait at -/lib/../lib64/libpthread.so.0 -1 0x00007f903be6bde6 in _L_lock_941 at /lib/../lib64/libpthread.so.0 -2 0x00007f903be6bcdf in pthread_mutex_lock at -/lib/../lib64/libpthread.so.0 -3 0x0000564b21456889 in qemu_mutex_lock_impl at -../util/qemu-thread-posix.c:79 -4 0x0000564b213af8a5 in block_job_add_bdrv at ../blockjob.c:224 -5 0x0000564b213b00ad in block_job_create at ../blockjob.c:440 -6 0x0000564b21357c0a in mirror_start_job at ../block/mirror.c:1622 -7 0x0000564b2135a9af in commit_active_start at ../block/mirror.c:1867 -8 0x0000564b2133d132 in qmp_block_commit at ../blockdev.c:2768 -9 0x0000564b2141fef3 in qmp_marshal_block_commit at -qapi/qapi-commands-block-core.c:346 -10 0x0000564b214503c9 in do_qmp_dispatch_bh at -../qapi/qmp-dispatch.c:110 -11 0x0000564b21451996 in aio_bh_poll at ../util/async.c:164 -12 0x0000564b2146018e in aio_dispatch at ../util/aio-posix.c:381 -13 0x0000564b2145187e in aio_ctx_dispatch at ../util/async.c:306 
-14 0x00007f9040239049 in g_main_context_dispatch at -/lib/../lib64/libglib-2.0.so.0 -15 0x0000564b21447368 in main_loop_wait at ../util/main-loop.c:232 -16 0x0000564b21447368 in main_loop_wait at ../util/main-loop.c:255 -17 0x0000564b21447368 in main_loop_wait at ../util/main-loop.c:531 -18 0x0000564b212304e1 in qemu_main_loop at ../softmmu/runstate.c:721 -19 0x0000564b20f7975e in main at ../softmmu/main.c:50 - -In IO thread when do bdrv_mirror_top_pwritev, the job is NULL, and stop field -is false, this means the MirrorBDSOpaque "s" object has not been initialized -yet, and this object is initialized by block_job_create(), but the initialize -process is stuck in acquiring the lock. - -In this situation, IO thread come to bdrv_mirror_top_pwritev(),which means that -mirror-top node is already inserted into block graph, but its bs->opaque->job -is not initialized. - -The root cause is that qemu main thread do release/acquire when hold the lock, -at the same time, IO thread get the lock after release stage, and the crash -occured. - -Actually, in this situation, job->job.aio_context will not equal to -qemu_get_aio_context(), and will be the same as bs->aio_context, -thus, no need to release the lock, becasue bdrv_root_attach_child() -will not change the context. - -This patch fix this issue. 
- -Fixes: 132ada80 "block: Adjust AioContexts when attaching nodes" - -Signed-off-by: Michael Qiu -Message-Id: <20210203024059.52683-1-08005325@163.com> -Signed-off-by: Kevin Wolf ---- - blockjob.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/blockjob.c b/blockjob.c -index 74abb97bfd..72865a4a6e 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -223,14 +223,18 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, - uint64_t perm, uint64_t shared_perm, Error **errp) - { - BdrvChild *c; -+ bool need_context_ops; - - bdrv_ref(bs); -- if (job->job.aio_context != qemu_get_aio_context()) { -+ -+ need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context; -+ -+ if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { - aio_context_release(job->job.aio_context); - } - c = bdrv_root_attach_child(bs, name, &child_job, job->job.aio_context, - perm, shared_perm, job, errp); -- if (job->job.aio_context != qemu_get_aio_context()) { -+ if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { - aio_context_acquire(job->job.aio_context); - } - if (c == NULL) { --- -2.27.0 - diff --git a/blockjob-update-nodes-head-while-removing-all-bdrv.patch b/blockjob-update-nodes-head-while-removing-all-bdrv.patch deleted file mode 100644 index 36cedc77f7d38b124263a6f5d09e5f1dc97de5b8..0000000000000000000000000000000000000000 --- a/blockjob-update-nodes-head-while-removing-all-bdrv.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 86b0f4022bb43b16979ba5300e8d40a1e6d44b79 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez -Date: Wed, 11 Sep 2019 12:03:16 +0200 -Subject: [PATCH] blockjob: update nodes head while removing all bdrv - -block_job_remove_all_bdrv() iterates through job->nodes, calling -bdrv_root_unref_child() for each entry. 
The call to the latter may -reach child_job_[can_]set_aio_ctx(), which will also attempt to -traverse job->nodes, potentially finding entries that where freed -on previous iterations. - -To avoid this situation, update job->nodes head on each iteration to -ensure that already freed entries are no longer linked to the list. - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1746631 -Signed-off-by: Sergio Lopez -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20190911100316.32282-1-mreitz@redhat.com -Reviewed-by: Sergio Lopez -Signed-off-by: Max Reitz -(cherry picked from commit d876bf676f5e7c6aa9ac64555e48cba8734ecb2f) -Signed-off-by: Michael Roth ---- - blockjob.c | 17 +++++++++++++---- - 1 file changed, 13 insertions(+), 4 deletions(-) - -diff --git a/blockjob.c b/blockjob.c -index 20b7f557da..74abb97bfd 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -186,14 +186,23 @@ static const BdrvChildRole child_job = { - - void block_job_remove_all_bdrv(BlockJob *job) - { -- GSList *l; -- for (l = job->nodes; l; l = l->next) { -+ /* -+ * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), -+ * which will also traverse job->nodes, so consume the list one by -+ * one to make sure that such a concurrent access does not attempt -+ * to process an already freed BdrvChild. 
-+ */ -+ while (job->nodes) { -+ GSList *l = job->nodes; - BdrvChild *c = l->data; -+ -+ job->nodes = l->next; -+ - bdrv_op_unblock_all(c->bs, job->blocker); - bdrv_root_unref_child(c); -+ -+ g_slist_free_1(l); - } -- g_slist_free(job->nodes); -- job->nodes = NULL; - } - - bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) --- -2.23.0 diff --git a/bt-use-size_t-type-for-length-parameters-instead-of-.patch b/bt-use-size_t-type-for-length-parameters-instead-of-.patch deleted file mode 100644 index 2005979aec4f4401b512bd0ea72d6c12493f5ea1..0000000000000000000000000000000000000000 --- a/bt-use-size_t-type-for-length-parameters-instead-of-.patch +++ /dev/null @@ -1,794 +0,0 @@ -From f9ab92373813cfccd31f29c0d963232f65cb5f88 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Fri, 22 May 2020 12:22:26 +0800 -Subject: [PATCH] bt: use size_t type for length parameters instead of int - -From: Prasad J Pandit - -The length parameter values are not negative, thus use an unsigned -type 'size_t' for them. Many routines pass 'len' values to memcpy(3) -calls. If it was negative, it could lead to memory corruption issues. -Add check to avoid it. 
- -Reported-by: Arash TC -Signed-off-by: Prasad J Pandit - -diff --git a/bt-host.c b/bt-host.c -index 2f8f631..b73a44d 100644 ---- a/bt-host.c -+++ b/bt-host.c -@@ -43,7 +43,7 @@ struct bt_host_hci_s { - }; - - static void bt_host_send(struct HCIInfo *hci, -- int type, const uint8_t *data, int len) -+ int type, const uint8_t *data, size_t len) - { - struct bt_host_hci_s *s = (struct bt_host_hci_s *) hci; - uint8_t pkt = type; -@@ -63,17 +63,17 @@ static void bt_host_send(struct HCIInfo *hci, - } - } - --static void bt_host_cmd(struct HCIInfo *hci, const uint8_t *data, int len) -+static void bt_host_cmd(struct HCIInfo *hci, const uint8_t *data, size_t len) - { - bt_host_send(hci, HCI_COMMAND_PKT, data, len); - } - --static void bt_host_acl(struct HCIInfo *hci, const uint8_t *data, int len) -+static void bt_host_acl(struct HCIInfo *hci, const uint8_t *data, size_t len) - { - bt_host_send(hci, HCI_ACLDATA_PKT, data, len); - } - --static void bt_host_sco(struct HCIInfo *hci, const uint8_t *data, int len) -+static void bt_host_sco(struct HCIInfo *hci, const uint8_t *data, size_t len) - { - bt_host_send(hci, HCI_SCODATA_PKT, data, len); - } -diff --git a/bt-vhci.c b/bt-vhci.c -index 886e146..32ef1c5 100644 ---- a/bt-vhci.c -+++ b/bt-vhci.c -@@ -89,7 +89,7 @@ static void vhci_read(void *opaque) - } - - static void vhci_host_send(void *opaque, -- int type, const uint8_t *data, int len) -+ int type, const uint8_t *data, size_t len) - { - struct bt_vhci_s *s = (struct bt_vhci_s *) opaque; - #if 0 -@@ -112,6 +112,7 @@ static void vhci_host_send(void *opaque, - static uint8_t buf[4096]; - - buf[0] = type; -+ assert(len < sizeof(buf)); - memcpy(buf + 1, data, len); - - while (write(s->fd, buf, len + 1) < 0) -@@ -124,13 +125,13 @@ static void vhci_host_send(void *opaque, - } - - static void vhci_out_hci_packet_event(void *opaque, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - vhci_host_send(opaque, HCI_EVENT_PKT, data, len); - } - - static void 
vhci_out_hci_packet_acl(void *opaque, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - vhci_host_send(opaque, HCI_ACLDATA_PKT, data, len); - } -diff --git a/hw/bt/core.c b/hw/bt/core.c -index dfb196e..f548b3d 100644 ---- a/hw/bt/core.c -+++ b/hw/bt/core.c -@@ -44,7 +44,7 @@ static void bt_dummy_lmp_disconnect_master(struct bt_link_s *link) - } - - static void bt_dummy_lmp_acl_resp(struct bt_link_s *link, -- const uint8_t *data, int start, int len) -+ const uint8_t *data, int start, size_t len) - { - error_report("%s: stray ACL response PDU, fixme", __func__); - exit(-1); -diff --git a/hw/bt/hci-csr.c b/hw/bt/hci-csr.c -index 3d60654..f7a74c0 100644 ---- a/hw/bt/hci-csr.c -+++ b/hw/bt/hci-csr.c -@@ -103,7 +103,7 @@ static inline void csrhci_fifo_wake(struct csrhci_s *s) - } - - #define csrhci_out_packetz(s, len) memset(csrhci_out_packet(s, len), 0, len) --static uint8_t *csrhci_out_packet(struct csrhci_s *s, int len) -+static uint8_t *csrhci_out_packet(struct csrhci_s *s, size_t len) - { - int off = s->out_start + s->out_len; - -@@ -112,14 +112,14 @@ static uint8_t *csrhci_out_packet(struct csrhci_s *s, int len) - - if (off < FIFO_LEN) { - if (off + len > FIFO_LEN && (s->out_size = off + len) > FIFO_LEN * 2) { -- error_report("%s: can't alloc %i bytes", __func__, len); -+ error_report("%s: can't alloc %zu bytes", __func__, len); - exit(-1); - } - return s->outfifo + off; - } - - if (s->out_len > s->out_size) { -- error_report("%s: can't alloc %i bytes", __func__, len); -+ error_report("%s: can't alloc %zu bytes", __func__, len); - exit(-1); - } - -@@ -127,7 +127,7 @@ static uint8_t *csrhci_out_packet(struct csrhci_s *s, int len) - } - - static inline uint8_t *csrhci_out_packet_csr(struct csrhci_s *s, -- int type, int len) -+ int type, size_t len) - { - uint8_t *ret = csrhci_out_packetz(s, len + 2); - -@@ -138,7 +138,7 @@ static inline uint8_t *csrhci_out_packet_csr(struct csrhci_s *s, - } - - static inline uint8_t 
*csrhci_out_packet_event(struct csrhci_s *s, -- int evt, int len) -+ int evt, size_t len) - { - uint8_t *ret = csrhci_out_packetz(s, - len + 1 + sizeof(struct hci_event_hdr)); -@@ -151,7 +151,7 @@ static inline uint8_t *csrhci_out_packet_event(struct csrhci_s *s, - } - - static void csrhci_in_packet_vendor(struct csrhci_s *s, int ocf, -- uint8_t *data, int len) -+ uint8_t *data, size_t len) - { - int offset; - uint8_t *rpkt; -@@ -320,18 +320,18 @@ static int csrhci_write(struct Chardev *chr, - struct csrhci_s *s = (struct csrhci_s *)chr; - int total = 0; - -- if (!s->enable) -+ if (!s->enable || len <= 0) - return 0; - - for (;;) { - int cnt = MIN(len, s->in_needed - s->in_len); -- if (cnt) { -- memcpy(s->inpkt + s->in_len, buf, cnt); -- s->in_len += cnt; -- buf += cnt; -- len -= cnt; -- total += cnt; -- } -+ assert(cnt > 0); -+ -+ memcpy(s->inpkt + s->in_len, buf, cnt); -+ s->in_len += cnt; -+ buf += cnt; -+ len -= cnt; -+ total += cnt; - - if (s->in_len < s->in_needed) { - break; -@@ -363,7 +363,7 @@ static int csrhci_write(struct Chardev *chr, - } - - static void csrhci_out_hci_packet_event(void *opaque, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - struct csrhci_s *s = (struct csrhci_s *) opaque; - uint8_t *pkt = csrhci_out_packet(s, (len + 2) & ~1); /* Align */ -@@ -375,7 +375,7 @@ static void csrhci_out_hci_packet_event(void *opaque, - } - - static void csrhci_out_hci_packet_acl(void *opaque, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - struct csrhci_s *s = (struct csrhci_s *) opaque; - uint8_t *pkt = csrhci_out_packet(s, (len + 2) & ~1); /* Align */ -diff --git a/hw/bt/hci.c b/hw/bt/hci.c -index c7958f6..9c4f957 100644 ---- a/hw/bt/hci.c -+++ b/hw/bt/hci.c -@@ -31,7 +31,7 @@ - - struct bt_hci_s { - uint8_t *(*evt_packet)(void *opaque); -- void (*evt_submit)(void *opaque, int len); -+ void (*evt_submit)(void *opaque, size_t len); - void *opaque; - uint8_t evt_buf[256]; - -@@ -61,7 +61,7 @@ struct 
bt_hci_s { - struct bt_hci_master_link_s { - struct bt_link_s *link; - void (*lmp_acl_data)(struct bt_link_s *link, -- const uint8_t *data, int start, int len); -+ const uint8_t *data, int start, size_t len); - QEMUTimer *acl_mode_timer; - } handle[HCI_HANDLES_MAX]; - uint32_t role_bmp; -@@ -433,7 +433,7 @@ static const uint8_t bt_event_reserved_mask[8] = { - }; - - --static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, int len) -+static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, size_t len) - { - } - -@@ -451,13 +451,13 @@ struct HCIInfo null_hci = { - - - static inline uint8_t *bt_hci_event_start(struct bt_hci_s *hci, -- int evt, int len) -+ int evt, size_t len) - { - uint8_t *packet, mask; - int mask_byte; - - if (len > 255) { -- error_report("%s: HCI event params too long (%ib)", __func__, len); -+ error_report("%s: HCI event params too long (%zub)", __func__, len); - exit(-1); - } - -@@ -474,7 +474,7 @@ static inline uint8_t *bt_hci_event_start(struct bt_hci_s *hci, - } - - static inline void bt_hci_event(struct bt_hci_s *hci, int evt, -- void *params, int len) -+ void *params, size_t len) - { - uint8_t *packet = bt_hci_event_start(hci, evt, len); - -@@ -499,7 +499,7 @@ static inline void bt_hci_event_status(struct bt_hci_s *hci, int status) - } - - static inline void bt_hci_event_complete(struct bt_hci_s *hci, -- void *ret, int len) -+ void *ret, size_t len) - { - uint8_t *packet = bt_hci_event_start(hci, EVT_CMD_COMPLETE, - len + EVT_CMD_COMPLETE_SIZE); -@@ -1476,7 +1476,7 @@ static inline void bt_hci_event_num_comp_pkts(struct bt_hci_s *hci, - } - - static void bt_submit_hci(struct HCIInfo *info, -- const uint8_t *data, int length) -+ const uint8_t *data, size_t length) - { - struct bt_hci_s *hci = hci_from_info(info); - uint16_t cmd; -@@ -1970,7 +1970,7 @@ static void bt_submit_hci(struct HCIInfo *info, - break; - - short_hci: -- error_report("%s: HCI packet too short (%iB)", __func__, length); -+ error_report("%s: HCI 
packet too short (%zuB)", __func__, length); - bt_hci_event_status(hci, HCI_INVALID_PARAMETERS); - break; - } -@@ -1981,7 +1981,7 @@ static void bt_submit_hci(struct HCIInfo *info, - * know that a packet contained the last fragment of the SDU when the next - * SDU starts. */ - static inline void bt_hci_lmp_acl_data(struct bt_hci_s *hci, uint16_t handle, -- const uint8_t *data, int start, int len) -+ const uint8_t *data, int start, size_t len) - { - struct hci_acl_hdr *pkt = (void *) hci->acl_buf; - -@@ -1989,7 +1989,7 @@ static inline void bt_hci_lmp_acl_data(struct bt_hci_s *hci, uint16_t handle, - /* TODO: avoid memcpy'ing */ - - if (len + HCI_ACL_HDR_SIZE > sizeof(hci->acl_buf)) { -- error_report("%s: can't take ACL packets %i bytes long", -+ error_report("%s: can't take ACL packets %zu bytes long", - __func__, len); - return; - } -@@ -2003,7 +2003,7 @@ static inline void bt_hci_lmp_acl_data(struct bt_hci_s *hci, uint16_t handle, - } - - static void bt_hci_lmp_acl_data_slave(struct bt_link_s *btlink, -- const uint8_t *data, int start, int len) -+ const uint8_t *data, int start, size_t len) - { - struct bt_hci_link_s *link = (struct bt_hci_link_s *) btlink; - -@@ -2012,14 +2012,14 @@ static void bt_hci_lmp_acl_data_slave(struct bt_link_s *btlink, - } - - static void bt_hci_lmp_acl_data_host(struct bt_link_s *link, -- const uint8_t *data, int start, int len) -+ const uint8_t *data, int start, size_t len) - { - bt_hci_lmp_acl_data(hci_from_device(link->host), - link->handle, data, start, len); - } - - static void bt_submit_acl(struct HCIInfo *info, -- const uint8_t *data, int length) -+ const uint8_t *data, size_t length) - { - struct bt_hci_s *hci = hci_from_info(info); - uint16_t handle; -@@ -2027,7 +2027,7 @@ static void bt_submit_acl(struct HCIInfo *info, - struct bt_link_s *link; - - if (length < HCI_ACL_HDR_SIZE) { -- error_report("%s: ACL packet too short (%iB)", __func__, length); -+ error_report("%s: ACL packet too short (%zuB)", __func__, length); - 
return; - } - -@@ -2045,7 +2045,7 @@ static void bt_submit_acl(struct HCIInfo *info, - handle &= ~HCI_HANDLE_OFFSET; - - if (datalen > length) { -- error_report("%s: ACL packet too short (%iB < %iB)", -+ error_report("%s: ACL packet too short (%zuB < %iB)", - __func__, length, datalen); - return; - } -@@ -2087,7 +2087,7 @@ static void bt_submit_acl(struct HCIInfo *info, - } - - static void bt_submit_sco(struct HCIInfo *info, -- const uint8_t *data, int length) -+ const uint8_t *data, size_t length) - { - struct bt_hci_s *hci = hci_from_info(info); - uint16_t handle; -@@ -2106,7 +2106,7 @@ static void bt_submit_sco(struct HCIInfo *info, - } - - if (datalen > length) { -- error_report("%s: SCO packet too short (%iB < %iB)", -+ error_report("%s: SCO packet too short (%zuB < %iB)", - __func__, length, datalen); - return; - } -@@ -2127,7 +2127,7 @@ static uint8_t *bt_hci_evt_packet(void *opaque) - return s->evt_buf; - } - --static void bt_hci_evt_submit(void *opaque, int len) -+static void bt_hci_evt_submit(void *opaque, size_t len) - { - /* TODO: notify upper layer */ - struct bt_hci_s *s = opaque; -diff --git a/hw/bt/hid.c b/hw/bt/hid.c -index 066ca99..fe15434 100644 ---- a/hw/bt/hid.c -+++ b/hw/bt/hid.c -@@ -95,7 +95,7 @@ struct bt_hid_device_s { - int data_type; - int intr_state; - struct { -- int len; -+ size_t len; - uint8_t buffer[1024]; - } dataother, datain, dataout, feature, intrdataout; - enum { -@@ -168,7 +168,7 @@ static void bt_hid_disconnect(struct bt_hid_device_s *s) - } - - static void bt_hid_send_data(struct bt_l2cap_conn_params_s *ch, int type, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - uint8_t *pkt, hdr = (BT_DATA << 4) | type; - int plen; -@@ -189,7 +189,7 @@ static void bt_hid_send_data(struct bt_l2cap_conn_params_s *ch, int type, - } - - static void bt_hid_control_transaction(struct bt_hid_device_s *s, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - uint8_t type, parameter; - int rlen, ret 
= -1; -@@ -361,7 +361,7 @@ static void bt_hid_control_transaction(struct bt_hid_device_s *s, - bt_hid_send_handshake(s, ret); - } - --static void bt_hid_control_sdu(void *opaque, const uint8_t *data, int len) -+static void bt_hid_control_sdu(void *opaque, const uint8_t *data, size_t len) - { - struct bt_hid_device_s *hid = opaque; - -@@ -387,7 +387,7 @@ static void bt_hid_datain(HIDState *hs) - hid->datain.buffer, hid->datain.len); - } - --static void bt_hid_interrupt_sdu(void *opaque, const uint8_t *data, int len) -+static void bt_hid_interrupt_sdu(void *opaque, const uint8_t *data, size_t len) - { - struct bt_hid_device_s *hid = opaque; - -diff --git a/hw/bt/l2cap.c b/hw/bt/l2cap.c -index d67098a..2f70a03 100644 ---- a/hw/bt/l2cap.c -+++ b/hw/bt/l2cap.c -@@ -31,10 +31,10 @@ struct l2cap_instance_s { - int role; - - uint8_t frame_in[65535 + L2CAP_HDR_SIZE] __attribute__ ((aligned (4))); -- int frame_in_len; -+ uint32_t frame_in_len; - - uint8_t frame_out[65535 + L2CAP_HDR_SIZE] __attribute__ ((aligned (4))); -- int frame_out_len; -+ uint32_t frame_out_len; - - /* Signalling channel timers. They exist per-request but we can make - * sure we have no more than one outstanding request at any time. 
*/ -@@ -48,7 +48,7 @@ struct l2cap_instance_s { - struct bt_l2cap_conn_params_s params; - - void (*frame_in)(struct l2cap_chan_s *chan, uint16_t cid, -- const l2cap_hdr *hdr, int len); -+ const l2cap_hdr *hdr, size_t len); - int mps; - int min_mtu; - -@@ -67,7 +67,7 @@ struct l2cap_instance_s { - - /* Only flow-controlled, connection-oriented channels */ - uint8_t sdu[65536]; /* TODO: dynamically allocate */ -- int len_cur, len_total; -+ uint32_t len_cur, len_total; - int rexmit; - int monitor_timeout; - QEMUTimer *monitor_timer; -@@ -139,7 +139,7 @@ static const uint16_t l2cap_fcs16_table[256] = { - 0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040, - }; - --static uint16_t l2cap_fcs16(const uint8_t *message, int len) -+static uint16_t l2cap_fcs16(const uint8_t *message, size_t len) - { - uint16_t fcs = 0x0000; - -@@ -185,7 +185,7 @@ static void l2cap_monitor_timer_update(struct l2cap_chan_s *ch) - } - - static void l2cap_command_reject(struct l2cap_instance_s *l2cap, int id, -- uint16_t reason, const void *data, int plen) -+ uint16_t reason, const void *data, size_t plen) - { - uint8_t *pkt; - l2cap_cmd_hdr *hdr; -@@ -246,7 +246,7 @@ static void l2cap_connection_response(struct l2cap_instance_s *l2cap, - } - - static void l2cap_configuration_request(struct l2cap_instance_s *l2cap, -- int dcid, int flag, const uint8_t *data, int len) -+ int dcid, int flag, const uint8_t *data, size_t len) - { - uint8_t *pkt; - l2cap_cmd_hdr *hdr; -@@ -274,7 +274,7 @@ static void l2cap_configuration_request(struct l2cap_instance_s *l2cap, - } - - static void l2cap_configuration_response(struct l2cap_instance_s *l2cap, -- int scid, int flag, int result, const uint8_t *data, int len) -+ int scid, int flag, int result, const uint8_t *data, size_t len) - { - uint8_t *pkt; - l2cap_cmd_hdr *hdr; -@@ -321,7 +321,7 @@ static void l2cap_disconnection_response(struct l2cap_instance_s *l2cap, - } - - static void l2cap_echo_response(struct l2cap_instance_s *l2cap, -- const uint8_t 
*data, int len) -+ const uint8_t *data, size_t len) - { - uint8_t *pkt; - l2cap_cmd_hdr *hdr; -@@ -342,7 +342,7 @@ static void l2cap_echo_response(struct l2cap_instance_s *l2cap, - } - - static void l2cap_info_response(struct l2cap_instance_s *l2cap, int type, -- int result, const uint8_t *data, int len) -+ int result, const uint8_t *data, size_t len) - { - uint8_t *pkt; - l2cap_cmd_hdr *hdr; -@@ -365,16 +365,18 @@ static void l2cap_info_response(struct l2cap_instance_s *l2cap, int type, - l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params); - } - --static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, int len); -+static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, -+ size_t len); - static void l2cap_bframe_submit(struct bt_l2cap_conn_params_s *parms); - #if 0 --static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, int len); -+static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, -+ size_t len); - static void l2cap_iframe_submit(struct bt_l2cap_conn_params_s *parm); - #endif - static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid, -- const l2cap_hdr *hdr, int len); -+ const l2cap_hdr *hdr, size_t len); - static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid, -- const l2cap_hdr *hdr, int len); -+ const l2cap_hdr *hdr, size_t len); - - static int l2cap_cid_new(struct l2cap_instance_s *l2cap) - { -@@ -498,7 +500,7 @@ static void l2cap_channel_config_req_event(struct l2cap_instance_s *l2cap, - - static int l2cap_channel_config(struct l2cap_instance_s *l2cap, - struct l2cap_chan_s *ch, int flag, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - l2cap_conf_opt *opt; - l2cap_conf_opt_qos *qos; -@@ -683,7 +685,7 @@ static int l2cap_channel_config(struct l2cap_instance_s *l2cap, - } - - static void l2cap_channel_config_req_msg(struct l2cap_instance_s *l2cap, -- int flag, int cid, const uint8_t *data, int len) -+ int flag, int cid, const uint8_t 
*data, size_t len) - { - struct l2cap_chan_s *ch; - -@@ -715,7 +717,7 @@ static void l2cap_channel_config_req_msg(struct l2cap_instance_s *l2cap, - } - - static int l2cap_channel_config_rsp_msg(struct l2cap_instance_s *l2cap, -- int result, int flag, int cid, const uint8_t *data, int len) -+ int result, int flag, int cid, const uint8_t *data, size_t len) - { - struct l2cap_chan_s *ch; - -@@ -783,7 +785,7 @@ static void l2cap_info(struct l2cap_instance_s *l2cap, int type) - } - - static void l2cap_command(struct l2cap_instance_s *l2cap, int code, int id, -- const uint8_t *params, int len) -+ const uint8_t *params, size_t len) - { - int err; - -@@ -938,7 +940,7 @@ static void l2cap_rexmit_enable(struct l2cap_chan_s *ch, int enable) - } - - /* Command frame SDU */ --static void l2cap_cframe_in(void *opaque, const uint8_t *data, int len) -+static void l2cap_cframe_in(void *opaque, const uint8_t *data, size_t len) - { - struct l2cap_instance_s *l2cap = opaque; - const l2cap_cmd_hdr *hdr; -@@ -966,7 +968,7 @@ static void l2cap_cframe_in(void *opaque, const uint8_t *data, int len) - } - - /* Group frame SDU */ --static void l2cap_gframe_in(void *opaque, const uint8_t *data, int len) -+static void l2cap_gframe_in(void *opaque, const uint8_t *data, size_t len) - { - } - -@@ -977,7 +979,7 @@ static void l2cap_sframe_in(struct l2cap_chan_s *ch, uint16_t ctrl) - - /* Basic L2CAP mode Information frame */ - static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid, -- const l2cap_hdr *hdr, int len) -+ const l2cap_hdr *hdr, size_t len) - { - /* We have a full SDU, no further processing */ - ch->params.sdu_in(ch->params.opaque, hdr->data, len); -@@ -985,7 +987,7 @@ static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid, - - /* Flow Control and Retransmission mode frame */ - static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid, -- const l2cap_hdr *hdr, int len) -+ const l2cap_hdr *hdr, size_t len) - { - uint16_t fcs = lduw_le_p(hdr->data + len - 2); 
- -@@ -1076,7 +1078,7 @@ static void l2cap_frame_in(struct l2cap_instance_s *l2cap, - - /* "Recombination" */ - static void l2cap_pdu_in(struct l2cap_instance_s *l2cap, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - const l2cap_hdr *hdr = (void *) l2cap->frame_in; - -@@ -1123,7 +1125,7 @@ static inline void l2cap_pdu_submit(struct l2cap_instance_s *l2cap) - (l2cap->link, l2cap->frame_out, 1, l2cap->frame_out_len); - } - --static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, int len) -+static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, size_t len) - { - struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parm; - -@@ -1146,7 +1148,7 @@ static void l2cap_bframe_submit(struct bt_l2cap_conn_params_s *parms) - - #if 0 - /* Stub: Only used if an emulated device requests outgoing flow control */ --static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, int len) -+static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, size_t len) - { - struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parm; - -@@ -1291,7 +1293,7 @@ static void l2cap_lmp_disconnect_slave(struct bt_link_s *link) - } - - static void l2cap_lmp_acl_data_slave(struct bt_link_s *link, -- const uint8_t *data, int start, int len) -+ const uint8_t *data, int start, size_t len) - { - struct slave_l2cap_instance_s *l2cap = - (struct slave_l2cap_instance_s *) link; -@@ -1304,7 +1306,7 @@ static void l2cap_lmp_acl_data_slave(struct bt_link_s *link, - - /* Stub */ - static void l2cap_lmp_acl_data_host(struct bt_link_s *link, -- const uint8_t *data, int start, int len) -+ const uint8_t *data, int start, size_t len) - { - struct bt_l2cap_device_s *dev = (struct bt_l2cap_device_s *) link->host; - struct l2cap_instance_s *l2cap = -diff --git a/hw/bt/sdp.c b/hw/bt/sdp.c -index 2860d76..6bfb174 100644 ---- a/hw/bt/sdp.c -+++ b/hw/bt/sdp.c -@@ -496,7 +496,7 @@ static ssize_t sdp_svc_search_attr_get(struct bt_l2cap_sdp_state_s *sdp, - 
return end + 2; - } - --static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, int len) -+static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, size_t len) - { - struct bt_l2cap_sdp_state_s *sdp = opaque; - enum bt_sdp_cmd pdu_id; -@@ -506,7 +506,7 @@ static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, int len) - int rsp_len = 0; - - if (len < 5) { -- error_report("%s: short SDP PDU (%iB).", __func__, len); -+ error_report("%s: short SDP PDU (%zuB).", __func__, len); - return; - } - -@@ -517,7 +517,7 @@ static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, int len) - len -= 5; - - if (len != plen) { -- error_report("%s: wrong SDP PDU length (%iB != %iB).", -+ error_report("%s: wrong SDP PDU length (%iB != %zuB).", - __func__, plen, len); - err = SDP_INVALID_PDU_SIZE; - goto respond; -diff --git a/hw/usb/dev-bluetooth.c b/hw/usb/dev-bluetooth.c -index 670ba32..240a901 100644 ---- a/hw/usb/dev-bluetooth.c -+++ b/hw/usb/dev-bluetooth.c -@@ -265,7 +265,7 @@ static void usb_bt_fifo_reset(struct usb_hci_in_fifo_s *fifo) - } - - static void usb_bt_fifo_enqueue(struct usb_hci_in_fifo_s *fifo, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - int off = fifo->dstart + fifo->dlen; - uint8_t *buf; -@@ -274,13 +274,13 @@ static void usb_bt_fifo_enqueue(struct usb_hci_in_fifo_s *fifo, - if (off <= DFIFO_LEN_MASK) { - if (off + len > DFIFO_LEN_MASK + 1 && - (fifo->dsize = off + len) > (DFIFO_LEN_MASK + 1) * 2) { -- fprintf(stderr, "%s: can't alloc %i bytes\n", __func__, len); -+ fprintf(stderr, "%s: can't alloc %zu bytes\n", __func__, len); - exit(-1); - } - buf = fifo->data + off; - } else { - if (fifo->dlen > fifo->dsize) { -- fprintf(stderr, "%s: can't alloc %i bytes\n", __func__, len); -+ fprintf(stderr, "%s: can't alloc %zu bytes\n", __func__, len); - exit(-1); - } - buf = fifo->data + off - fifo->dsize; -@@ -319,7 +319,7 @@ static inline void usb_bt_fifo_dequeue(struct usb_hci_in_fifo_s *fifo, - - 
static inline void usb_bt_fifo_out_enqueue(struct USBBtState *s, - struct usb_hci_out_fifo_s *fifo, -- void (*send)(struct HCIInfo *, const uint8_t *, int), -+ void (*send)(struct HCIInfo *, const uint8_t *, size_t), - int (*complete)(const uint8_t *, int), - USBPacket *p) - { -@@ -478,7 +478,7 @@ static void usb_bt_handle_data(USBDevice *dev, USBPacket *p) - } - - static void usb_bt_out_hci_packet_event(void *opaque, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - struct USBBtState *s = (struct USBBtState *) opaque; - -@@ -489,7 +489,7 @@ static void usb_bt_out_hci_packet_event(void *opaque, - } - - static void usb_bt_out_hci_packet_acl(void *opaque, -- const uint8_t *data, int len) -+ const uint8_t *data, size_t len) - { - struct USBBtState *s = (struct USBBtState *) opaque; - -diff --git a/include/hw/bt.h b/include/hw/bt.h -index b5e11d4..bc362aa 100644 ---- a/include/hw/bt.h -+++ b/include/hw/bt.h -@@ -94,9 +94,9 @@ struct bt_device_s { - void (*lmp_disconnect_master)(struct bt_link_s *link); - void (*lmp_disconnect_slave)(struct bt_link_s *link); - void (*lmp_acl_data)(struct bt_link_s *link, const uint8_t *data, -- int start, int len); -+ int start, size_t len); - void (*lmp_acl_resp)(struct bt_link_s *link, const uint8_t *data, -- int start, int len); -+ int start, size_t len); - void (*lmp_mode_change)(struct bt_link_s *link); - - void (*handle_destroy)(struct bt_device_s *device); -@@ -148,12 +148,12 @@ struct bt_l2cap_device_s { - - struct bt_l2cap_conn_params_s { - /* Input */ -- uint8_t *(*sdu_out)(struct bt_l2cap_conn_params_s *chan, int len); -+ uint8_t *(*sdu_out)(struct bt_l2cap_conn_params_s *chan, size_t len); - void (*sdu_submit)(struct bt_l2cap_conn_params_s *chan); - int remote_mtu; - /* Output */ - void *opaque; -- void (*sdu_in)(void *opaque, const uint8_t *data, int len); -+ void (*sdu_in)(void *opaque, const uint8_t *data, size_t len); - void (*close)(void *opaque); - }; - -diff --git a/include/sysemu/bt.h 
b/include/sysemu/bt.h -index 2fd8c0f..df8fb63 100644 ---- a/include/sysemu/bt.h -+++ b/include/sysemu/bt.h -@@ -5,12 +5,12 @@ - - typedef struct HCIInfo { - int (*bdaddr_set)(struct HCIInfo *hci, const uint8_t *bd_addr); -- void (*cmd_send)(struct HCIInfo *hci, const uint8_t *data, int len); -- void (*sco_send)(struct HCIInfo *hci, const uint8_t *data, int len); -- void (*acl_send)(struct HCIInfo *hci, const uint8_t *data, int len); -+ void (*cmd_send)(struct HCIInfo *hci, const uint8_t *data, size_t len); -+ void (*sco_send)(struct HCIInfo *hci, const uint8_t *data, size_t len); -+ void (*acl_send)(struct HCIInfo *hci, const uint8_t *data, size_t len); - void *opaque; -- void (*evt_recv)(void *opaque, const uint8_t *data, int len); -- void (*acl_recv)(void *opaque, const uint8_t *data, int len); -+ void (*evt_recv)(void *opaque, const uint8_t *data, size_t len); -+ void (*acl_recv)(void *opaque, const uint8_t *data, size_t len); - } HCIInfo; - - /* bt-host.c */ --- -1.8.3.1 - diff --git a/bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch b/bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch deleted file mode 100644 index e714cb10bef351cd6ed6df71d08f3666df879ed5..0000000000000000000000000000000000000000 --- a/bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch +++ /dev/null @@ -1,44 +0,0 @@ -From b9e4a4ff6f3292927adb1463777c86cd4063a6ef Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Sat, 18 Apr 2020 12:10:11 +0800 -Subject: [PATCH] bugfix: Use gicr_typer in arm_gicv3_icc_reset - -The KVM_VGIC_ATTR macro expect the second parameter as gicr_typer, -of which high 32bit is constructed by mp_affinity. For most case, -the high 32bit of mp_affinity is zero, so it will always access the -ICC_CTLR_EL1 of CPU0. 
- -Signed-off-by: Keqian Zhu ---- - hw/intc/arm_gicv3_kvm.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c -index d9c72f85be..b1e74147ba 100644 ---- a/hw/intc/arm_gicv3_kvm.c -+++ b/hw/intc/arm_gicv3_kvm.c -@@ -661,13 +661,11 @@ static void kvm_arm_gicv3_get(GICv3State *s) - - static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) - { -- ARMCPU *cpu; - GICv3State *s; - GICv3CPUState *c; - - c = (GICv3CPUState *)env->gicv3state; - s = c->gic; -- cpu = ARM_CPU(c->cpu); - - c->icc_pmr_el1 = 0; - c->icc_bpr[GICV3_G0] = GIC_MIN_BPR; -@@ -684,7 +682,7 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) - - /* Initialize to actual HW supported configuration */ - kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, -- KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), -+ KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer), - &c->icc_ctlr_el1[GICV3_NS], false, &error_abort); - - c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; --- -2.19.1 diff --git a/bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch b/bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch new file mode 100644 index 0000000000000000000000000000000000000000..1160489609bce25c0fceffb2c8d98c04ba2283a3 --- /dev/null +++ b/bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch @@ -0,0 +1,49 @@ +From 6588c017de54bab8a11509d43e2ddabf065cfa50 Mon Sep 17 00:00:00 2001 +From: jiangdongxu +Date: Thu, 10 Feb 2022 21:50:28 +0800 +Subject: [PATCH] bugfix: fix eventfds may double free when vm_id reused in + ivshmem + +As the ivshmem Server-Client Protocol describes, when a +client disconnects from the server, server sends disconnect +notifications to the other clients. And the other clients +will free the eventfds of the disconnected client according +to the client ID. If the client ID is reused, the eventfds +may be double freed.
+ +It will be solved by setting eventfds to NULL after freeing +and allocating memory for it when it's used. + +Signed-off-by: Peng Liang +Signed-off-by: jiangdongxu +Signed-off-by: Adttil +--- + hw/misc/ivshmem.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index 0447888029..ad9a3c546e 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -400,6 +400,7 @@ static void close_peer_eventfds(IVShmemState *s, int posn) + } + + g_free(s->peers[posn].eventfds); ++ s->peers[posn].eventfds = NULL; + s->peers[posn].nb_eventfds = 0; + } + +@@ -533,6 +534,10 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, + close(fd); + return; + } ++ if (peer->eventfds == NULL) { ++ peer->eventfds = g_new0(EventNotifier, s->vectors); ++ peer->nb_eventfds = 0; ++ } + vector = peer->nb_eventfds++; + + IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); +-- +2.27.0 + diff --git a/bugfix-fix-possible-memory-leak.patch b/bugfix-fix-possible-memory-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..34d88766ee9e88a8345ff2c247e51e7045a3e159 --- /dev/null +++ b/bugfix-fix-possible-memory-leak.patch @@ -0,0 +1,98 @@ +From e6a20580801314e9d47682d7b8d8161c030eab04 Mon Sep 17 00:00:00 2001 +From: jiangdongxu +Date: Thu, 10 Feb 2022 22:12:50 +0800 +Subject: [PATCH] bugfix: fix possible memory leak + +Signed-off-by: caojinhua +Signed-off-by: jiangdongxu +Signed-off-by: Adttil +--- + migration/savevm.c | 2 ++ + qga/main.c | 18 +++++++++++++----- + 2 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/migration/savevm.c b/migration/savevm.c +index eec5503a42..477a19719f 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1553,6 +1553,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + ret = vmstate_save(f, se, vmdesc); + if (ret) { + qemu_file_set_error(f, ret); ++ json_writer_free(vmdesc); + return ret; + } + +@@ -1572,6 +1573,7 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + migrate_set_error(ms, local_err); + error_report_err(local_err); + qemu_file_set_error(f, ret); ++ json_writer_free(vmdesc); + return ret; + } + } +diff --git a/qga/main.c b/qga/main.c +index 8668b9f3d3..c4dcbb86be 100644 +--- a/qga/main.c ++++ b/qga/main.c +@@ -1399,7 +1399,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + if (g_mkdir_with_parents(config->state_dir, S_IRWXU) == -1) { + g_critical("unable to create (an ancestor of) the state directory" + " '%s': %s", config->state_dir, strerror(errno)); +- return NULL; ++ goto failed; + } + #endif + +@@ -1424,7 +1424,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + if (!log_file) { + g_critical("unable to open specified log file: %s", + strerror(errno)); +- return NULL; ++ goto failed; + } + s->log_file = log_file; + } +@@ -1435,7 +1435,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + s->pstate_filepath, + ga_is_frozen(s))) { + g_critical("failed to load persistent state"); +- return NULL; ++ goto failed; + } + + if (config->allowedrpcs) { +@@ -1465,7 +1465,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + #ifndef _WIN32 + if (!register_signal_handlers()) { + g_critical("failed to register signal handlers"); +- return NULL; ++ goto failed; + } + #endif + +@@ -1478,12 +1478,20 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + s->wakeup_event = CreateEvent(NULL, TRUE, FALSE, TEXT("WakeUp")); + if (s->wakeup_event == NULL) { + g_critical("CreateEvent failed"); +- return NULL; ++ goto failed; + } + #endif + + ga_state = s; + return s; ++failed: ++ g_free(s->pstate_filepath); ++ g_free(s->state_filepath_isfrozen); ++ if (s->log_file) { ++ fclose(s->log_file); ++ } ++ g_free(s); ++ return NULL; + } + + static void cleanup_agent(GAState *s) +-- +2.27.0 + diff --git 
a/bugfix-fix-some-illegal-memory-access-and-memory-lea.patch b/bugfix-fix-some-illegal-memory-access-and-memory-lea.patch new file mode 100644 index 0000000000000000000000000000000000000000..18c983974ba07088dcb79920b2b84926f35bef45 --- /dev/null +++ b/bugfix-fix-some-illegal-memory-access-and-memory-lea.patch @@ -0,0 +1,27 @@ +From 35054aa25a0d7758a35d75e3298555b502e37b0f Mon Sep 17 00:00:00 2001 +From: jiangdongxu +Date: Thu, 10 Feb 2022 21:32:37 +0800 +Subject: [PATCH] bugfix: fix some illegal memory access and memory leak + +Signed-off-by: yuxiating +Signed-off-by: jiangdongxu +Signed-off-by: Adttil +--- + util/range.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/util/range.c b/util/range.c +index f3f40098d5..2ea640662b 100644 +--- a/util/range.c ++++ b/util/range.c +@@ -61,6 +61,7 @@ GList *range_list_insert(GList *list, Range *data) + range_extend(l->data, l->next->data); + g_free(l->next->data); + new_l = g_list_delete_link(list, l->next); ++ l->next = NULL; + assert(new_l == list); + } + +-- +2.27.0 + diff --git a/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch b/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d45a21f04698d0c4def05fce0fbe86fc84b8cf0 --- /dev/null +++ b/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch @@ -0,0 +1,32 @@ +From 48a328ee1a5a71b7048e4591310471c759fc5af6 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 27 Jul 2020 20:39:07 +0800 +Subject: [PATCH] bugfix: irq: Avoid covering object refcount of qemu_irq + +Avoid covering object refcount of qemu_irq, otherwise it may cause +memory leak.
+ +Signed-off-by: Keqian Zhu +--- + hw/core/irq.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/core/irq.c b/hw/core/irq.c +index 3f14e2dda7..df9b5dac9b 100644 +--- a/hw/core/irq.c ++++ b/hw/core/irq.c +@@ -110,7 +110,10 @@ void qemu_irq_intercept_in(qemu_irq *gpio_in, qemu_irq_handler handler, int n) + int i; + qemu_irq *old_irqs = qemu_allocate_irqs(NULL, NULL, n); + for (i = 0; i < n; i++) { +- *old_irqs[i] = *gpio_in[i]; ++ old_irqs[i]->handler = gpio_in[i]->handler; ++ old_irqs[i]->opaque = gpio_in[i]->opaque; ++ old_irqs[i]->n = gpio_in[i]->n; ++ + gpio_in[i]->handler = handler; + gpio_in[i]->opaque = &old_irqs[i]; + } +-- +2.27.0 + diff --git a/build-smt-processor-structure-to-support-smt-topolog.patch b/build-smt-processor-structure-to-support-smt-topolog.patch deleted file mode 100644 index ed01d38bb17dd4931a1f23b5a21127a2c5dd0425..0000000000000000000000000000000000000000 --- a/build-smt-processor-structure-to-support-smt-topolog.patch +++ /dev/null @@ -1,104 +0,0 @@ -From af8740502815be450709e88df44ad322da2b071f Mon Sep 17 00:00:00 2001 -From: Henglong Fan -Date: Tue, 18 Aug 2020 21:42:33 +0800 -Subject: [PATCH] build smt processor structure to support smt topology - -if vcpu support smt, create new smt hierarchy according to -Processor Properties Topology Table(PPTT) in acpi spec 6.3. 
-Threads sharing a core must be grouped under a unique Processor -hierarchy node structure for each group of threads - -Signed-off-by: Henglong Fan ---- - hw/acpi/aml-build.c | 40 ++++++++++++++++++++++++++++++++-------- - 1 file changed, 32 insertions(+), 8 deletions(-) - -diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 74e95005..8a3b51c8 100644 ---- a/hw/acpi/aml-build.c -+++ b/hw/acpi/aml-build.c -@@ -53,7 +53,7 @@ static void build_append_array(GArray *array, GArray *val) - } - - /* -- * ACPI 6.2 Processor Properties Topology Table (PPTT) -+ * ACPI 6.3 Processor Properties Topology Table (PPTT) - */ - #ifdef __aarch64__ - static void build_cache_head(GArray *tbl, uint32_t next_level) -@@ -126,7 +126,7 @@ static void build_arm_socket_hierarchy(GArray *tbl, - build_append_int_noprefix(tbl, offset, 4); - } - --static void build_arm_cpu_hierarchy(GArray *tbl, -+static void build_arm_core_hierarchy(GArray *tbl, - struct offset_status *offset, uint32_t id) - { - if (!offset) { -@@ -144,18 +144,35 @@ static void build_arm_cpu_hierarchy(GArray *tbl, - build_append_int_noprefix(tbl, offset->l2_offset, 4); - } - -+static void build_arm_smt_hierarchy(GArray *tbl, -+ uint32_t offset, uint32_t id) -+{ -+ if (!offset) { -+ return; -+ } -+ build_append_byte(tbl, 0); /* Type 0 - processor */ -+ build_append_byte(tbl, 20); /* Length, add private resources */ -+ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ -+ build_append_int_noprefix(tbl, 14, 4); /* Valid id*/ -+ build_append_int_noprefix(tbl, offset, 4); -+ build_append_int_noprefix(tbl, id, 4); -+ build_append_int_noprefix(tbl, 0, 4); /* Num private resources */ -+} -+ - void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) - { - int pptt_start = table_data->len; -- int uid = 0, cpus = 0, socket; -+ int uid = 0, socket; -+ uint32_t core_offset; - struct offset_status offset; - const MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cores = ms->smp.cores; -+ unsigned 
int smp_sockets = ms->smp.cpus / (smp_cores * ms->smp.threads); - - acpi_data_push(table_data, sizeof(AcpiTableHeader)); - -- for (socket = 0; cpus < possible_cpus; socket++) { -- int core; -+ for (socket = 0; socket < smp_sockets; socket++) { -+ int core,thread; - uint32_t l3_offset = table_data->len - pptt_start; - build_cache_hierarchy(table_data, 0, ARM_L3_CACHE); - -@@ -169,14 +186,21 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) - build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1D_CACHE); - offset.l1i_offset = table_data->len - pptt_start; - build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1I_CACHE); -- build_arm_cpu_hierarchy(table_data, &offset, uid++); -- cpus++; -+ core_offset = table_data->len - pptt_start; -+ if (ms->smp.threads <= 1) { -+ build_arm_core_hierarchy(table_data, &offset, uid++); -+ } else { -+ build_arm_core_hierarchy(table_data, &offset, core); -+ for (thread = 0; thread < ms->smp.threads; thread++) { -+ build_arm_smt_hierarchy(table_data, core_offset, uid++); -+ } -+ } - } - } - - build_header(linker, table_data, - (void *)(table_data->data + pptt_start), "PPTT", -- table_data->len - pptt_start, 1, NULL, NULL); -+ table_data->len - pptt_start, 2, NULL, NULL); - } - - #else --- -2.23.0 - diff --git a/char-fix-use-after-free-with-dup-chardev-reconnect.patch b/char-fix-use-after-free-with-dup-chardev-reconnect.patch deleted file mode 100644 index fd81015a18beced443caef903d0ec1f2a1fd8850..0000000000000000000000000000000000000000 --- a/char-fix-use-after-free-with-dup-chardev-reconnect.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 902a8192600ff81681a162509e23bf95619d1f04 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 20 Apr 2020 13:20:12 +0200 -Subject: [PATCH] char: fix use-after-free with dup chardev & reconnect -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -With a reconnect socket, qemu_char_open() will start a 
background -thread. It should keep a reference on the chardev. - -Fixes invalid read: -READ of size 8 at 0x6040000ac858 thread T7 - #0 0x5555598d37b8 in unix_connect_saddr /home/elmarco/src/qq/util/qemu-sockets.c:954 - #1 0x5555598d4751 in socket_connect /home/elmarco/src/qq/util/qemu-sockets.c:1109 - #2 0x555559707c34 in qio_channel_socket_connect_sync /home/elmarco/src/qq/io/channel-socket.c:145 - #3 0x5555596adebb in tcp_chr_connect_client_task /home/elmarco/src/qq/chardev/char-socket.c:1104 - #4 0x555559723d55 in qio_task_thread_worker /home/elmarco/src/qq/io/task.c:123 - #5 0x5555598a6731 in qemu_thread_start /home/elmarco/src/qq/util/qemu-thread-posix.c:519 - #6 0x7ffff40d4431 in start_thread (/lib64/libpthread.so.0+0x9431) - #7 0x7ffff40029d2 in __clone (/lib64/libc.so.6+0x1019d2) - -Signed-off-by: Marc-André Lureau -Reviewed-by: Daniel P. Berrangé -Message-Id: <20200420112012.567284-1-marcandre.lureau@redhat.com> -Signed-off-by: Zhenyu Ye ---- - chardev/char-socket.c | 3 ++- - tests/test-char.c | 53 ++++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 54 insertions(+), 2 deletions(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 7ca5d97a..701b62f9 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -1118,7 +1118,8 @@ static void tcp_chr_connect_client_async(Chardev *chr) - */ - s->connect_task = qio_task_new(OBJECT(sioc), - qemu_chr_socket_connected, -- chr, NULL); -+ object_ref(OBJECT(chr)), -+ (GDestroyNotify)object_unref); - qio_task_run_in_thread(s->connect_task, - tcp_chr_connect_client_task, - s->addr, -diff --git a/tests/test-char.c b/tests/test-char.c -index f9440cdc..0e4069fb 100644 ---- a/tests/test-char.c -+++ b/tests/test-char.c -@@ -871,6 +871,53 @@ typedef struct { - } CharSocketClientTestConfig; - - -+static void char_socket_client_dupid_test(gconstpointer opaque) -+{ -+ const CharSocketClientTestConfig *config = opaque; -+ QIOChannelSocket *ioc; -+ char *optstr; -+ Chardev *chr1, *chr2; -+ 
SocketAddress *addr; -+ QemuOpts *opts; -+ Error *local_err = NULL; -+ -+ /* -+ * Setup a listener socket and determine get its address -+ * so we know the TCP port for the client later -+ */ -+ ioc = qio_channel_socket_new(); -+ g_assert_nonnull(ioc); -+ qio_channel_socket_listen_sync(ioc, config->addr, &error_abort); -+ addr = qio_channel_socket_get_local_address(ioc, &error_abort); -+ g_assert_nonnull(addr); -+ -+ /* -+ * Populate the chardev address based on what the server -+ * is actually listening on -+ */ -+ optstr = char_socket_addr_to_opt_str(addr, -+ config->fd_pass, -+ config->reconnect, -+ false); -+ -+ opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), -+ optstr, true); -+ g_assert_nonnull(opts); -+ chr1 = qemu_chr_new_from_opts(opts, NULL, &error_abort); -+ g_assert_nonnull(chr1); -+ -+ chr2 = qemu_chr_new_from_opts(opts, NULL, &local_err); -+ g_assert_null(chr2); -+ error_free_or_abort(&local_err); -+ -+ object_unref(OBJECT(ioc)); -+ qemu_opts_del(opts); -+ object_unparent(OBJECT(chr1)); -+ qapi_free_SocketAddress(addr); -+ g_free(optstr); -+} -+ -+ - static void char_socket_client_test(gconstpointer opaque) - { - const CharSocketClientTestConfig *config = opaque; -@@ -1425,6 +1472,8 @@ int main(int argc, char **argv) - { addr, NULL, false, true }; \ - CharSocketClientTestConfig client6 ## name = \ - { addr, NULL, true, true }; \ -+ CharSocketClientTestConfig client7 ## name = \ -+ { addr, ",reconnect=1", false, false }; \ - g_test_add_data_func("/char/socket/client/mainloop/" # name, \ - &client1 ##name, char_socket_client_test); \ - g_test_add_data_func("/char/socket/client/wait-conn/" # name, \ -@@ -1436,7 +1485,9 @@ int main(int argc, char **argv) - g_test_add_data_func("/char/socket/client/mainloop-fdpass/" # name, \ - &client5 ##name, char_socket_client_test); \ - g_test_add_data_func("/char/socket/client/wait-conn-fdpass/" # name, \ -- &client6 ##name, char_socket_client_test) -+ &client6 ##name, char_socket_client_test); \ -+ 
g_test_add_data_func("/char/socket/client/dupid-reconnect/" # name, \ -+ &client7 ##name, char_socket_client_dupid_test) - - SOCKET_SERVER_TEST(tcp, &tcpaddr); - SOCKET_CLIENT_TEST(tcp, &tcpaddr); --- -2.22.0.windows.1 - diff --git a/char-stdio-Restore-blocking-mode-of-stdout-on-exit.patch b/char-stdio-Restore-blocking-mode-of-stdout-on-exit.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e5664ca182074dfda72806a439bca8eb95c89ab --- /dev/null +++ b/char-stdio-Restore-blocking-mode-of-stdout-on-exit.patch @@ -0,0 +1,54 @@ +From 5661b12a28b650226cca100aeddd92d5cc788153 Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 20:41:18 +0800 +Subject: [PATCH] char-stdio: Restore blocking mode of stdout on exit + +cherry picked from commit a0124e333e2176640f233e5ea57a2f413985d9b5 + +qemu_chr_open_fd() sets stdout into non-blocking mode. Restore the old +fd flags on exit to avoid breaking unsuspecting applications that run on +the same terminal after qemu and don't expect to get EAGAIN. + +While at it, also ensure term_exit is called once (at the moment it's +called both from char_stdio_finalize() and as the atexit() hook).
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2423 +Signed-off-by: Maxim Mikityanskiy +Link: https://lore.kernel.org/r/20240703190812.3459514-1-m +Signed-off-by: Gao Jiazhen +--- + chardev/char-stdio.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/chardev/char-stdio.c b/chardev/char-stdio.c +index 3c648678ab..b960ddd4e4 100644 +--- a/chardev/char-stdio.c ++++ b/chardev/char-stdio.c +@@ -41,6 +41,7 @@ + /* init terminal so that we can grab keys */ + static struct termios oldtty; + static int old_fd0_flags; ++static int old_fd1_flags; + static bool stdio_in_use; + static bool stdio_allow_signal; + static bool stdio_echo_state; +@@ -50,6 +51,8 @@ static void term_exit(void) + if (stdio_in_use) { + tcsetattr(0, TCSANOW, &oldtty); + fcntl(0, F_SETFL, old_fd0_flags); ++ fcntl(1, F_SETFL, old_fd1_flags); ++ stdio_in_use = false; + } + } + +@@ -102,6 +105,7 @@ static void qemu_chr_open_stdio(Chardev *chr, + + stdio_in_use = true; + old_fd0_flags = fcntl(0, F_GETFL); ++ old_fd1_flags = fcntl(1, F_GETFL); + tcgetattr(0, &oldtty); + if (!g_unix_set_fd_nonblocking(0, true, NULL)) { + error_setg_errno(errp, errno, "Failed to set FD nonblocking"); +-- +2.41.0.windows.1 + diff --git a/chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch b/chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f494ea7f0f43d02b856ea9f1b3d7836a40e7d202 --- /dev/null +++ b/chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch @@ -0,0 +1,91 @@ +From 2d0d05b7d5925f71d7ddd4df9f1ac12add453298 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 7 Mar 2024 10:39:23 +0800 +Subject: [PATCH] chardev/char-socket: Fix TLS io channels sending too much + data to the backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 462945cd22d2bcd233401ed3aa167d83a8e35b05 + +Commit ffda5db65a ("io/channel-tls: fix handling of bigger read buffers")
+changed the behavior of the TLS io channels to schedule a second reading +attempt if there is still incoming data pending. This caused a regression +with backends like the sclpconsole that check in their read function that +the sender does not try to write more bytes to it than the device can +currently handle. + +The problem can be reproduced like this: + + 1) In one terminal, do this: + + mkdir qemu-pki + cd qemu-pki + openssl genrsa 2048 > ca-key.pem + openssl req -new -x509 -nodes -days 365000 -key ca-key.pem -out ca-cert.pem + # enter some dummy value for the cert + openssl genrsa 2048 > server-key.pem + openssl req -new -x509 -nodes -days 365000 -key server-key.pem \ + -out server-cert.pem + # enter some other dummy values for the cert + + gnutls-serv --echo --x509cafile ca-cert.pem --x509keyfile server-key.pem \ + --x509certfile server-cert.pem -p 8338 + + 2) In another terminal, do this: + + wget https://download.fedoraproject.org/pub/fedora-secondary/releases/39/Cloud/s390x/images/Fedora-Cloud-Base-39-1.5.s390x.qcow2 + + qemu-system-s390x -nographic -nodefaults \ + -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 \ + -object tls-creds-x509,id=tls0,endpoint=client,verify-peer=false,dir=$PWD/qemu-pki \ + -chardev socket,id=tls_chardev,host=localhost,port=8338,tls-creds=tls0 \ + -device sclpconsole,chardev=tls_chardev,id=tls_serial + +QEMU then aborts after a second or two with: + + qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion + `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. + Aborted (core dumped) + +It looks like the second read does not trigger the chr_can_read() function +to be called before the second read, which should normally always be done +before sending bytes to a character device to see how much it can handle, +so the s->max_size in tcp_chr_read() still contains the old value from the +previous read. Let's make sure that we use the up-to-date value by calling +tcp_chr_read_poll() again here. 
+ +Fixes: ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") +Buglink: https://issues.redhat.com/browse/RHEL-24614 +Reviewed-by: "Daniel P. Berrangé" +Message-ID: <20240229104339.42574-1-thuth@redhat.com> +Reviewed-by: Antoine Damhet +Tested-by: Antoine Damhet +Reviewed-by: Marc-André Lureau +Signed-off-by: Thomas Huth +Signed-off-by: qihao_yewu +--- + chardev/char-socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 73947da188..034840593d 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) + s->max_size <= 0) { + return TRUE; + } +- len = sizeof(buf); +- if (len > s->max_size) { +- len = s->max_size; ++ len = tcp_chr_read_poll(opaque); ++ if (len > sizeof(buf)) { ++ len = sizeof(buf); + } + size = tcp_chr_recv(chr, (void *)buf, len); + if (size == 0 || (size == -1 && errno != EAGAIN)) { +-- +2.27.0 + diff --git a/chardev-tcp-Fix-error-message-double-free-error.patch b/chardev-tcp-Fix-error-message-double-free-error.patch deleted file mode 100644 index 175ddfe2dea85111016d162aa0cd95d79a49a492..0000000000000000000000000000000000000000 --- a/chardev-tcp-Fix-error-message-double-free-error.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 4488ab4700d344b049ddef808a64eda4b5867902 Mon Sep 17 00:00:00 2001 -From: lichun -Date: Mon, 22 Jun 2020 05:30:17 +0800 -Subject: [PATCH 06/11] chardev/tcp: Fix error message double free error - -Errors are already freed by error_report_err, so we only need to call -error_free when that function is not called. 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: lichun -Message-Id: <20200621213017.17978-1-lichun@ruijie.com.cn> -Reviewed-by: Markus Armbruster -[Commit message improved, cc: qemu-stable] -Signed-off-by: Markus Armbruster -Signed-off-by: BiaoXiang Ye ---- - chardev/char-socket.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 701b62f9..9b06c8aa 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -141,6 +141,8 @@ static void check_report_connect_error(Chardev *chr, - error_report("Unable to connect character device %s: %s", - chr->label, error_get_pretty(err)); - s->connect_err_reported = true; -+ } else { -+ error_free(err); - } - qemu_chr_socket_restart_timer(chr); - } -@@ -1074,7 +1076,6 @@ static void qemu_chr_socket_connected(QIOTask *task, void *opaque) - if (qio_task_propagate_error(task, &err)) { - tcp_chr_change_state(s, TCP_CHARDEV_STATE_DISCONNECTED); - check_report_connect_error(chr, err); -- error_free(err); - goto cleanup; - } - --- -2.27.0.dirty - diff --git a/colo-compare-Fix-memory-leak-in-packet_enqueue.patch b/colo-compare-Fix-memory-leak-in-packet_enqueue.patch deleted file mode 100644 index ca5e43c49a6ad18fa7c6d204c1eabfac7ed6ddd5..0000000000000000000000000000000000000000 --- a/colo-compare-Fix-memory-leak-in-packet_enqueue.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 19afb1431bd730a1e4e09e3c0835c35572517268 Mon Sep 17 00:00:00 2001 -From: Derek Su -Date: Fri, 22 May 2020 15:53:57 +0800 -Subject: [PATCH 07/11] colo-compare: Fix memory leak in packet_enqueue() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The patch is to fix the "pkt" memory leak in packet_enqueue(). -The allocated "pkt" needs to be freed if the colo compare -primary or secondary queue is too big. - -Replace the error_report of full queue with a trace event. 
- -Signed-off-by: Derek Su -Reviewed-by: Zhang Chen -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Zhang Chen -Signed-off-by: Jason Wang -Signed-off-by: BiaoXiang Ye ---- - net/colo-compare.c | 23 +++++++++++++++-------- - net/trace-events | 1 + - 2 files changed, 16 insertions(+), 8 deletions(-) - -diff --git a/net/colo-compare.c b/net/colo-compare.c -index 7ee17f2c..3168407e 100644 ---- a/net/colo-compare.c -+++ b/net/colo-compare.c -@@ -120,6 +120,10 @@ enum { - SECONDARY_IN, - }; - -+static const char *colo_mode[] = { -+ [PRIMARY_IN] = "primary", -+ [SECONDARY_IN] = "secondary", -+}; - - static int compare_chr_send(CompareState *s, - const uint8_t *buf, -@@ -215,6 +219,7 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) - ConnectionKey key; - Packet *pkt = NULL; - Connection *conn; -+ int ret; - - if (mode == PRIMARY_IN) { - pkt = packet_new(s->pri_rs.buf, -@@ -243,16 +248,18 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) - } - - if (mode == PRIMARY_IN) { -- if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) { -- error_report("colo compare primary queue size too big," -- "drop packet"); -- } -+ ret = colo_insert_packet(&conn->primary_list, pkt, &conn->pack); - } else { -- if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) { -- error_report("colo compare secondary queue size too big," -- "drop packet"); -- } -+ ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack); - } -+ -+ if (!ret) { -+ trace_colo_compare_drop_packet(colo_mode[mode], -+ "queue size too big, drop packet"); -+ packet_destroy(pkt, NULL); -+ pkt = NULL; -+ } -+ - *con = conn; - - return 0; -diff --git a/net/trace-events b/net/trace-events -index ac570564..a9995387 100644 ---- a/net/trace-events -+++ b/net/trace-events -@@ -12,6 +12,7 @@ colo_proxy_main(const char *chr) ": %s" - - # colo-compare.c - colo_compare_main(const char *chr) ": %s" -+colo_compare_drop_packet(const char *queue, const char *chr) 
": %s: %s" - colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d" - colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" - colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" --- -2.27.0.dirty - diff --git a/confidential-guest-support-introduce-ConfidentialGue.patch b/confidential-guest-support-introduce-ConfidentialGue.patch new file mode 100644 index 0000000000000000000000000000000000000000..95c1cc06e7d0c88affa517983d678e3b10119e04 --- /dev/null +++ b/confidential-guest-support-introduce-ConfidentialGue.patch @@ -0,0 +1,67 @@ +From da96618de3227b87ddd78388b80278bde230ce79 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 11:41:37 +0000 +Subject: [PATCH] confidential guest support: introduce + ConfidentialGuestMemoryEncryptionOps for encrypted VMs + +cherry-picked from https://github.com/AMDESE/qemu/commit/74fce7be9bd. + +When memory encryption is enabled in VM, the guest RAM will be encrypted +with the guest-specific key, to protect the confidentiality of data while +in transit we need to platform specific hooks to save or migrate the +guest RAM. + +Introduce the new ConfidentialGuestMemoryEncryptionOps in this patch +which will be later used by the encrypted guest for migration. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Ashish Kalra +Signed-off-by: Ashish Kalra +Signed-off-by: hanliyang +--- + include/exec/confidential-guest-support.h | 27 +++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index ba2dd4b5df..343f686fc2 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -53,8 +53,35 @@ struct ConfidentialGuestSupport { + bool ready; + }; + ++/** ++ * The functions registers with ConfidentialGuestMemoryEncryptionOps will be ++ * used during the encrypted guest migration. ++ */ ++struct ConfidentialGuestMemoryEncryptionOps { ++ /* Initialize the platform specific state before starting the migration */ ++ int (*save_setup)(const char *pdh, const char *plat_cert, ++ const char *amd_cert); ++ ++ /* Write the encrypted page and metadata associated with it */ ++ int (*save_outgoing_page)(QEMUFile *f, uint8_t *ptr, uint32_t size, ++ uint64_t *bytes_sent); ++ ++ /* Load the incoming encrypted page into guest memory */ ++ int (*load_incoming_page)(QEMUFile *f, uint8_t *ptr); ++ ++ /* Check if gfn is in shared/unencrypted region */ ++ bool (*is_gfn_in_unshared_region)(unsigned long gfn); ++ ++ /* Write the shared regions list */ ++ int (*save_outgoing_shared_regions_list)(QEMUFile *f); ++ ++ /* Load the shared regions list */ ++ int (*load_incoming_shared_regions_list)(QEMUFile *f); ++}; ++ + typedef struct ConfidentialGuestSupportClass { + ObjectClass parent; ++ struct ConfidentialGuestMemoryEncryptionOps *memory_encryption_ops; + } ConfidentialGuestSupportClass; + + #endif /* !CONFIG_USER_ONLY */ +-- +2.41.0.windows.1 + diff --git a/configure-Add-linux-header-compile-support-for-Loong.patch b/configure-Add-linux-header-compile-support-for-Loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..cc73eaf6537c25051767305ad40c3cd75dabb9b0 --- /dev/null +++ 
b/configure-Add-linux-header-compile-support-for-Loong.patch @@ -0,0 +1,40 @@ +From b21a705562867cc9dcbf0012ffa200caad8458ba Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Tue, 16 Jan 2024 09:39:52 +0800 +Subject: [PATCH] configure: Add linux header compile support for LoongArch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When compiling qemu with system KVM mode for LoongArch, header files +in directory linux-headers/asm-loongarch should be used firstly. +Otherwise it fails to find kvm.h on system with old glibc, since +latest kernel header files are not installed. + +This patch adds linux_arch definition for LoongArch system so that +header files in directory linux-headers/asm-loongarch can be included. + +Fixes: 714b03c125 ("target/loongarch: Add loongarch kvm into meson build") +Signed-off-by: Bibo Mao +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240116013952.264474-1-maobibo@loongson.cn> +Signed-off-by: Philippe Mathieu-Daudé +--- + configure | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configure b/configure +index bdda912f36..6036de83a4 100755 +--- a/configure ++++ b/configure +@@ -445,6 +445,7 @@ case "$cpu" in + loongarch*) + cpu=loongarch64 + host_arch=loongarch64 ++ linux_arch=loongarch + ;; + + mips64*) +-- +2.27.0 + diff --git a/configure-Add-uadk-option.patch b/configure-Add-uadk-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..2470452b57dc35399d440610892973ba1931ea9d --- /dev/null +++ b/configure-Add-uadk-option.patch @@ -0,0 +1,98 @@ +From 49db5292ea971c00a7e29eb6d20be24012c553bf Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:05 +0100 +Subject: [81/99] configure: Add uadk option + +commit cfc589a89b31930d9d658f4b0b6c4e6f33280e10 upstream. + +Add --enable-uadk and --disable-uadk options to enable and disable +UADK compression accelerator. This is for using UADK based hardware +accelerators for live migration. 
+ +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + meson.build | 14 ++++++++++++++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 3 +++ + 3 files changed, 19 insertions(+) + +diff --git a/meson.build b/meson.build +index 888af7e099..e3599b9a09 100644 +--- a/meson.build ++++ b/meson.build +@@ -1049,6 +1049,18 @@ if not get_option('qpl').auto() or have_system + required: get_option('qpl'), + method: 'pkg-config') + endif ++uadk = not_found ++if not get_option('uadk').auto() or have_system ++ libwd = dependency('libwd', version: '>=2.6', ++ required: get_option('uadk'), ++ method: 'pkg-config') ++ libwd_comp = dependency('libwd_comp', version: '>=2.6', ++ required: get_option('uadk'), ++ method: 'pkg-config') ++ if libwd.found() and libwd_comp.found() ++ uadk = declare_dependency(dependencies: [libwd, libwd_comp]) ++ endif ++endif + virgl = not_found + + have_vhost_user_gpu = have_tools and targetos == 'linux' and pixman.found() +@@ -2288,6 +2300,7 @@ config_host_data.set('CONFIG_STATX', has_statx) + config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id) + config_host_data.set('CONFIG_ZSTD', zstd.found()) + config_host_data.set('CONFIG_QPL', qpl.found()) ++config_host_data.set('CONFIG_UADK', uadk.found()) + config_host_data.set('CONFIG_FUSE', fuse.found()) + config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found()) + config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) +@@ -4463,6 +4476,7 @@ summary_info += {'bzip2 support': libbzip2} + summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} + summary_info += {'Query Processing Library support': qpl} ++summary_info += {'UADK Library support': uadk} + summary_info += {'NUMA host support': numa} + summary_info += {'capstone': capstone} + summary_info += {'libpmem support': libpmem} +diff --git a/meson_options.txt b/meson_options.txt +index 
82f73d51ce..709678fa18 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -261,6 +261,8 @@ option('zstd', type : 'feature', value : 'auto', + description: 'zstd compression support') + option('qpl', type : 'feature', value : 'auto', + description: 'Query Processing Library support') ++option('uadk', type : 'feature', value : 'auto', ++ description: 'UADK Library support') + option('fuse', type: 'feature', value: 'auto', + description: 'FUSE block device export') + option('fuse_lseek', type : 'feature', value : 'auto', +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 784f74fde9..833b996818 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -223,6 +223,7 @@ meson_options_help() { + printf "%s\n" ' xkbcommon xkbcommon support' + printf "%s\n" ' zstd zstd compression support' + printf "%s\n" ' qpl Query Processing Library support' ++ printf "%s\n" ' uadk UADK Library support' + } + _meson_option_parse() { + case $1 in +@@ -565,6 +566,8 @@ _meson_option_parse() { + --disable-zstd) printf "%s" -Dzstd=disabled ;; + --enable-qpl) printf "%s" -Dqpl=enabled ;; + --disable-qpl) printf "%s" -Dqpl=disabled ;; ++ --enable-uadk) printf "%s" -Duadk=enabled ;; ++ --disable-uadk) printf "%s" -Duadk=disabled ;; + *) return 1 ;; + esac + } +-- +2.33.0 + diff --git a/configure-Enable-test-and-libs-for-zstd.patch b/configure-Enable-test-and-libs-for-zstd.patch deleted file mode 100644 index bf900cf343d4d77c997d203ffcd12a8dc6c434fb..0000000000000000000000000000000000000000 --- a/configure-Enable-test-and-libs-for-zstd.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 5a79ccd388ee09dc1db93d26791d1e4a6b2ced47 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 3 Feb 2021 17:33:44 +0800 -Subject: [PATCH] configure: Enable test and libs for zstd - -configure: Enable test and libs for zstd -Add it to several build systems to make testing good. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. 
David Alan Gilbert - -Signed-off-by: Jiajie Li ---- - .gitlab-ci.yml | 1 + - .travis.yml | 1 + - configure | 30 ++++++++++++++++++++++++++++++ - 3 files changed, 32 insertions(+) - -diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml -index c63bf2f822..3d9b7f9262 100644 ---- a/.gitlab-ci.yml -+++ b/.gitlab-ci.yml -@@ -16,6 +16,7 @@ build-system2: - script: - - apt-get install -y -qq libsdl2-dev libgcrypt-dev libbrlapi-dev libaio-dev - libfdt-dev liblzo2-dev librdmacm-dev libibverbs-dev libibumad-dev -+ libzstd-dev - - ./configure --enable-werror --target-list="tricore-softmmu unicore32-softmmu - microblaze-softmmu mips-softmmu riscv32-softmmu s390x-softmmu sh4-softmmu - sparc64-softmmu x86_64-softmmu xtensa-softmmu nios2-softmmu or1k-softmmu" -diff --git a/.travis.yml b/.travis.yml -index caf0a1f8fa..f3fe04fba9 100644 ---- a/.travis.yml -+++ b/.travis.yml -@@ -35,6 +35,7 @@ addons: - - liburcu-dev - - libusb-1.0-0-dev - - libvte-2.91-dev -+ - libzstd-dev - - sparse - - uuid-dev - - gcovr -diff --git a/configure b/configure -index 714e7fb6a1..577533e9ed 100755 ---- a/configure -+++ b/configure -@@ -446,6 +446,7 @@ lzo="" - snappy="" - bzip2="" - lzfse="" -+zstd="" - guest_agent="" - guest_agent_with_vss="no" - guest_agent_ntddscsi="no" -@@ -1358,6 +1359,10 @@ for opt do - ;; - --disable-lzfse) lzfse="no" - ;; -+ --disable-zstd) zstd="no" -+ ;; -+ --enable-zstd) zstd="yes" -+ ;; - --enable-guest-agent) guest_agent="yes" - ;; - --disable-guest-agent) guest_agent="no" -@@ -1812,6 +1817,8 @@ disabled with --disable-FEATURE, default is enabled if available: - (for reading bzip2-compressed dmg images) - lzfse support of lzfse compression library - (for reading lzfse-compressed dmg images) -+ zstd support for zstd compression library -+ (for migration compression) - seccomp seccomp support - coroutine-pool coroutine freelist (better performance) - glusterfs GlusterFS backend -@@ -2407,6 +2414,24 @@ EOF - fi - fi - -+########################################## -+# zstd check -+ 
-+if test "$zstd" != "no" ; then -+ if $pkg_config --exist libzstd ; then -+ zstd_cflags="$($pkg_config --cflags libzstd)" -+ zstd_libs="$($pkg_config --libs libzstd)" -+ LIBS="$zstd_libs $LIBS" -+ QEMU_CFLAGS="$QEMU_CFLAGS $zstd_cflags" -+ zstd="yes" -+ else -+ if test "$zstd" = "yes" ; then -+ feature_not_found "libzstd" "Install libzstd devel" -+ fi -+ zstd="no" -+ fi -+fi -+ - ########################################## - # libseccomp check - -@@ -6460,6 +6485,7 @@ echo "lzo support $lzo" - echo "snappy support $snappy" - echo "bzip2 support $bzip2" - echo "lzfse support $lzfse" -+echo "zstd support $zstd" - echo "NUMA host support $numa" - echo "libxml2 $libxml2" - echo "tcmalloc support $tcmalloc" -@@ -7024,6 +7050,10 @@ if test "$lzfse" = "yes" ; then - echo "LZFSE_LIBS=-llzfse" >> $config_host_mak - fi - -+if test "$zstd" = "yes" ; then -+ echo "CONFIG_ZSTD=y" >> $config_host_mak -+fi -+ - if test "$libiscsi" = "yes" ; then - echo "CONFIG_LIBISCSI=m" >> $config_host_mak - echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak --- -2.27.0 - diff --git a/configure-add-enable-qpl-build-option.patch b/configure-add-enable-qpl-build-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..6eb1fb500a564d998a7424e773c664527f3d4699 --- /dev/null +++ b/configure-add-enable-qpl-build-option.patch @@ -0,0 +1,100 @@ +From e75b4a4c735e07431d02dd85002f8175cfbd5db3 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:06 +0800 +Subject: [74/99] configure: add --enable-qpl build option + +commit b844a2c7cc7f7c7756a27d372e64f6688d67c4eb upstream. + +add --enable-qpl and --disable-qpl options to enable and disable +the QPL compression method for multifd migration. + +The Query Processing Library (QPL) is an open-source library +that supports data compression and decompression features. 
It +is based on the deflate compression algorithm and use Intel +In-Memory Analytics Accelerator(IAA) hardware for compression +and decompression acceleration. + +For more live migration with IAA, please refer to the document +docs/devel/migration/qpl-compression.rst + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + meson.build | 8 ++++++++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 3 +++ + 3 files changed, 13 insertions(+) + +diff --git a/meson.build b/meson.build +index aea6a33ca3..888af7e099 100644 +--- a/meson.build ++++ b/meson.build +@@ -1043,6 +1043,12 @@ if not get_option('zstd').auto() or have_block + required: get_option('zstd'), + method: 'pkg-config') + endif ++qpl = not_found ++if not get_option('qpl').auto() or have_system ++ qpl = dependency('qpl', version: '>=1.5.0', ++ required: get_option('qpl'), ++ method: 'pkg-config') ++endif + virgl = not_found + + have_vhost_user_gpu = have_tools and targetos == 'linux' and pixman.found() +@@ -2281,6 +2287,7 @@ config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim) + config_host_data.set('CONFIG_STATX', has_statx) + config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id) + config_host_data.set('CONFIG_ZSTD', zstd.found()) ++config_host_data.set('CONFIG_QPL', qpl.found()) + config_host_data.set('CONFIG_FUSE', fuse.found()) + config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found()) + config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) +@@ -4455,6 +4462,7 @@ summary_info += {'snappy support': snappy} + summary_info += {'bzip2 support': libbzip2} + summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} ++summary_info += {'Query Processing Library support': qpl} + summary_info += {'NUMA host support': numa} + summary_info += {'capstone': capstone} + summary_info += {'libpmem support': libpmem} +diff --git a/meson_options.txt 
b/meson_options.txt +index cf9706c411..82f73d51ce 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -259,6 +259,8 @@ option('xkbcommon', type : 'feature', value : 'auto', + description: 'xkbcommon support') + option('zstd', type : 'feature', value : 'auto', + description: 'zstd compression support') ++option('qpl', type : 'feature', value : 'auto', ++ description: 'Query Processing Library support') + option('fuse', type: 'feature', value: 'auto', + description: 'FUSE block device export') + option('fuse_lseek', type : 'feature', value : 'auto', +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 680fa3f581..784f74fde9 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -222,6 +222,7 @@ meson_options_help() { + printf "%s\n" ' Xen PCI passthrough support' + printf "%s\n" ' xkbcommon xkbcommon support' + printf "%s\n" ' zstd zstd compression support' ++ printf "%s\n" ' qpl Query Processing Library support' + } + _meson_option_parse() { + case $1 in +@@ -562,6 +563,8 @@ _meson_option_parse() { + --disable-xkbcommon) printf "%s" -Dxkbcommon=disabled ;; + --enable-zstd) printf "%s" -Dzstd=enabled ;; + --disable-zstd) printf "%s" -Dzstd=disabled ;; ++ --enable-qpl) printf "%s" -Dqpl=enabled ;; ++ --disable-qpl) printf "%s" -Dqpl=disabled ;; + *) return 1 ;; + esac + } +-- +2.33.0 + diff --git a/contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/contrib-libvhost-user-Protect-slave-fd-with-mutex.patch deleted file mode 100644 index 44fc4283ca652bb9743c186b2127f073e32b5043..0000000000000000000000000000000000000000 --- a/contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +++ /dev/null @@ -1,121 +0,0 @@ -From f076af734a5964c3e48b2d223130f855b86f40e5 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Fri, 1 Mar 2019 11:18:30 +0000 -Subject: [PATCH] contrib/libvhost-user: Protect slave fd with mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In future patches we'll be performing commands on the slave-fd driven -by commands on queues, since those queues will be driven by individual -threads we need to make sure they don't attempt to use the slave-fd -for multiple commands in parallel. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert ---- - contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 23 insertions(+), 4 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index cb5f5770e4..fb75837032 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -387,26 +387,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) - return vu_message_write(dev, conn_fd, vmsg); - } - -+/* -+ * Processes a reply on the slave channel. -+ * Entered with slave_mutex held and releases it before exit. -+ * Returns true on success. -+ */ - static bool - vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) - { - VhostUserMsg msg_reply; -+ bool result = false; - - if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { -- return true; -+ result = true; -+ goto out; - } - - if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { -- return false; -+ goto out; - } - - if (msg_reply.request != vmsg->request) { - DPRINT("Received unexpected msg type. Expected %d received %d", - vmsg->request, msg_reply.request); -- return false; -+ goto out; - } - -- return msg_reply.payload.u64 == 0; -+ result = msg_reply.payload.u64 == 0; -+ -+out: -+ pthread_mutex_unlock(&dev->slave_mutex); -+ return result; - } - - /* Kick the log_call_fd if required. 
*/ -@@ -1102,10 +1113,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, - return false; - } - -+ pthread_mutex_lock(&dev->slave_mutex); - if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { -+ pthread_mutex_unlock(&dev->slave_mutex); - return false; - } - -+ /* Also unlocks the slave_mutex */ - return vu_process_message_reply(dev, &vmsg); - } - -@@ -1625,6 +1639,7 @@ vu_deinit(VuDev *dev) - close(dev->slave_fd); - dev->slave_fd = -1; - } -+ pthread_mutex_destroy(&dev->slave_mutex); - - if (dev->sock != -1) { - close(dev->sock); -@@ -1660,6 +1675,7 @@ vu_init(VuDev *dev, - dev->remove_watch = remove_watch; - dev->iface = iface; - dev->log_call_fd = -1; -+ pthread_mutex_init(&dev->slave_mutex, NULL); - dev->slave_fd = -1; - dev->max_queues = max_queues; - -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 46b600799b..1844b6f8d4 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include "standard-headers/linux/virtio_ring.h" - - /* Based on qemu/hw/virtio/vhost-user.c */ -@@ -355,6 +356,8 @@ struct VuDev { - VuVirtq *vq; - VuDevInflightInfo inflight_info; - int log_call_fd; -+ /* Must be held while using slave_fd */ -+ pthread_mutex_t slave_mutex; - int slave_fd; - uint64_t log_size; - uint8_t *log_table; --- -2.27.0 - diff --git a/contrib-plugins-add-compat-for-g_memdup2.patch b/contrib-plugins-add-compat-for-g_memdup2.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e8b866405c538ed1ade975aa1663a106cf628e4 --- /dev/null +++ b/contrib-plugins-add-compat-for-g_memdup2.patch @@ -0,0 +1,62 @@ +From 84321dcfb4ec3d08984e7680c8efad80907bde84 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Alex=20Benn=C3=A9e?= +Date: Mon, 29 Jul 2024 15:44:13 +0100 +Subject: [PATCH] contrib/plugins: add compat for g_memdup2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +We were premature if bumping this because some of our builds are still +on older glibs. Just copy the compat handler for now and we can remove +it later. + +Fixes: ee293103b0 (plugins: update lockstep to use g_memdup2) +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2161 +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Alex Bennée +Message-Id: <20240729144414.830369-14-alex.bennee@linaro.org> +(cherry picked from commit 44e794896759236885f6d30d1f6b9b8b76355d52) +Signed-off-by: zhujun2 +--- + contrib/plugins/lockstep.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c +index 237543b43a..0c6f060183 100644 +--- a/contrib/plugins/lockstep.c ++++ b/contrib/plugins/lockstep.c +@@ -100,6 +100,31 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) + plugin_cleanup(id); + } + ++/* ++ * g_memdup has been deprecated in Glib since 2.68 and ++ * will complain about it if you try to use it. However until ++ * glib_req_ver for QEMU is bumped we make a copy of the glib-compat ++ * handler. 
++ */ ++static inline gpointer g_memdup2_qemu(gconstpointer mem, gsize byte_size) ++{ ++#if GLIB_CHECK_VERSION(2, 68, 0) ++ return g_memdup2(mem, byte_size); ++#else ++ gpointer new_mem; ++ ++ if (mem && byte_size != 0) { ++ new_mem = g_malloc(byte_size); ++ memcpy(new_mem, mem, byte_size); ++ } else { ++ new_mem = NULL; ++ } ++ ++ return new_mem; ++#endif ++} ++#define g_memdup2(m, s) g_memdup2_qemu(m, s) ++ + static void report_divergance(ExecState *us, ExecState *them) + { + DivergeState divrec = { log, 0 }; +-- +2.41.0.windows.1 + diff --git a/coro-support-live-patch-for-libcare.patch b/coro-support-live-patch-for-libcare.patch new file mode 100644 index 0000000000000000000000000000000000000000..71b83c2df433f74db003e6ceee10a067f7db39af --- /dev/null +++ b/coro-support-live-patch-for-libcare.patch @@ -0,0 +1,116 @@ +From c2b377814e7874811d7eb98462d5153e966281cf Mon Sep 17 00:00:00 2001 +From: Fei Xu +Date: Wed, 3 Apr 2024 18:05:25 +0800 +Subject: [PATCH] coro: support live patch for libcare + +Signed-off-by: Dawei Jiang +--- + include/qemu/coroutine_int.h | 3 ++- + util/coroutine-ucontext.c | 52 ++++++++++++++++++++++++++++++++++++ + util/qemu-coroutine.c | 4 +++ + 3 files changed, 58 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h +index 1da148552f..11b550a0fc 100644 +--- a/include/qemu/coroutine_int.h ++++ b/include/qemu/coroutine_int.h +@@ -73,5 +73,6 @@ Coroutine *qemu_coroutine_new(void); + void qemu_coroutine_delete(Coroutine *co); + CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, + CoroutineAction action); +- ++void qemu_coroutine_info_add(const Coroutine *co_); ++void qemu_coroutine_info_delete(const Coroutine *co_); + #endif +diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c +index 7b304c79d9..650c21846d 100644 +--- a/util/coroutine-ucontext.c ++++ b/util/coroutine-ucontext.c +@@ -80,6 +80,19 @@ union cc_arg { + int i[2]; + }; + ++/** ++ * coroutines list for 
libcare ++ */ ++struct CoroutineInformation { ++ sigjmp_buf *env; ++ QLIST_ENTRY(CoroutineInformation) next; ++}; ++ ++static QemuMutex coro_mtx; ++QLIST_HEAD(, CoroutineInformation) coro_info_list = QLIST_HEAD_INITIALIZER(pool); ++int coro_env_offset = offsetof(struct CoroutineInformation, env); ++int coro_next_offset = offsetof(struct CoroutineInformation, next); ++ + /* + * QEMU_ALWAYS_INLINE only does so if __OPTIMIZE__, so we cannot use it. + * always_inline is required to avoid TSan runtime fatal errors. +@@ -340,3 +353,42 @@ bool qemu_in_coroutine(void) + + return self && self->caller; + } ++ ++static void __attribute__((constructor)) coro_mutex_init(void) ++{ ++ qemu_mutex_init(&coro_mtx); ++} ++ ++void qemu_coroutine_info_add(const Coroutine *co_) ++{ ++ CoroutineUContext *co; ++ struct CoroutineInformation *coro_info; ++ ++ /* save coroutine env to coro_info_list */ ++ co = DO_UPCAST(CoroutineUContext, base, co_); ++ coro_info = g_malloc0(sizeof(struct CoroutineInformation)); ++ coro_info->env = &co->env; ++ ++ qemu_mutex_lock(&coro_mtx); ++ QLIST_INSERT_HEAD(&coro_info_list, coro_info, next); ++ qemu_mutex_unlock(&coro_mtx); ++} ++ ++void qemu_coroutine_info_delete(const Coroutine *co_) ++{ ++ CoroutineUContext *co; ++ struct CoroutineInformation *coro_info; ++ ++ /* Remove relative coroutine env info from coro_info_list */ ++ co = DO_UPCAST(CoroutineUContext, base, co_); ++ ++ qemu_mutex_lock(&coro_mtx); ++ QLIST_FOREACH(coro_info, &coro_info_list, next) { ++ if (coro_info->env == &co->env) { ++ QLIST_REMOVE(coro_info, next); ++ g_free(coro_info); ++ break; ++ } ++ } ++ qemu_mutex_unlock(&coro_mtx); ++} +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 5fd2dbaf8b..f550214484 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -89,6 +89,8 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + co = qemu_coroutine_new(); + } + ++ qemu_coroutine_info_add(co); ++ + co->entry = entry; + co->entry_arg = 
opaque; + QSIMPLEQ_INIT(&co->co_queue_wakeup); +@@ -99,6 +101,8 @@ static void coroutine_delete(Coroutine *co) + { + co->caller = NULL; + ++ qemu_coroutine_info_delete(co); ++ + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { + if (release_pool_size < qatomic_read(&pool_max_size) * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); +-- +2.27.0 + diff --git a/coroutine-Add-qemu_co_mutex_assert_locked.patch b/coroutine-Add-qemu_co_mutex_assert_locked.patch deleted file mode 100644 index fb1f2589f3edd987f0311288d049951c726ddeb8..0000000000000000000000000000000000000000 --- a/coroutine-Add-qemu_co_mutex_assert_locked.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e9bb3d942e268a19e03fc5d404586d2ed1564282 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 24 Oct 2019 16:26:57 +0200 -Subject: [PATCH] coroutine: Add qemu_co_mutex_assert_locked() - -Some functions require that the caller holds a certain CoMutex for them -to operate correctly. Add a function so that they can assert the lock is -really held. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Tested-by: Michael Weiser -Reviewed-by: Michael Weiser -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Denis V. Lunev -Reviewed-by: Max Reitz -(cherry picked from commit 944f3d5dd216fcd8cb007eddd4f82dced0a15b3d) -Signed-off-by: Michael Roth ---- - include/qemu/coroutine.h | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h -index 9801e7f5a4..f4843b5f59 100644 ---- a/include/qemu/coroutine.h -+++ b/include/qemu/coroutine.h -@@ -167,6 +167,21 @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); - */ - void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); - -+/** -+ * Assert that the current coroutine holds @mutex. -+ */ -+static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex) -+{ -+ /* -+ * mutex->holder doesn't need any synchronisation if the assertion holds -+ * true because the mutex protects it. 
If it doesn't hold true, we still -+ * don't mind if another thread takes or releases mutex behind our back, -+ * because the condition will be false no matter whether we read NULL or -+ * the pointer for any other coroutine. -+ */ -+ assert(atomic_read(&mutex->locked) && -+ mutex->holder == qemu_coroutine_self()); -+} - - /** - * CoQueues are a mechanism to queue coroutines in order to continue executing --- -2.23.0 diff --git a/cpu-add-Cortex-A72-processor-kvm-target-support.patch b/cpu-add-Cortex-A72-processor-kvm-target-support.patch index a310c37de95cb5fac446418afcd1efe3650180af..2e35603acb11d3b44e0df0faa1e46e018aea0051 100644 --- a/cpu-add-Cortex-A72-processor-kvm-target-support.patch +++ b/cpu-add-Cortex-A72-processor-kvm-target-support.patch @@ -1,42 +1,44 @@ -From 4304d1de2c790ac75ed2f5984c4a3a2760c08fff Mon Sep 17 00:00:00 2001 +From 5853333c9513caea541701c95a4ac691bb97452f Mon Sep 17 00:00:00 2001 From: Xu Yandong -Date: Mon, 23 Sep 2019 14:35:25 +0800 +Date: Tue, 19 Mar 2024 10:45:56 +0800 Subject: [PATCH] cpu: add Cortex-A72 processor kvm target support The ARM Cortex-A72 is ARMv8-A micro-architecture, add kvm target to ARM Cortex-A72 processor definition. 
Signed-off-by: Xu Yandong +Signed-off-by: Mingwang Li +Signed-off-by: Yuan Zhang --- target/arm/cpu64.c | 2 +- target/arm/kvm-consts.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index e408f50d..afbfd1ec 100644 +index 922eac3b61..471014b5a9 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c -@@ -212,6 +212,7 @@ static void aarch64_a72_initfn(Object *obj) +@@ -710,6 +710,7 @@ static void aarch64_a72_initfn(Object *obj) ARMCPU *cpu = ARM_CPU(obj); cpu->dtb_compatible = "arm,cortex-a72"; + cpu->kvm_target = QEMU_KVM_ARM_TARGET_GENERIC_V8; set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_VFP4); set_feature(&cpu->env, ARM_FEATURE_NEON); -@@ -276,7 +277,6 @@ static void aarch64_kunpeng_t82_initfn(Object *obj) - cpu->id_aa64dfr0 = 0x110305408; + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); +@@ -773,7 +774,6 @@ static void aarch64_kunpeng_920_initfn(Object *obj) + cpu->isar.id_aa64dfr0 = 0x110305408; cpu->isar.id_aa64isar0 = 0x10211120; cpu->isar.id_aa64mmfr0 = 0x101125; - cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; } - static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, + static void aarch64_host_initfn(Object *obj) diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h -index aad28258..b7dac596 100644 +index 7c6adc14f6..c034823170 100644 --- a/target/arm/kvm-consts.h +++ b/target/arm/kvm-consts.h -@@ -130,6 +130,8 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED); +@@ -133,6 +133,8 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED); #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2 #define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3 #define QEMU_KVM_ARM_TARGET_CORTEX_A53 4 @@ -45,14 +47,14 @@ index aad28258..b7dac596 100644 /* There's no kernel define for this: sentinel value which * matches no KVM target value for either 64 or 32 bit -@@ -142,6 +144,7 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, 
KVM_ARM_TARGET_FOUNDATION_V8); +@@ -144,6 +146,7 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8); MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57); MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA); MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53); +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_GENERIC_V8, KVM_ARM_TARGET_GENERIC_V8); - #else - MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15); - MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7); + + #define CP_REG_ARM64 0x6000000000000000ULL + #define CP_REG_ARM_COPROC_MASK 0x000000000FFF0000 -- -2.23.0 +2.27.0 diff --git a/cpu-add-Kunpeng-920-cpu-support.patch b/cpu-add-Kunpeng-920-cpu-support.patch index 74e27645ac18a07c00a04283f428bec515cf259e..fc9c4cc8f35dc3b53cf64f7f47b1135e9dc197e3 100644 --- a/cpu-add-Kunpeng-920-cpu-support.patch +++ b/cpu-add-Kunpeng-920-cpu-support.patch @@ -1,66 +1,120 @@ -From 70063948181062161a341a8738a53708d8ed0a0b Mon Sep 17 00:00:00 2001 +From e4ae54316651bf6af12de263da158c5ec4ed0401 Mon Sep 17 00:00:00 2001 From: Xu Yandong -Date: Wed, 28 Aug 2019 01:36:21 -0400 +Date: Mon, 18 Mar 2024 17:31:31 +0800 Subject: [PATCH] cpu: add Kunpeng-920 cpu support -Add the Kunpeng-920 CPU model. 
+Add the Kunpeng-920 CPU model Signed-off-by: Xu Yandong +Signed-off-by: Mingwang Li +Signed-off-by: Yuan Zhang --- hw/arm/virt.c | 1 + - target/arm/cpu64.c | 21 +++++++++++++++++++++ - 2 files changed, 22 insertions(+) + target/arm/cpu64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 73 insertions(+) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index f89757df..11468b72 100644 +index be2856c018..500a15aa5b 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -179,6 +179,7 @@ static const char *valid_cpus[] = { +@@ -220,6 +220,7 @@ static const char *valid_cpus[] = { + #endif ARM_CPU_TYPE_NAME("cortex-a53"), ARM_CPU_TYPE_NAME("cortex-a57"), - ARM_CPU_TYPE_NAME("cortex-a72"), + ARM_CPU_TYPE_NAME("Kunpeng-920"), ARM_CPU_TYPE_NAME("host"), ARM_CPU_TYPE_NAME("max"), }; diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 228906f2..5581d5e1 100644 +index 1e9c6c85ae..922eac3b61 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c -@@ -258,6 +258,26 @@ static void aarch64_a72_initfn(Object *obj) - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); +@@ -705,6 +705,77 @@ static void aarch64_a53_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); } - + ++static void aarch64_a72_initfn(Object *obj) ++{ ++ ARMCPU *cpu = ARM_CPU(obj); ++ ++ cpu->dtb_compatible = "arm,cortex-a72"; ++ set_feature(&cpu->env, ARM_FEATURE_V8); ++ set_feature(&cpu->env, ARM_FEATURE_NEON); ++ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); ++ set_feature(&cpu->env, ARM_FEATURE_AARCH64); ++ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); ++ set_feature(&cpu->env, ARM_FEATURE_EL2); ++ set_feature(&cpu->env, ARM_FEATURE_EL3); ++ set_feature(&cpu->env, ARM_FEATURE_PMU); ++ cpu->midr = 0x410fd083; ++ cpu->revidr = 0x00000000; ++ cpu->reset_fpsid = 0x41034080; ++ cpu->isar.mvfr0 = 0x10110222; ++ cpu->isar.mvfr1 = 0x12111111; ++ cpu->isar.mvfr2 = 0x00000043; ++ cpu->ctr = 0x8444c004; ++ cpu->reset_sctlr = 0x00c50838; ++ cpu->isar.id_pfr0 = 0x00000131; ++ 
cpu->isar.id_pfr1 = 0x00011011; ++ cpu->isar.id_dfr0 = 0x03010066; ++ cpu->id_afr0 = 0x00000000; ++ cpu->isar.id_mmfr0 = 0x10201105; ++ cpu->isar.id_mmfr1 = 0x40000000; ++ cpu->isar.id_mmfr2 = 0x01260000; ++ cpu->isar.id_mmfr3 = 0x02102211; ++ cpu->isar.id_isar0 = 0x02101110; ++ cpu->isar.id_isar1 = 0x13112111; ++ cpu->isar.id_isar2 = 0x21232042; ++ cpu->isar.id_isar3 = 0x01112131; ++ cpu->isar.id_isar4 = 0x00011142; ++ cpu->isar.id_isar5 = 0x00011121; ++ cpu->isar.id_aa64pfr0 = 0x00002222; ++ cpu->isar.id_aa64dfr0 = 0x10305106; ++ cpu->isar.id_aa64isar0 = 0x00011120; ++ cpu->isar.id_aa64mmfr0 = 0x00001124; ++ cpu->isar.dbgdidr = 0x3516d000; ++ cpu->clidr = 0x0a200023; ++ cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ ++ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ ++ cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */ ++ cpu->dcz_blocksize = 4; /* 64 bytes */ ++ cpu->gic_num_lrs = 4; ++ cpu->gic_vpribits = 5; ++ cpu->gic_vprebits = 5; ++ define_cortex_a72_a57_a53_cp_reginfo(cpu); ++} ++ +static void aarch64_kunpeng_920_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + /* + * Hisilicon Kunpeng-920 CPU is similar to cortex-a72, -+ * so first initialize cpu data as cortex-a72 CPU, -+ * and then update the special registers. ++ * so first initialize cpu data as cortex-a72, ++ * and then update the special register. 
+ */ + aarch64_a72_initfn(obj); + + cpu->midr = 0x480fd010; + cpu->ctr = 0x84448004; + cpu->isar.id_aa64pfr0 = 0x11001111; -+ cpu->id_aa64dfr0 = 0x110305408; ++ cpu->isar.id_aa64dfr0 = 0x110305408; + cpu->isar.id_aa64isar0 = 0x10211120; + cpu->isar.id_aa64mmfr0 = 0x101125; + cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; +} + - static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) + static void aarch64_host_initfn(Object *obj) { -@@ -388,6 +408,7 @@ static const ARMCPUInfo aarch64_cpus[] = { + #if defined(CONFIG_KVM) +@@ -744,6 +815,7 @@ static void aarch64_max_initfn(Object *obj) + static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, -+ { .name = "Kunpeng-920", .initfn = aarch64_kunpeng_920_initfn }, ++ { .name = "Kunpeng-920", .initfn = aarch64_kunpeng_920_initfn}, { .name = "max", .initfn = aarch64_max_initfn }, - { .name = NULL } - }; --- -2.19.1 + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +-- +2.27.0 + diff --git a/cpu-ensure-we-don-t-call-start_exclusive-from-cpu_ex.patch b/cpu-ensure-we-don-t-call-start_exclusive-from-cpu_ex.patch new file mode 100644 index 0000000000000000000000000000000000000000..b50c17cfc6ea637e8ec469059f41b63c2b913646 --- /dev/null +++ b/cpu-ensure-we-don-t-call-start_exclusive-from-cpu_ex.patch @@ -0,0 +1,37 @@ +From c5b349f9ff0792cce72cdd1ade2521c568058a25 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 18 Nov 2024 14:20:56 -0500 +Subject: [PATCH] cpu: ensure we don't call start_exclusive from cpu_exec +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 779f30a01af8566780cefc8639505b758950afb3 + +Reviewed-by: Richard Henderson +Signed-off-by: Pierrick Bouvier +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: 
<20241025175857.2554252-3-pierrick.bouvier@linaro.org> +Signed-off-by: Richard Henderson +Signed-off-by: qihao_yewu +--- + cpu-common.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/cpu-common.c b/cpu-common.c +index 54e63b3f77..a949ad7ca3 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -234,6 +234,9 @@ void start_exclusive(void) + CPUState *other_cpu; + int running_cpus; + ++ /* Ensure we are not running, or start_exclusive will be blocked. */ ++ g_assert(!current_cpu->running); ++ + if (current_cpu->exclusive_context_count) { + current_cpu->exclusive_context_count++; + return; +-- +2.41.0.windows.1 + diff --git a/cpu-features-fix-bug-for-memory-leakage.patch b/cpu-features-fix-bug-for-memory-leakage.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e6793d462ca876a46fd6f377e4f7dd896d48e06 --- /dev/null +++ b/cpu-features-fix-bug-for-memory-leakage.patch @@ -0,0 +1,25 @@ +From 9ebad9c3020625df0a178e6a2d06eaae15ef767c Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 12:51:19 +0800 +Subject: [PATCH] cpu/features: fix bug for memory leakage + +strList has not been freed after use. Fix it. 
+--- + target/i386/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index fc61a84b1e..f94405c02b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5475,6 +5475,7 @@ static void x86_cpu_get_unavailable_features(Object *obj, Visitor *v, + + x86_cpu_list_feature_names(xc->filtered_features, &result); + visit_type_strList(v, "unavailable-features", &result, errp); ++ qapi_free_strList(result); + } + + /* Print all cpuid feature names in featureset +-- +2.27.0 + diff --git a/cpu-parse-feature-to-avoid-failure.patch b/cpu-parse-feature-to-avoid-failure.patch deleted file mode 100644 index 78178bfa3dd2a9dd1413dd3c12bbc8e6ed6d2869..0000000000000000000000000000000000000000 --- a/cpu-parse-feature-to-avoid-failure.patch +++ /dev/null @@ -1,75 +0,0 @@ -From ba1ca232cfa2ca273c610beda40bee2143f11964 Mon Sep 17 00:00:00 2001 -From: Xu Yandong -Date: Tue, 3 Sep 2019 16:27:39 +0800 -Subject: [PATCH] cpu: parse +/- feature to avoid failure - -To avoid cpu feature parse failuer, +/- feature is added. 
- -Signed-off-by: Xu Yandong ---- - target/arm/cpu64.c | 38 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 38 insertions(+) - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 0d492877..6ce87ce0 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -30,6 +30,7 @@ - #include "sysemu/kvm.h" - #include "kvm_arm.h" - #include "qapi/visitor.h" -+#include "hw/qdev-properties.h" - - static inline void set_feature(CPUARMState *env, int feature) - { -@@ -455,10 +456,47 @@ static gchar *aarch64_gdb_arch_name(CPUState *cs) - return g_strdup("aarch64"); - } - -+/* Parse "+feature,-feature,feature=foo" CPU feature string -+ */ -+static void arm_cpu_parse_featurestr(const char *typename, char *features, -+ Error **errp) -+{ -+ char *featurestr; -+ char *val; -+ static bool cpu_globals_initialized; -+ -+ if (cpu_globals_initialized) { -+ return; -+ } -+ cpu_globals_initialized = true; -+ -+ featurestr = features ? strtok(features, ",") : NULL; -+ while (featurestr) { -+ val = strchr(featurestr, '='); -+ if (val) { -+ GlobalProperty *prop = g_new0(typeof(*prop), 1); -+ *val = 0; -+ val++; -+ prop->driver = typename; -+ prop->property = g_strdup(featurestr); -+ prop->value = g_strdup(val); -+ qdev_prop_register_global(prop); -+ } else if (featurestr[0] == '+' || featurestr[0] == '-') { -+ warn_report("Ignore %s feature\n", featurestr); -+ } else { -+ error_setg(errp, "Expected key=value format, found %s.", -+ featurestr); -+ return; -+ } -+ featurestr = strtok(NULL, ","); -+ } -+} -+ - static void aarch64_cpu_class_init(ObjectClass *oc, void *data) - { - CPUClass *cc = CPU_CLASS(oc); - -+ cc->parse_features = arm_cpu_parse_featurestr; - cc->cpu_exec_interrupt = arm_cpu_exec_interrupt; - cc->gdb_read_register = aarch64_cpu_gdb_read_register; - cc->gdb_write_register = aarch64_cpu_gdb_write_register; --- -2.19.1 - diff --git a/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch b/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch new file 
mode 100644 index 0000000000000000000000000000000000000000..e2148e867fe6d83f1c5b25d7c242b17f02dfd472 --- /dev/null +++ b/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch @@ -0,0 +1,144 @@ +From 444de91551c1e141a76bf3dae4cebee9dbd57b49 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 02:48:49 +0100 +Subject: [PATCH] cpus-common: Add common CPU utility for possible vCPUs + +Adds various utility functions which might be required to fetch or check the +state of the possible vCPUs. This also introduces concept of *disabled* vCPUs, +which are part of the *possible* vCPUs but are not part of the *present* vCPU. +This state shall be used during machine init time to check the presence of +vcpus. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + cpu-common.c | 31 +++++++++++++++++++++++++ + include/hw/core/cpu.h | 53 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 84 insertions(+) + +diff --git a/cpu-common.c b/cpu-common.c +index c81fd72d16..d041a351ab 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -24,6 +24,7 @@ + #include "sysemu/cpus.h" + #include "qemu/lockable.h" + #include "trace/trace-root.h" ++#include "hw/boards.h" + + QemuMutex qemu_cpu_list_lock; + static QemuCond exclusive_cond; +@@ -107,6 +108,36 @@ void cpu_list_remove(CPUState *cpu) + cpu_list_generation_id++; + } + ++CPUState *qemu_get_possible_cpu(int index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList *possible_cpus = ms->possible_cpus; ++ ++ assert((index >= 0) && (index < possible_cpus->len)); ++ ++ return CPU(possible_cpus->cpus[index].cpu); ++} ++ ++bool qemu_present_cpu(CPUState *cpu) ++{ ++ return cpu; ++} ++ ++bool qemu_enabled_cpu(CPUState *cpu) ++{ ++ return cpu && !cpu->disabled; ++} ++ ++uint64_t qemu_get_cpu_archid(int cpu_index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList 
*possible_cpus = ms->possible_cpus; ++ ++ assert((cpu_index >= 0) && (cpu_index < possible_cpus->len)); ++ ++ return possible_cpus->cpus[cpu_index].arch_id; ++} ++ + CPUState *qemu_get_cpu(int index) + { + CPUState *cpu; +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index c0c8320413..c30636a936 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -538,6 +538,17 @@ struct CPUState { + GArray *plugin_mem_cbs; + #endif + ++ /* ++ * Some architectures do not allow *presence* of vCPUs to be changed ++ * after guest has booted using information specified by VMM/firmware ++ * via ACPI MADT at the boot time. Thus to enable vCPU hotplug on these ++ * architectures possible vCPU can have CPUState object in 'disabled' ++ * state or can also not have CPUState object at all. This is possible ++ * when vCPU Hotplug is supported and vCPUs are 'yet-to-be-plugged' in ++ * the QOM or have been hot-unplugged. ++ * By default every CPUState is enabled as of now across all archs. ++ */ ++ bool disabled; + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; +@@ -913,6 +924,48 @@ static inline bool cpu_in_exclusive_context(const CPUState *cpu) + */ + CPUState *qemu_get_cpu(int index); + ++/** ++ * qemu_get_possible_cpu: ++ * @index: The CPUState@cpu_index value of the CPU to obtain. ++ * Input index MUST be in range [0, Max Possible CPUs) ++ * ++ * If CPUState object exists,then it gets a CPU matching ++ * @index in the possible CPU array. ++ * ++ * Returns: The possible CPU or %NULL if CPU does not exist. ++ */ ++CPUState *qemu_get_possible_cpu(int index); ++ ++/** ++ * qemu_present_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is amongst the present possible vcpus. ++ * ++ * Returns: True if it is present possible vCPU else false ++ */ ++bool qemu_present_cpu(CPUState *cpu); ++ ++/** ++ * qemu_enabled_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is enabled. 
+ * + * Returns: True if it is 'enabled' else false ++ */ ++bool qemu_enabled_cpu(CPUState *cpu); ++ ++/** ++ * qemu_get_cpu_archid: ++ * @cpu_index: possible vCPU for which arch-id needs to be retrieved ++ * ++ * Fetches the vCPU arch-id from the present possible vCPUs. ++ * ++ * Returns: arch-id of the possible vCPU ++ */ ++uint64_t qemu_get_cpu_archid(int cpu_index); ++ + /** + * cpu_exists: + * @id: Guest-exposed CPU ID to lookup. +-- +2.27.0 + diff --git a/cris-do-not-leak-struct-cris_disasm_data.patch b/cris-do-not-leak-struct-cris_disasm_data.patch deleted file mode 100644 index fa7623fe1878eca815805e853d64ff9b2d8a88a3..0000000000000000000000000000000000000000 --- a/cris-do-not-leak-struct-cris_disasm_data.patch +++ /dev/null @@ -1,139 +0,0 @@ -From d0586065e67b5df2611f4cf61eb791d48b78ff77 Mon Sep 17 00:00:00 2001 -From: lizhengui -Date: Wed, 9 Sep 2020 14:42:59 +0800 -Subject: [PATCH] cris: do not leak struct cris_disasm_data - -Use a stack-allocated struct to avoid a memory leak. - -Signed-off-by: Paolo Bonzini ---- - disas/cris.c | 65 ++++++++++++++++++++++++++++------------------------ - 1 file changed, 35 insertions(+), 30 deletions(-) - -diff --git a/disas/cris.c b/disas/cris.c -index 2f43c9b2..f3ff44ba 100644 ---- a/disas/cris.c -+++ b/disas/cris.c -@@ -1294,24 +1294,17 @@ static int cris_constraint - /* Parse disassembler options and store state in info. FIXME: For the - time being, we abuse static variables. */ - --static bfd_boolean --cris_parse_disassembler_options (disassemble_info *info, -+static void -+cris_parse_disassembler_options (struct cris_disasm_data *disdata, -+ char *disassembler_options, - enum cris_disass_family distype) - { -- struct cris_disasm_data *disdata; -- -- info->private_data = calloc (1, sizeof (struct cris_disasm_data)); -- disdata = (struct cris_disasm_data *) info->private_data; -- if (disdata == NULL) -- return false; -- - /* Default true. 
*/ - disdata->trace_case -- = (info->disassembler_options == NULL -- || (strcmp (info->disassembler_options, "nocase") != 0)); -+ = (disassembler_options == NULL -+ || (strcmp (disassembler_options, "nocase") != 0)); - - disdata->distype = distype; -- return true; - } - - static const struct cris_spec_reg * -@@ -2736,9 +2729,11 @@ static int - print_insn_cris_with_register_prefix (bfd_vma vma, - disassemble_info *info) - { -- if (info->private_data == NULL -- && !cris_parse_disassembler_options (info, cris_dis_v0_v10)) -- return -1; -+ struct cris_disasm_data disdata; -+ info->private_data = &disdata; -+ cris_parse_disassembler_options (&disdata, info->disassembler_options, -+ cris_dis_v0_v10); -+ - return print_insn_cris_generic (vma, info, true); - } - /* Disassemble, prefixing register names with `$'. CRIS v32. */ -@@ -2747,9 +2742,11 @@ static int - print_insn_crisv32_with_register_prefix (bfd_vma vma, - disassemble_info *info) - { -- if (info->private_data == NULL -- && !cris_parse_disassembler_options (info, cris_dis_v32)) -- return -1; -+ struct cris_disasm_data disdata; -+ info->private_data = &disdata; -+ cris_parse_disassembler_options (&disdata, info->disassembler_options, -+ cris_dis_v32); -+ - return print_insn_cris_generic (vma, info, true); - } - -@@ -2761,9 +2758,11 @@ static int - print_insn_crisv10_v32_with_register_prefix (bfd_vma vma, - disassemble_info *info) - { -- if (info->private_data == NULL -- && !cris_parse_disassembler_options (info, cris_dis_common_v10_v32)) -- return -1; -+ struct cris_disasm_data disdata; -+ info->private_data = &disdata; -+ cris_parse_disassembler_options (&disdata, info->disassembler_options, -+ cris_dis_common_v10_v32); -+ - return print_insn_cris_generic (vma, info, true); - } - -@@ -2773,9 +2772,11 @@ static int - print_insn_cris_without_register_prefix (bfd_vma vma, - disassemble_info *info) - { -- if (info->private_data == NULL -- && !cris_parse_disassembler_options (info, cris_dis_v0_v10)) -- return -1; -+ 
struct cris_disasm_data disdata; -+ info->private_data = &disdata; -+ cris_parse_disassembler_options (&disdata, info->disassembler_options, -+ cris_dis_v0_v10); -+ - return print_insn_cris_generic (vma, info, false); - } - -@@ -2785,9 +2786,11 @@ static int - print_insn_crisv32_without_register_prefix (bfd_vma vma, - disassemble_info *info) - { -- if (info->private_data == NULL -- && !cris_parse_disassembler_options (info, cris_dis_v32)) -- return -1; -+ struct cris_disasm_data disdata; -+ info->private_data = &disdata; -+ cris_parse_disassembler_options (&disdata, info->disassembler_options, -+ cris_dis_v32); -+ - return print_insn_cris_generic (vma, info, false); - } - -@@ -2798,9 +2801,11 @@ static int - print_insn_crisv10_v32_without_register_prefix (bfd_vma vma, - disassemble_info *info) - { -- if (info->private_data == NULL -- && !cris_parse_disassembler_options (info, cris_dis_common_v10_v32)) -- return -1; -+ struct cris_disasm_data disdata; -+ info->private_data = &disdata; -+ cris_parse_disassembler_options (&disdata, info->disassembler_options, -+ cris_dis_common_v10_v32); -+ - return print_insn_cris_generic (vma, info, false); - } - #endif --- -2.19.1 - diff --git a/crypto-Introduce-SM3-hash-hmac-pbkdf-algorithm.patch b/crypto-Introduce-SM3-hash-hmac-pbkdf-algorithm.patch new file mode 100644 index 0000000000000000000000000000000000000000..5a599c9fe61190c3b5dab9be3d3e4ee9e082c985 --- /dev/null +++ b/crypto-Introduce-SM3-hash-hmac-pbkdf-algorithm.patch @@ -0,0 +1,403 @@ +From 7b7742e137fbf9283cbbfb823fcf2ebe14df3154 Mon Sep 17 00:00:00 2001 +From: gaochuanji +Date: Mon, 19 Aug 2024 10:52:49 +0800 +Subject: [PATCH] crypto: Introduce SM3 hash hmac pbkdf algorithm + +Introduce the SM3 cryptographic hash algorithm (GB/T 32905-2016). + +SM3 (GB/T 32905-2016) is a cryptographic standard issued by the +Organization of State Commercial Cryptography Administration (OSCCA) +as an authorized cryptographic algorithm for use within China. 
+ +Detect the SM3 cryptographic hash algorithm and enable the feature silently +if it is available. + +Signed-off-by: cheliequan +--- + crypto/hash-gcrypt.c | 3 +++ + crypto/hash-nettle.c | 14 ++++++++++++ + crypto/hash.c | 3 +++ + crypto/hmac-gcrypt.c | 3 +++ + crypto/hmac-nettle.c | 11 ++++++++++ + crypto/pbkdf-gcrypt.c | 6 ++++++ + crypto/pbkdf-nettle.c | 13 ++++++++++++ + meson.build | 39 ++++++++++++++++++++++++++++++++++ + qapi/crypto.json | 4 +++- + tests/unit/test-crypto-hash.c | 16 ++++++++++++++ + tests/unit/test-crypto-hmac.c | 8 +++++++ + tests/unit/test-crypto-pbkdf.c | 16 ++++++++++++++ + 12 files changed, 135 insertions(+), 1 deletion(-) + +diff --git a/crypto/hash-gcrypt.c b/crypto/hash-gcrypt.c +index 829e48258d..d3bdfe5633 100644 +--- a/crypto/hash-gcrypt.c ++++ b/crypto/hash-gcrypt.c +@@ -33,6 +33,9 @@ static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_SHA384] = GCRY_MD_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GCRY_MD_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MD_RMD160, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = GCRY_MD_SM3, ++#endif + }; + + gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) +diff --git a/crypto/hash-nettle.c b/crypto/hash-nettle.c +index 1ca1a41062..0c2f8ce86c 100644 +--- a/crypto/hash-nettle.c ++++ b/crypto/hash-nettle.c +@@ -25,6 +25,9 @@ + #include + #include + #include ++#ifdef CONFIG_CRYPTO_SM3 ++#include ++#endif + + typedef void (*qcrypto_nettle_init)(void *ctx); + typedef void (*qcrypto_nettle_write)(void *ctx, +@@ -42,6 +45,9 @@ union qcrypto_hash_ctx { + struct sha384_ctx sha384; + struct sha512_ctx sha512; + struct ripemd160_ctx ripemd160; ++#ifdef CONFIG_CRYPTO_SM3 ++ struct sm3_ctx sm3; ++#endif + }; + + struct qcrypto_hash_alg { +@@ -92,6 +98,14 @@ struct qcrypto_hash_alg { + .result = (qcrypto_nettle_result)ripemd160_digest, + .len = RIPEMD160_DIGEST_SIZE, + }, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = { ++ .init = (qcrypto_nettle_init)sm3_init, ++ 
.write = (qcrypto_nettle_write)sm3_update, ++ .result = (qcrypto_nettle_result)sm3_digest, ++ .len = SM3_DIGEST_SIZE, ++ }, ++#endif + }; + + gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) +diff --git a/crypto/hash.c b/crypto/hash.c +index b0f8228bdc..8f1502ce68 100644 +--- a/crypto/hash.c ++++ b/crypto/hash.c +@@ -30,6 +30,9 @@ static size_t qcrypto_hash_alg_size[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_SHA384] = 48, + [QCRYPTO_HASH_ALG_SHA512] = 64, + [QCRYPTO_HASH_ALG_RIPEMD160] = 20, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = 32, ++#endif + }; + + size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg) +diff --git a/crypto/hmac-gcrypt.c b/crypto/hmac-gcrypt.c +index 0c6f979711..888afb86ed 100644 +--- a/crypto/hmac-gcrypt.c ++++ b/crypto/hmac-gcrypt.c +@@ -26,6 +26,9 @@ static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_SHA384] = GCRY_MAC_HMAC_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GCRY_MAC_HMAC_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MAC_HMAC_RMD160, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = GCRY_MAC_HMAC_SM3, ++#endif + }; + + typedef struct QCryptoHmacGcrypt QCryptoHmacGcrypt; +diff --git a/crypto/hmac-nettle.c b/crypto/hmac-nettle.c +index 1ad6c4f253..e51e3319ab 100644 +--- a/crypto/hmac-nettle.c ++++ b/crypto/hmac-nettle.c +@@ -38,6 +38,9 @@ struct QCryptoHmacNettle { + struct hmac_sha256_ctx sha256_ctx; /* equals hmac_sha224_ctx */ + struct hmac_sha512_ctx sha512_ctx; /* equals hmac_sha384_ctx */ + struct hmac_ripemd160_ctx ripemd160_ctx; ++#ifdef CONFIG_CRYPTO_SM3 ++ struct hmac_sm3_ctx ctx; ++#endif + } u; + }; + +@@ -89,6 +92,14 @@ struct qcrypto_nettle_hmac_alg { + .digest = (qcrypto_nettle_hmac_digest)hmac_ripemd160_digest, + .len = RIPEMD160_DIGEST_SIZE, + }, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = { ++ .setkey = (qcrypto_nettle_hmac_setkey)hmac_sm3_set_key, ++ .update = (qcrypto_nettle_hmac_update)hmac_sm3_update, ++ .digest = 
(qcrypto_nettle_hmac_digest)hmac_sm3_digest, ++ .len = SM3_DIGEST_SIZE, ++ }, ++#endif + }; + + bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg) +diff --git a/crypto/pbkdf-gcrypt.c b/crypto/pbkdf-gcrypt.c +index a8d8e64f4d..09b38d0d6e 100644 +--- a/crypto/pbkdf-gcrypt.c ++++ b/crypto/pbkdf-gcrypt.c +@@ -33,6 +33,9 @@ bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: ++#ifdef CONFIG_CRYPTO_SM3 ++ case QCRYPTO_HASH_ALG_SM3: ++#endif + return true; + default: + return false; +@@ -54,6 +57,9 @@ int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + [QCRYPTO_HASH_ALG_SHA384] = GCRY_MD_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GCRY_MD_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MD_RMD160, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = GCRY_MD_SM3, ++#endif + }; + int ret; + +diff --git a/crypto/pbkdf-nettle.c b/crypto/pbkdf-nettle.c +index d6293c25a1..5fea570bd3 100644 +--- a/crypto/pbkdf-nettle.c ++++ b/crypto/pbkdf-nettle.c +@@ -34,6 +34,9 @@ bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: ++#ifdef CONFIG_CRYPTO_SM3 ++ case QCRYPTO_HASH_ALG_SM3: ++#endif + return true; + default: + return false; +@@ -55,6 +58,9 @@ int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + struct hmac_sha384_ctx sha384; + struct hmac_sha512_ctx sha512; + struct hmac_ripemd160_ctx ripemd160; ++#ifdef CONFIG_CRYPTO_SM3 ++ struct hmac_sm3_ctx sm3; ++#endif + } ctx; + + if (iterations > UINT_MAX) { +@@ -106,6 +112,13 @@ int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + PBKDF2(&ctx.ripemd160, hmac_ripemd160_update, hmac_ripemd160_digest, + RIPEMD160_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; ++#ifdef CONFIG_CRYPTO_SM3 ++ case QCRYPTO_HASH_ALG_SM3: ++ hmac_sm3_set_key(&ctx.sm3, nkey, key); ++ PBKDF2(&ctx.sm3, hmac_sm3_update, hmac_sm3_digest, ++ SM3_DIGEST_SIZE, iterations, 
nsalt, salt, nout, out); ++ break; ++#endif + + default: + error_setg_errno(errp, ENOSYS, +diff --git a/meson.build b/meson.build +index 089f45d386..4024f9a4bb 100644 +--- a/meson.build ++++ b/meson.build +@@ -1486,6 +1486,7 @@ gcrypt = not_found + nettle = not_found + hogweed = not_found + crypto_sm4 = not_found ++crypto_sm3 = not_found + xts = 'none' + + if get_option('nettle').enabled() and get_option('gcrypt').enabled() +@@ -1522,6 +1523,17 @@ if not gnutls_crypto.found() + }''', dependencies: gcrypt) + crypto_sm4 = not_found + endif ++ crypto_sm3 = gcrypt ++ # SM3 ALG is available in libgcrypt >= 1.8 ++ if gcrypt.found() and not cc.links(''' ++ #include ++ int main(void) { ++ gcry_md_hd_t handler; ++ gcry_md_open(&handler, GCRY_MD_SM3, 0); ++ return 0; ++ }''', dependencies: gcrypt) ++ crypto_sm3 = not_found ++ endif + endif + if (not get_option('nettle').auto() or have_system) and not gcrypt.found() + nettle = dependency('nettle', version: '>=3.4', +@@ -1542,6 +1554,31 @@ if not gnutls_crypto.found() + }''', dependencies: nettle) + crypto_sm4 = not_found + endif ++ crypto_sm3 = nettle ++ # SM3 ALG is available in nettle >= 3.4 ++ if nettle.found() and not cc.links(''' ++ #include ++ #include ++ int main(void) { ++ struct sm3_ctx ctx; ++ struct hmac_sm3_ctx hmac_ctx; ++ unsigned char data[64] = {0}; ++ unsigned char output[32]; ++ ++ // SM3 hash function test ++ sm3_init(&ctx); ++ sm3_update(&ctx, 64, data); ++ sm3_digest(&ctx, 32, data); ++ ++ // HMAC-SM3 test ++ hmac_sm3_set_key(&hmac_ctx, 32, data); ++ hmac_sm3_update(&hmac_ctx, 64, data); ++ hmac_sm3_digest(&hmac_ctx, 32, output); ++ ++ return 0; ++ }''', dependencies: nettle) ++ crypto_sm3 = not_found ++ endif + endif + endif + +@@ -2229,6 +2266,7 @@ config_host_data.set('CONFIG_TASN1', tasn1.found()) + config_host_data.set('CONFIG_GCRYPT', gcrypt.found()) + config_host_data.set('CONFIG_NETTLE', nettle.found()) + config_host_data.set('CONFIG_CRYPTO_SM4', crypto_sm4.found()) 
++config_host_data.set('CONFIG_CRYPTO_SM3', crypto_sm3.found()) + config_host_data.set('CONFIG_HOGWEED', hogweed.found()) + config_host_data.set('CONFIG_QEMU_PRIVATE_XTS', xts == 'private') + config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim) +@@ -4306,6 +4344,7 @@ if nettle.found() + summary_info += {' XTS': xts != 'private'} + endif + summary_info += {'SM4 ALG support': crypto_sm4} ++summary_info += {'SM3 ALG support': crypto_sm3} + summary_info += {'AF_ALG support': have_afalg} + summary_info += {'rng-none': get_option('rng_none')} + summary_info += {'Linux keyring': have_keyring} +diff --git a/qapi/crypto.json b/qapi/crypto.json +index 2f2aeff5fd..af38f0a4bd 100644 +--- a/qapi/crypto.json ++++ b/qapi/crypto.json +@@ -58,11 +58,13 @@ + # + # @ripemd160: RIPEMD-160. (since 2.7) + # ++# @sm3: SM3. (since 8.2.0) ++# + # Since: 2.6 + ## + { 'enum': 'QCryptoHashAlgorithm', + 'prefix': 'QCRYPTO_HASH_ALG', +- 'data': ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', 'ripemd160']} ++ 'data': ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', 'ripemd160', 'sm3']} + + ## + # @QCryptoCipherAlgorithm: +diff --git a/tests/unit/test-crypto-hash.c b/tests/unit/test-crypto-hash.c +index 1f4abb822b..61908e1769 100644 +--- a/tests/unit/test-crypto-hash.c ++++ b/tests/unit/test-crypto-hash.c +@@ -42,6 +42,9 @@ + "63b54e4cb2d2032b393994aa263c0dbb" \ + "e00a9f2fe9ef6037352232a1eec55ee7" + #define OUTPUT_RIPEMD160 "f3d658fad3fdfb2b52c9369cf0d441249ddfa8a0" ++#ifdef CONFIG_CRYPTO_SM3 ++#define OUTPUT_SM3 "d4a97db105b477b84c4f20ec9c31a6c814e2705a0b83a5a89748d75f0ef456a1" ++#endif + + #define OUTPUT_MD5_B64 "Yo0gY3FWMDWrjvYvSSveyQ==" + #define OUTPUT_SHA1_B64 "sudPJnWKOkIeUJzuBFJEt4dTzAI=" +@@ -54,6 +57,10 @@ + "7sVe5w==" + #define OUTPUT_RIPEMD160_B64 "89ZY+tP9+ytSyTac8NRBJJ3fqKA=" + ++#ifdef CONFIG_CRYPTO_SM3 ++#define OUTPUT_SM3_B64 "1Kl9sQW0d7hMTyDsnDGmyBTicFoLg6Wol0jXXw70VqE=" ++#endif ++ + static const char *expected_outputs[] = { + [QCRYPTO_HASH_ALG_MD5] = 
OUTPUT_MD5, + [QCRYPTO_HASH_ALG_SHA1] = OUTPUT_SHA1, +@@ -62,6 +69,9 @@ static const char *expected_outputs[] = { + [QCRYPTO_HASH_ALG_SHA384] = OUTPUT_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = OUTPUT_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = OUTPUT_RIPEMD160, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = OUTPUT_SM3, ++#endif + }; + static const char *expected_outputs_b64[] = { + [QCRYPTO_HASH_ALG_MD5] = OUTPUT_MD5_B64, +@@ -71,6 +81,9 @@ static const char *expected_outputs_b64[] = { + [QCRYPTO_HASH_ALG_SHA384] = OUTPUT_SHA384_B64, + [QCRYPTO_HASH_ALG_SHA512] = OUTPUT_SHA512_B64, + [QCRYPTO_HASH_ALG_RIPEMD160] = OUTPUT_RIPEMD160_B64, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = OUTPUT_SM3_B64, ++#endif + }; + static const int expected_lens[] = { + [QCRYPTO_HASH_ALG_MD5] = 16, +@@ -80,6 +93,9 @@ static const int expected_lens[] = { + [QCRYPTO_HASH_ALG_SHA384] = 48, + [QCRYPTO_HASH_ALG_SHA512] = 64, + [QCRYPTO_HASH_ALG_RIPEMD160] = 20, ++#ifdef CONFIG_CRYPTO_SM3 ++ [QCRYPTO_HASH_ALG_SM3] = 32, ++#endif + }; + + static const char hex[] = "0123456789abcdef"; +diff --git a/tests/unit/test-crypto-hmac.c b/tests/unit/test-crypto-hmac.c +index 23eb724d94..b1d04e9fcc 100644 +--- a/tests/unit/test-crypto-hmac.c ++++ b/tests/unit/test-crypto-hmac.c +@@ -76,6 +76,14 @@ static QCryptoHmacTestData test_data[] = { + "94964ed4c1155b62b668c241d67279e5" + "8a711676", + }, ++#ifdef CONFIG_CRYPTO_SM3 ++ { ++ .alg = QCRYPTO_HASH_ALG_SM3, ++ .hex_digest = ++ "760e3799332bc913819b930085360ddb" ++ "c05529261313d5b15b75bab4fd7ae91e", ++ }, ++#endif + }; + + static const char hex[] = "0123456789abcdef"; +diff --git a/tests/unit/test-crypto-pbkdf.c b/tests/unit/test-crypto-pbkdf.c +index 43c417f6b4..3d76593c86 100644 +--- a/tests/unit/test-crypto-pbkdf.c ++++ b/tests/unit/test-crypto-pbkdf.c +@@ -326,6 +326,22 @@ static QCryptoPbkdfTestData test_data[] = { + "\xce\xbf\x91\x14\x8b\x5c\x48\x41", + .nout = 32 + }, ++#ifdef CONFIG_CRYPTO_SM3 ++ { ++ .path = 
"/crypto/pbkdf/nonrfc/sm3/iter2", ++ .hash = QCRYPTO_HASH_ALG_SM3, ++ .iterations = 2, ++ .key = "password", ++ .nkey = 8, ++ .salt = "ATHENA.MIT.EDUraeburn", ++ .nsalt = 21, ++ .out = "\x48\x71\x1b\x58\xa3\xcb\xce\x06" ++ "\xba\xad\x77\xa8\xb5\xb9\xd8\x07" ++ "\x6a\xe2\xb3\x5b\x95\xce\xc8\xce" ++ "\xe7\xb1\xcb\xee\x61\xdf\x04\xea", ++ .nout = 32 ++ }, ++#endif + #if 0 + { + .path = "/crypto/pbkdf/nonrfc/whirlpool/iter1200", +-- +2.41.0.windows.1 + diff --git a/crypto-Introduce-SM4-symmetric-cipher-algorithm.patch b/crypto-Introduce-SM4-symmetric-cipher-algorithm.patch new file mode 100644 index 0000000000000000000000000000000000000000..a7e40b8d574e82042894b4b8e9ad6b7a368ca9c0 --- /dev/null +++ b/crypto-Introduce-SM4-symmetric-cipher-algorithm.patch @@ -0,0 +1,306 @@ +From f402887e0c3e97dcbd6d1929ca9908ec57e2bb1f Mon Sep 17 00:00:00 2001 +From: Hyman Huang +Date: Thu, 7 Dec 2023 23:47:35 +0800 +Subject: [PATCH] crypto: Introduce SM4 symmetric cipher algorithm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce the SM4 cipher algorithms (OSCCA GB/T 32907-2016). + +SM4 (GBT.32907-2016) is a cryptographic standard issued by the +Organization of State Commercial Administration of China (OSCCA) +as an authorized cryptographic algorithms for the use within China. + +Detect the SM4 cipher algorithms and enable the feature silently +if it is available. + +Signed-off-by: Hyman Huang +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Daniel P. 
Berrangé +Signed-off-by: cheliequan +--- + crypto/block-luks.c | 11 ++++++++ + crypto/cipher-gcrypt.c.inc | 8 ++++++ + crypto/cipher-nettle.c.inc | 49 +++++++++++++++++++++++++++++++++ + crypto/cipher.c | 6 ++++ + meson.build | 26 +++++++++++++++++ + qapi/crypto.json | 5 +++- + tests/unit/test-crypto-cipher.c | 13 +++++++++ + 7 files changed, 117 insertions(+), 1 deletion(-) + +diff --git a/crypto/block-luks.c b/crypto/block-luks.c +index fb01ec38bb..f0813d69b4 100644 +--- a/crypto/block-luks.c ++++ b/crypto/block-luks.c +@@ -95,12 +95,23 @@ qcrypto_block_luks_cipher_size_map_twofish[] = { + { 0, 0 }, + }; + ++#ifdef CONFIG_CRYPTO_SM4 ++static const QCryptoBlockLUKSCipherSizeMap ++qcrypto_block_luks_cipher_size_map_sm4[] = { ++ { 16, QCRYPTO_CIPHER_ALG_SM4}, ++ { 0, 0 }, ++}; ++#endif ++ + static const QCryptoBlockLUKSCipherNameMap + qcrypto_block_luks_cipher_name_map[] = { + { "aes", qcrypto_block_luks_cipher_size_map_aes }, + { "cast5", qcrypto_block_luks_cipher_size_map_cast5 }, + { "serpent", qcrypto_block_luks_cipher_size_map_serpent }, + { "twofish", qcrypto_block_luks_cipher_size_map_twofish }, ++#ifdef CONFIG_CRYPTO_SM4 ++ { "sm4", qcrypto_block_luks_cipher_size_map_sm4}, ++#endif + }; + + QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSKeySlot) != 48); +diff --git a/crypto/cipher-gcrypt.c.inc b/crypto/cipher-gcrypt.c.inc +index a6a0117717..1377cbaf14 100644 +--- a/crypto/cipher-gcrypt.c.inc ++++ b/crypto/cipher-gcrypt.c.inc +@@ -35,6 +35,9 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + case QCRYPTO_CIPHER_ALG_SERPENT_256: + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + case QCRYPTO_CIPHER_ALG_TWOFISH_256: ++#ifdef CONFIG_CRYPTO_SM4 ++ case QCRYPTO_CIPHER_ALG_SM4: ++#endif + break; + default: + return false; +@@ -219,6 +222,11 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + case QCRYPTO_CIPHER_ALG_TWOFISH_256: + gcryalg = GCRY_CIPHER_TWOFISH; + break; ++#ifdef CONFIG_CRYPTO_SM4 ++ case QCRYPTO_CIPHER_ALG_SM4: ++ gcryalg 
= GCRY_CIPHER_SM4; ++ break; ++#endif + default: + error_setg(errp, "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_str(alg)); +diff --git a/crypto/cipher-nettle.c.inc b/crypto/cipher-nettle.c.inc +index 24cc61f87b..42b39e18a2 100644 +--- a/crypto/cipher-nettle.c.inc ++++ b/crypto/cipher-nettle.c.inc +@@ -33,6 +33,9 @@ + #ifndef CONFIG_QEMU_PRIVATE_XTS + #include + #endif ++#ifdef CONFIG_CRYPTO_SM4 ++#include ++#endif + + static inline bool qcrypto_length_check(size_t len, size_t blocksize, + Error **errp) +@@ -426,6 +429,30 @@ DEFINE_ECB_CBC_CTR_XTS(qcrypto_nettle_twofish, + QCryptoNettleTwofish, TWOFISH_BLOCK_SIZE, + twofish_encrypt_native, twofish_decrypt_native) + ++#ifdef CONFIG_CRYPTO_SM4 ++typedef struct QCryptoNettleSm4 { ++ QCryptoCipher base; ++ struct sm4_ctx key[2]; ++} QCryptoNettleSm4; ++ ++static void sm4_encrypt_native(void *ctx, size_t length, ++ uint8_t *dst, const uint8_t *src) ++{ ++ struct sm4_ctx *keys = ctx; ++ sm4_crypt(&keys[0], length, dst, src); ++} ++ ++static void sm4_decrypt_native(void *ctx, size_t length, ++ uint8_t *dst, const uint8_t *src) ++{ ++ struct sm4_ctx *keys = ctx; ++ sm4_crypt(&keys[1], length, dst, src); ++} ++ ++DEFINE_ECB(qcrypto_nettle_sm4, ++ QCryptoNettleSm4, SM4_BLOCK_SIZE, ++ sm4_encrypt_native, sm4_decrypt_native) ++#endif + + bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) +@@ -443,6 +470,9 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + case QCRYPTO_CIPHER_ALG_TWOFISH_128: + case QCRYPTO_CIPHER_ALG_TWOFISH_192: + case QCRYPTO_CIPHER_ALG_TWOFISH_256: ++#ifdef CONFIG_CRYPTO_SM4 ++ case QCRYPTO_CIPHER_ALG_SM4: ++#endif + break; + default: + return false; +@@ -701,6 +731,25 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + + return &ctx->base; + } ++#ifdef CONFIG_CRYPTO_SM4 ++ case QCRYPTO_CIPHER_ALG_SM4: ++ { ++ QCryptoNettleSm4 *ctx = g_new0(QCryptoNettleSm4, 1); ++ ++ switch (mode) { ++ case QCRYPTO_CIPHER_MODE_ECB: ++ 
ctx->base.driver = &qcrypto_nettle_sm4_driver_ecb; ++ break; ++ default: ++ goto bad_cipher_mode; ++ } ++ ++ sm4_set_encrypt_key(&ctx->key[0], key); ++ sm4_set_decrypt_key(&ctx->key[1], key); ++ ++ return &ctx->base; ++ } ++#endif + + default: + error_setg(errp, "Unsupported cipher algorithm %s", +diff --git a/crypto/cipher.c b/crypto/cipher.c +index 74b09a5b26..5f512768ea 100644 +--- a/crypto/cipher.c ++++ b/crypto/cipher.c +@@ -38,6 +38,9 @@ static const size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = { + [QCRYPTO_CIPHER_ALG_TWOFISH_128] = 16, + [QCRYPTO_CIPHER_ALG_TWOFISH_192] = 24, + [QCRYPTO_CIPHER_ALG_TWOFISH_256] = 32, ++#ifdef CONFIG_CRYPTO_SM4 ++ [QCRYPTO_CIPHER_ALG_SM4] = 16, ++#endif + }; + + static const size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = { +@@ -53,6 +56,9 @@ static const size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = { + [QCRYPTO_CIPHER_ALG_TWOFISH_128] = 16, + [QCRYPTO_CIPHER_ALG_TWOFISH_192] = 16, + [QCRYPTO_CIPHER_ALG_TWOFISH_256] = 16, ++#ifdef CONFIG_CRYPTO_SM4 ++ [QCRYPTO_CIPHER_ALG_SM4] = 16, ++#endif + }; + + static const bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = { +diff --git a/meson.build b/meson.build +index 0c62b4156d..089f45d386 100644 +--- a/meson.build ++++ b/meson.build +@@ -1485,6 +1485,7 @@ endif + gcrypt = not_found + nettle = not_found + hogweed = not_found ++crypto_sm4 = not_found + xts = 'none' + + if get_option('nettle').enabled() and get_option('gcrypt').enabled() +@@ -1510,6 +1511,17 @@ if not gnutls_crypto.found() + cc.find_library('gpg-error', required: true)], + version: gcrypt.version()) + endif ++ crypto_sm4 = gcrypt ++ # SM4 ALG is available in libgcrypt >= 1.9 ++ if gcrypt.found() and not cc.links(''' ++ #include ++ int main(void) { ++ gcry_cipher_hd_t handler; ++ gcry_cipher_open(&handler, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_ECB, 0); ++ return 0; ++ }''', dependencies: gcrypt) ++ crypto_sm4 = not_found ++ endif + endif + if (not get_option('nettle').auto() or have_system) and not gcrypt.found() + nettle = 
dependency('nettle', version: '>=3.4', +@@ -1518,6 +1530,18 @@ if not gnutls_crypto.found() + if nettle.found() and not cc.has_header('nettle/xts.h', dependencies: nettle) + xts = 'private' + endif ++ crypto_sm4 = nettle ++ # SM4 ALG is available in nettle >= 3.9 ++ if nettle.found() and not cc.links(''' ++ #include ++ int main(void) { ++ struct sm4_ctx ctx; ++ unsigned char key[16] = {0}; ++ sm4_set_encrypt_key(&ctx, key); ++ return 0; ++ }''', dependencies: nettle) ++ crypto_sm4 = not_found ++ endif + endif + endif + +@@ -2204,6 +2228,7 @@ config_host_data.set('CONFIG_GNUTLS_CRYPTO', gnutls_crypto.found()) + config_host_data.set('CONFIG_TASN1', tasn1.found()) + config_host_data.set('CONFIG_GCRYPT', gcrypt.found()) + config_host_data.set('CONFIG_NETTLE', nettle.found()) ++config_host_data.set('CONFIG_CRYPTO_SM4', crypto_sm4.found()) + config_host_data.set('CONFIG_HOGWEED', hogweed.found()) + config_host_data.set('CONFIG_QEMU_PRIVATE_XTS', xts == 'private') + config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim) +@@ -4280,6 +4305,7 @@ summary_info += {'nettle': nettle} + if nettle.found() + summary_info += {' XTS': xts != 'private'} + endif ++summary_info += {'SM4 ALG support': crypto_sm4} + summary_info += {'AF_ALG support': have_afalg} + summary_info += {'rng-none': get_option('rng_none')} + summary_info += {'Linux keyring': have_keyring} +diff --git a/qapi/crypto.json b/qapi/crypto.json +index fd3d46ebd1..2f2aeff5fd 100644 +--- a/qapi/crypto.json ++++ b/qapi/crypto.json +@@ -94,6 +94,8 @@ + # + # @twofish-256: Twofish with 256 bit / 32 byte keys + # ++# @sm4: SM4 with 128 bit / 16 byte keys (since 9.0) ++# + # Since: 2.6 + ## + { 'enum': 'QCryptoCipherAlgorithm', +@@ -102,7 +104,8 @@ + 'des', '3des', + 'cast5-128', + 'serpent-128', 'serpent-192', 'serpent-256', +- 'twofish-128', 'twofish-192', 'twofish-256']} ++ 'twofish-128', 'twofish-192', 'twofish-256', ++ 'sm4']} + + ## + # @QCryptoCipherMode: +diff --git a/tests/unit/test-crypto-cipher.c 
b/tests/unit/test-crypto-cipher.c +index d9d9d078ff..11ab1a54fc 100644 +--- a/tests/unit/test-crypto-cipher.c ++++ b/tests/unit/test-crypto-cipher.c +@@ -382,6 +382,19 @@ static QCryptoCipherTestData test_data[] = { + .plaintext = "90afe91bb288544f2c32dc239b2635e6", + .ciphertext = "6cb4561c40bf0a9705931cb6d408e7fa", + }, ++#ifdef CONFIG_CRYPTO_SM4 ++ { ++ /* SM4, GB/T 32907-2016, Appendix A.1 */ ++ .path = "/crypto/cipher/sm4", ++ .alg = QCRYPTO_CIPHER_ALG_SM4, ++ .mode = QCRYPTO_CIPHER_MODE_ECB, ++ .key = "0123456789abcdeffedcba9876543210", ++ .plaintext = ++ "0123456789abcdeffedcba9876543210", ++ .ciphertext = ++ "681edf34d206965e86b3e94f536e4246", ++ }, ++#endif + { + /* #1 32 byte key, 32 byte PTX */ + .path = "/crypto/cipher/aes-xts-128-1", +-- +2.41.0.windows.1 + diff --git a/crypto-add-support-for-gcrypt-s-native-XTS-impl.patch b/crypto-add-support-for-gcrypt-s-native-XTS-impl.patch deleted file mode 100644 index d204f017b830d15bb9609570e39fb11e34676203..0000000000000000000000000000000000000000 --- a/crypto-add-support-for-gcrypt-s-native-XTS-impl.patch +++ /dev/null @@ -1,346 +0,0 @@ -From 84352558eec97cfb0e4517fbb53d75d9f15cbcf9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Mon, 14 Oct 2019 17:28:27 +0100 -Subject: [PATCH] crypto: add support for gcrypt's native XTS impl -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Libgcrypt 1.8.0 added support for the XTS mode. Use this because long -term we wish to delete QEMU's XTS impl to avoid carrying private crypto -algorithm impls. - -As an added benefit, using this improves performance from 531 MB/sec to -670 MB/sec, since we are avoiding several layers of function call -indirection. - -This is even more noticable with the gcrypt builds in Fedora or RHEL-8 -which have a non-upstream patch for FIPS mode which does mutex locking. 
-This is catastrophic for encryption performance with small block sizes, -meaning this patch improves encryption from 240 MB/sec to 670 MB/sec. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefano Garzarella -Signed-off-by: Daniel P. Berrangé ---- - configure | 22 ++++++++++ - crypto/Makefile.objs | 2 +- - crypto/cipher-gcrypt.c | 97 ++++++++++++++++++++++++++++-------------- - tests/Makefile.include | 2 +- - 4 files changed, 88 insertions(+), 35 deletions(-) - -diff --git a/configure b/configure -index 5dcaac3b95..a88cdd5109 100755 ---- a/configure -+++ b/configure -@@ -476,6 +476,8 @@ nettle="" - nettle_xts="no" - gcrypt="" - gcrypt_hmac="no" -+gcrypt_xts="no" -+qemu_private_xts="yes" - auth_pam="" - vte="" - virglrenderer="" -@@ -2974,6 +2976,18 @@ EOF - if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then - gcrypt_hmac=yes - fi -+ cat > $TMPC << EOF -+#include -+int main(void) { -+ gcry_cipher_hd_t handle; -+ gcry_cipher_open(&handle, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_XTS, 0); -+ return 0; -+} -+EOF -+ if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then -+ gcrypt_xts=yes -+ qemu_private_xts=no -+ fi - elif test "$gcrypt" = "yes"; then - feature_not_found "gcrypt" "Install gcrypt devel >= 1.5.0" - else -@@ -6404,6 +6418,11 @@ echo "VTE support $vte $(echo_version $vte $vteversion)" - echo "TLS priority $tls_priority" - echo "GNUTLS support $gnutls" - echo "libgcrypt $gcrypt" -+if test "$gcrypt" = "yes" -+then -+ echo " hmac $gcrypt_hmac" -+ echo " XTS $gcrypt_xts" -+fi - echo "nettle $nettle $(echo_version $nettle $nettle_version)" - if test "$nettle" = "yes" - then -@@ -6889,6 +6908,9 @@ if test "$nettle" = "yes" ; then - echo "CONFIG_NETTLE=y" >> $config_host_mak - echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak - fi -+if test "$qemu_private_xts" = "yes" ; then -+ echo "CONFIG_QEMU_PRIVATE_XTS=y" >> $config_host_mak -+fi - if test "$tasn1" = "yes" ; then - echo "CONFIG_TASN1=y" >> $config_host_mak - fi -diff --git 
a/crypto/Makefile.objs b/crypto/Makefile.objs -index 7fe2fa9da2..cdb01f9de9 100644 ---- a/crypto/Makefile.objs -+++ b/crypto/Makefile.objs -@@ -31,7 +31,7 @@ crypto-obj-y += ivgen-essiv.o - crypto-obj-y += ivgen-plain.o - crypto-obj-y += ivgen-plain64.o - crypto-obj-y += afsplit.o --crypto-obj-y += xts.o -+crypto-obj-$(CONFIG_QEMU_PRIVATE_XTS) += xts.o - crypto-obj-y += block.o - crypto-obj-y += block-qcow.o - crypto-obj-y += block-luks.o -diff --git a/crypto/cipher-gcrypt.c b/crypto/cipher-gcrypt.c -index 5cece9b244..2864099527 100644 ---- a/crypto/cipher-gcrypt.c -+++ b/crypto/cipher-gcrypt.c -@@ -19,7 +19,9 @@ - */ - - #include "qemu/osdep.h" -+#ifdef CONFIG_QEMU_PRIVATE_XTS - #include "crypto/xts.h" -+#endif - #include "cipherpriv.h" - - #include -@@ -59,10 +61,12 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, - typedef struct QCryptoCipherGcrypt QCryptoCipherGcrypt; - struct QCryptoCipherGcrypt { - gcry_cipher_hd_t handle; -- gcry_cipher_hd_t tweakhandle; - size_t blocksize; -+#ifdef CONFIG_QEMU_PRIVATE_XTS -+ gcry_cipher_hd_t tweakhandle; - /* Initialization vector or Counter */ - uint8_t *iv; -+#endif - }; - - static void -@@ -74,10 +78,12 @@ qcrypto_gcrypt_cipher_free_ctx(QCryptoCipherGcrypt *ctx, - } - - gcry_cipher_close(ctx->handle); -+#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - gcry_cipher_close(ctx->tweakhandle); - } - g_free(ctx->iv); -+#endif - g_free(ctx); - } - -@@ -94,8 +100,14 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, - - switch (mode) { - case QCRYPTO_CIPHER_MODE_ECB: -+ gcrymode = GCRY_CIPHER_MODE_ECB; -+ break; - case QCRYPTO_CIPHER_MODE_XTS: -+#ifdef CONFIG_QEMU_PRIVATE_XTS - gcrymode = GCRY_CIPHER_MODE_ECB; -+#else -+ gcrymode = GCRY_CIPHER_MODE_XTS; -+#endif - break; - case QCRYPTO_CIPHER_MODE_CBC: - gcrymode = GCRY_CIPHER_MODE_CBC; -@@ -172,6 +184,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, - gcry_strerror(err)); - 
goto error; - } -+#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0); - if (err != 0) { -@@ -180,6 +193,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, - goto error; - } - } -+#endif - - if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) { - /* We're using standard DES cipher from gcrypt, so we need -@@ -191,6 +205,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, - g_free(rfbkey); - ctx->blocksize = 8; - } else { -+#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - nkey /= 2; - err = gcry_cipher_setkey(ctx->handle, key, nkey); -@@ -201,8 +216,11 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, - } - err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey); - } else { -+#endif - err = gcry_cipher_setkey(ctx->handle, key, nkey); -+#ifdef CONFIG_QEMU_PRIVATE_XTS - } -+#endif - if (err != 0) { - error_setg(errp, "Cannot set key: %s", - gcry_strerror(err)); -@@ -228,6 +246,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, - } - } - -+#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - if (ctx->blocksize != XTS_BLOCK_SIZE) { - error_setg(errp, -@@ -237,6 +256,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, - } - ctx->iv = g_new0(uint8_t, ctx->blocksize); - } -+#endif - - return ctx; - -@@ -253,6 +273,7 @@ qcrypto_gcrypt_cipher_ctx_free(QCryptoCipher *cipher) - } - - -+#ifdef CONFIG_QEMU_PRIVATE_XTS - static void qcrypto_gcrypt_xts_encrypt(const void *ctx, - size_t length, - uint8_t *dst, -@@ -272,6 +293,7 @@ static void qcrypto_gcrypt_xts_decrypt(const void *ctx, - err = gcry_cipher_decrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); - g_assert(err == 0); - } -+#endif - - static int - qcrypto_gcrypt_cipher_encrypt(QCryptoCipher *cipher, -@@ -289,20 +311,23 @@ 
qcrypto_gcrypt_cipher_encrypt(QCryptoCipher *cipher, - return -1; - } - -+#ifdef CONFIG_QEMU_PRIVATE_XTS - if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) { - xts_encrypt(ctx->handle, ctx->tweakhandle, - qcrypto_gcrypt_xts_encrypt, - qcrypto_gcrypt_xts_decrypt, - ctx->iv, len, out, in); -- } else { -- err = gcry_cipher_encrypt(ctx->handle, -- out, len, -- in, len); -- if (err != 0) { -- error_setg(errp, "Cannot encrypt data: %s", -- gcry_strerror(err)); -- return -1; -- } -+ return 0; -+ } -+#endif -+ -+ err = gcry_cipher_encrypt(ctx->handle, -+ out, len, -+ in, len); -+ if (err != 0) { -+ error_setg(errp, "Cannot encrypt data: %s", -+ gcry_strerror(err)); -+ return -1; - } - - return 0; -@@ -325,20 +350,23 @@ qcrypto_gcrypt_cipher_decrypt(QCryptoCipher *cipher, - return -1; - } - -+#ifdef CONFIG_QEMU_PRIVATE_XTS - if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) { - xts_decrypt(ctx->handle, ctx->tweakhandle, - qcrypto_gcrypt_xts_encrypt, - qcrypto_gcrypt_xts_decrypt, - ctx->iv, len, out, in); -- } else { -- err = gcry_cipher_decrypt(ctx->handle, -- out, len, -- in, len); -- if (err != 0) { -- error_setg(errp, "Cannot decrypt data: %s", -- gcry_strerror(err)); -- return -1; -- } -+ return 0; -+ } -+#endif -+ -+ err = gcry_cipher_decrypt(ctx->handle, -+ out, len, -+ in, len); -+ if (err != 0) { -+ error_setg(errp, "Cannot decrypt data: %s", -+ gcry_strerror(err)); -+ return -1; - } - - return 0; -@@ -358,24 +386,27 @@ qcrypto_gcrypt_cipher_setiv(QCryptoCipher *cipher, - return -1; - } - -+#ifdef CONFIG_QEMU_PRIVATE_XTS - if (ctx->iv) { - memcpy(ctx->iv, iv, niv); -- } else { -- if (cipher->mode == QCRYPTO_CIPHER_MODE_CTR) { -- err = gcry_cipher_setctr(ctx->handle, iv, niv); -- if (err != 0) { -- error_setg(errp, "Cannot set Counter: %s", -+ return 0; -+ } -+#endif -+ -+ if (cipher->mode == QCRYPTO_CIPHER_MODE_CTR) { -+ err = gcry_cipher_setctr(ctx->handle, iv, niv); -+ if (err != 0) { -+ error_setg(errp, "Cannot set Counter: %s", - gcry_strerror(err)); -- return -1; -- } 
-- } else { -- gcry_cipher_reset(ctx->handle); -- err = gcry_cipher_setiv(ctx->handle, iv, niv); -- if (err != 0) { -- error_setg(errp, "Cannot set IV: %s", -+ return -1; -+ } -+ } else { -+ gcry_cipher_reset(ctx->handle); -+ err = gcry_cipher_setiv(ctx->handle, iv, niv); -+ if (err != 0) { -+ error_setg(errp, "Cannot set IV: %s", - gcry_strerror(err)); -- return -1; -- } -+ return -1; - } - } - -diff --git a/tests/Makefile.include b/tests/Makefile.include -index d6de4e1042..3be60ab999 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -132,7 +132,7 @@ check-unit-y += tests/test-base64$(EXESUF) - check-unit-$(call land,$(CONFIG_BLOCK),$(if $(CONFIG_NETTLE),y,$(CONFIG_GCRYPT))) += tests/test-crypto-pbkdf$(EXESUF) - check-unit-$(CONFIG_BLOCK) += tests/test-crypto-ivgen$(EXESUF) - check-unit-$(CONFIG_BLOCK) += tests/test-crypto-afsplit$(EXESUF) --check-unit-$(CONFIG_BLOCK) += tests/test-crypto-xts$(EXESUF) -+check-unit-$(if $(CONFIG_BLOCK),$(CONFIG_QEMU_PRIVATE_XTS)) += tests/test-crypto-xts$(EXESUF) - check-unit-$(CONFIG_BLOCK) += tests/test-crypto-block$(EXESUF) - check-unit-y += tests/test-logging$(EXESUF) - check-unit-$(call land,$(CONFIG_BLOCK),$(CONFIG_REPLICATION)) += tests/test-replication$(EXESUF) --- -2.27.0 - diff --git a/crypto-add-support-for-nettle-s-native-XTS-impl.patch b/crypto-add-support-for-nettle-s-native-XTS-impl.patch deleted file mode 100644 index 5aed7d626edf019c94c64939f35357274e39136a..0000000000000000000000000000000000000000 --- a/crypto-add-support-for-nettle-s-native-XTS-impl.patch +++ /dev/null @@ -1,126 +0,0 @@ -From c4db6fcb2c45b800cd46e088f8265ccc0631b6fc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Mon, 14 Oct 2019 17:28:27 +0100 -Subject: [PATCH] crypto: add support for nettle's native XTS impl -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Nettle 3.5.0 will add support for the XTS mode. 
Use this because long -term we wish to delete QEMU's XTS impl to avoid carrying private crypto -algorithm impls. - -Unfortunately this degrades nettle performance from 612 MB/s to 568 MB/s -as nettle's XTS impl isn't so well optimized yet. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefano Garzarella -Signed-off-by: Daniel P. Berrangé ---- - configure | 18 ++++++++++++++++++ - crypto/cipher-nettle.c | 18 ++++++++++++++++++ - 2 files changed, 36 insertions(+) - -diff --git a/configure b/configure -index 577533e9ed..5dcaac3b95 100755 ---- a/configure -+++ b/configure -@@ -473,6 +473,7 @@ gtk_gl="no" - tls_priority="NORMAL" - gnutls="" - nettle="" -+nettle_xts="no" - gcrypt="" - gcrypt_hmac="no" - auth_pam="" -@@ -2918,6 +2919,19 @@ if test "$nettle" != "no"; then - pass="yes" - fi - fi -+ if test "$pass" = "yes" -+ then -+ cat > $TMPC << EOF -+#include -+int main(void) { -+ return 0; -+} -+EOF -+ if compile_prog "$nettle_cflags" "$nettle_libs" ; then -+ nettle_xts=yes -+ qemu_private_xts=no -+ fi -+ fi - if test "$pass" = "no" && test "$nettle" = "yes"; then - feature_not_found "nettle" "Install nettle devel >= 2.7.1" - else -@@ -6391,6 +6405,10 @@ echo "TLS priority $tls_priority" - echo "GNUTLS support $gnutls" - echo "libgcrypt $gcrypt" - echo "nettle $nettle $(echo_version $nettle $nettle_version)" -+if test "$nettle" = "yes" -+then -+ echo " XTS $nettle_xts" -+fi - echo "libtasn1 $tasn1" - echo "PAM $auth_pam" - echo "iconv support $iconv" -diff --git a/crypto/cipher-nettle.c b/crypto/cipher-nettle.c -index d7411bb8ff..7e9a4cc199 100644 ---- a/crypto/cipher-nettle.c -+++ b/crypto/cipher-nettle.c -@@ -19,7 +19,9 @@ - */ - - #include "qemu/osdep.h" -+#ifdef CONFIG_QEMU_PRIVATE_XTS - #include "crypto/xts.h" -+#endif - #include "cipherpriv.h" - - #include -@@ -30,6 +32,9 @@ - #include - #include - #include -+#ifndef CONFIG_QEMU_PRIVATE_XTS -+#include -+#endif - - typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx, - size_t length, -@@ -626,9 
+631,15 @@ qcrypto_nettle_cipher_encrypt(QCryptoCipher *cipher, - break; - - case QCRYPTO_CIPHER_MODE_XTS: -+#ifdef CONFIG_QEMU_PRIVATE_XTS - xts_encrypt(ctx->ctx, ctx->ctx_tweak, - ctx->alg_encrypt_wrapper, ctx->alg_encrypt_wrapper, - ctx->iv, len, out, in); -+#else -+ xts_encrypt_message(ctx->ctx, ctx->ctx_tweak, -+ ctx->alg_encrypt_native, -+ ctx->iv, len, out, in); -+#endif - break; - - case QCRYPTO_CIPHER_MODE_CTR: -@@ -673,9 +684,16 @@ qcrypto_nettle_cipher_decrypt(QCryptoCipher *cipher, - break; - - case QCRYPTO_CIPHER_MODE_XTS: -+#ifdef CONFIG_QEMU_PRIVATE_XTS - xts_decrypt(ctx->ctx, ctx->ctx_tweak, - ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper, - ctx->iv, len, out, in); -+#else -+ xts_decrypt_message(ctx->ctx, ctx->ctx_tweak, -+ ctx->alg_decrypt_native, -+ ctx->alg_encrypt_native, -+ ctx->iv, len, out, in); -+#endif - break; - case QCRYPTO_CIPHER_MODE_CTR: - ctr_crypt(ctx->ctx, ctx->alg_encrypt_native, --- -2.27.0 - diff --git a/crypto-avoid-leak-of-ctx-when-bad-cipher-mode-is-giv.patch b/crypto-avoid-leak-of-ctx-when-bad-cipher-mode-is-giv.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ef683c316e4f829c1e246d76c17c2984516b189 --- /dev/null +++ b/crypto-avoid-leak-of-ctx-when-bad-cipher-mode-is-giv.patch @@ -0,0 +1,48 @@ +From a0c5ce95e94a4621b12262423bfa021accb07625 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Mon, 16 Sep 2024 17:22:02 +0800 +Subject: [PATCH] =?UTF-8?q?crypto:=20avoid=20leak=20of=20ctx=20when=20bad?= + =?UTF-8?q?=20cipher=20mode=20is=20given=20Fixes:=20Coverity=20CID=2015468?= + =?UTF-8?q?84=20cherry=20picked=20from=20586ac2c67d707c2588766c5195d94fa55?= + =?UTF-8?q?3cc25af=20Reviewed-by:=20Peter=20Maydell=20=20Reviewed-by:=20Philippe=20Mathieu-Daud=C3=A9=20=20Signed-off-by:=20Daniel=20P.=20Berrang=C3=A9=20=20Signed-off-by:=20dinglimin=20?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +--- + crypto/cipher-nettle.c.inc | 7 +++++-- + 1 file changed, 5 
insertions(+), 2 deletions(-) + +diff --git a/crypto/cipher-nettle.c.inc b/crypto/cipher-nettle.c.inc +index 42b39e18a2..766de036ba 100644 +--- a/crypto/cipher-nettle.c.inc ++++ b/crypto/cipher-nettle.c.inc +@@ -734,16 +734,19 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + #ifdef CONFIG_CRYPTO_SM4 + case QCRYPTO_CIPHER_ALG_SM4: + { +- QCryptoNettleSm4 *ctx = g_new0(QCryptoNettleSm4, 1); ++ QCryptoNettleSm4 *ctx; ++ const QCryptoCipherDriver *drv; + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: +- ctx->base.driver = &qcrypto_nettle_sm4_driver_ecb; ++ drv = &qcrypto_nettle_sm4_driver_ecb; + break; + default: + goto bad_cipher_mode; + } + ++ ctx = g_new0(QCryptoNettleSm4, 1); ++ ctx->base.driver = drv; + sm4_set_encrypt_key(&ctx->key[0], key); + sm4_set_decrypt_key(&ctx->key[1], key); + +-- +2.41.0.windows.1 + diff --git a/crypto-drop-gnutls-debug-logging-support.patch b/crypto-drop-gnutls-debug-logging-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..d097bfb6f3c69cd2adc189023a3e4e2dba0ee537 --- /dev/null +++ b/crypto-drop-gnutls-debug-logging-support.patch @@ -0,0 +1,57 @@ +From c64bd463b120056ff1e6c32e48fa24b6afd17f23 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 12 Oct 2024 13:47:25 +0800 +Subject: [PATCH] crypto: drop gnutls debug logging support MIME-Version: 1.0 + Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +GNUTLS already supports dynamically enabling its logging at runtime by +setting the env var 'GNUTLS_DEBUG_LEVEL=10', so there is no need to +re-invent this logic in QEMU in a way that requires a re-compile. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Daniel P. 
Berrangé +Signed-off-by: dinglimin +--- + crypto/init.c | 15 ++++----------- + 1 file changed, 4 insertions(+), 11 deletions(-) + +diff --git a/crypto/init.c b/crypto/init.c +index fb7f1bff10..674d237fa9 100644 +--- a/crypto/init.c ++++ b/crypto/init.c +@@ -34,14 +34,11 @@ + + #include "crypto/random.h" + +-/* #define DEBUG_GNUTLS */ +-#ifdef DEBUG_GNUTLS +-static void qcrypto_gnutls_log(int level, const char *str) +-{ +- fprintf(stderr, "%d: %s", level, str); +-} +-#endif + ++/* ++ * To debug GNUTLS see env vars listed in ++ * https://gnutls.org/manual/html_node/Debugging-and-auditing.html ++ */ + int qcrypto_init(Error **errp) + { + #ifdef CONFIG_GNUTLS +@@ -53,10 +50,6 @@ int qcrypto_init(Error **errp) + gnutls_strerror(ret)); + return -1; + } +-#ifdef DEBUG_GNUTLS +- gnutls_global_set_log_level(10); +- gnutls_global_set_log_function(qcrypto_gnutls_log); +-#endif + #endif + + #ifdef CONFIG_GCRYPT +-- +2.41.0.windows.1 + diff --git a/crypto-factor-out-conversion-of-QAPI-to-gcrypt-const.patch b/crypto-factor-out-conversion-of-QAPI-to-gcrypt-const.patch new file mode 100644 index 0000000000000000000000000000000000000000..ebeeb59312e1df711f0070c33b4dc1dbf0d0dbcc --- /dev/null +++ b/crypto-factor-out-conversion-of-QAPI-to-gcrypt-const.patch @@ -0,0 +1,168 @@ +From e1aaa51fc2de072871cce45dd165e2cb38515978 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 12 Oct 2024 14:00:08 +0800 +Subject: [PATCH] crypto: factor out conversion of QAPI to gcrypt constants + MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 + Content-Transfer-Encoding: 8bit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The conversion of cipher mode will shortly be required in more +than one place. + +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Thomas Huth +Signed-off-by: Daniel P. 
Berrangé +Signed-off-by: dinglimin +--- + crypto/cipher-gcrypt.c.inc | 116 +++++++++++++++++++------------------ + 1 file changed, 60 insertions(+), 56 deletions(-) + +diff --git a/crypto/cipher-gcrypt.c.inc b/crypto/cipher-gcrypt.c.inc +index 1377cbaf14..6b82280f90 100644 +--- a/crypto/cipher-gcrypt.c.inc ++++ b/crypto/cipher-gcrypt.c.inc +@@ -20,6 +20,56 @@ + + #include + ++static int qcrypto_cipher_alg_to_gcry_alg(QCryptoCipherAlgorithm alg) ++{ ++ switch (alg) { ++ case QCRYPTO_CIPHER_ALG_DES: ++ return GCRY_CIPHER_DES; ++ case QCRYPTO_CIPHER_ALG_3DES: ++ return GCRY_CIPHER_3DES; ++ case QCRYPTO_CIPHER_ALG_AES_128: ++ return GCRY_CIPHER_AES128; ++ case QCRYPTO_CIPHER_ALG_AES_192: ++ return GCRY_CIPHER_AES192; ++ case QCRYPTO_CIPHER_ALG_AES_256: ++ return GCRY_CIPHER_AES256; ++ case QCRYPTO_CIPHER_ALG_CAST5_128: ++ return GCRY_CIPHER_CAST5; ++ case QCRYPTO_CIPHER_ALG_SERPENT_128: ++ return GCRY_CIPHER_SERPENT128; ++ case QCRYPTO_CIPHER_ALG_SERPENT_192: ++ return GCRY_CIPHER_SERPENT192; ++ case QCRYPTO_CIPHER_ALG_SERPENT_256: ++ return GCRY_CIPHER_SERPENT256; ++ case QCRYPTO_CIPHER_ALG_TWOFISH_128: ++ return GCRY_CIPHER_TWOFISH128; ++ case QCRYPTO_CIPHER_ALG_TWOFISH_256: ++ return GCRY_CIPHER_TWOFISH; ++#ifdef CONFIG_CRYPTO_SM4 ++ case QCRYPTO_CIPHER_ALG_SM4: ++ return GCRY_CIPHER_SM4; ++#endif ++ default: ++ return GCRY_CIPHER_NONE; ++ } ++} ++ ++static int qcrypto_cipher_mode_to_gcry_mode(QCryptoCipherMode mode) ++{ ++ switch (mode) { ++ case QCRYPTO_CIPHER_MODE_ECB: ++ return GCRY_CIPHER_MODE_ECB; ++ case QCRYPTO_CIPHER_MODE_XTS: ++ return GCRY_CIPHER_MODE_XTS; ++ case QCRYPTO_CIPHER_MODE_CBC: ++ return GCRY_CIPHER_MODE_CBC; ++ case QCRYPTO_CIPHER_MODE_CTR: ++ return GCRY_CIPHER_MODE_CTR; ++ default: ++ return GCRY_CIPHER_MODE_NONE; ++ } ++} ++ + bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) + { +@@ -188,72 +238,26 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + return NULL; + } + +- switch 
(alg) { +- case QCRYPTO_CIPHER_ALG_DES: +- gcryalg = GCRY_CIPHER_DES; +- break; +- case QCRYPTO_CIPHER_ALG_3DES: +- gcryalg = GCRY_CIPHER_3DES; +- break; +- case QCRYPTO_CIPHER_ALG_AES_128: +- gcryalg = GCRY_CIPHER_AES128; +- break; +- case QCRYPTO_CIPHER_ALG_AES_192: +- gcryalg = GCRY_CIPHER_AES192; +- break; +- case QCRYPTO_CIPHER_ALG_AES_256: +- gcryalg = GCRY_CIPHER_AES256; +- break; +- case QCRYPTO_CIPHER_ALG_CAST5_128: +- gcryalg = GCRY_CIPHER_CAST5; +- break; +- case QCRYPTO_CIPHER_ALG_SERPENT_128: +- gcryalg = GCRY_CIPHER_SERPENT128; +- break; +- case QCRYPTO_CIPHER_ALG_SERPENT_192: +- gcryalg = GCRY_CIPHER_SERPENT192; +- break; +- case QCRYPTO_CIPHER_ALG_SERPENT_256: +- gcryalg = GCRY_CIPHER_SERPENT256; +- break; +- case QCRYPTO_CIPHER_ALG_TWOFISH_128: +- gcryalg = GCRY_CIPHER_TWOFISH128; +- break; +- case QCRYPTO_CIPHER_ALG_TWOFISH_256: +- gcryalg = GCRY_CIPHER_TWOFISH; +- break; +-#ifdef CONFIG_CRYPTO_SM4 +- case QCRYPTO_CIPHER_ALG_SM4: +- gcryalg = GCRY_CIPHER_SM4; +- break; +-#endif +- default: ++ gcryalg = qcrypto_cipher_alg_to_gcry_alg(alg); ++ if (gcryalg == GCRY_CIPHER_NONE) { + error_setg(errp, "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_str(alg)); + return NULL; + } + +- drv = &qcrypto_gcrypt_driver; +- switch (mode) { +- case QCRYPTO_CIPHER_MODE_ECB: +- gcrymode = GCRY_CIPHER_MODE_ECB; +- break; +- case QCRYPTO_CIPHER_MODE_XTS: +- gcrymode = GCRY_CIPHER_MODE_XTS; +- break; +- case QCRYPTO_CIPHER_MODE_CBC: +- gcrymode = GCRY_CIPHER_MODE_CBC; +- break; +- case QCRYPTO_CIPHER_MODE_CTR: +- drv = &qcrypto_gcrypt_ctr_driver; +- gcrymode = GCRY_CIPHER_MODE_CTR; +- break; +- default: ++ gcrymode = qcrypto_cipher_mode_to_gcry_mode(mode); ++ if (gcrymode == GCRY_CIPHER_MODE_NONE) { + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_str(mode)); + return NULL; + } + ++ if (mode == QCRYPTO_CIPHER_MODE_CTR) { ++ drv = &qcrypto_gcrypt_ctr_driver; ++ } else { ++ drv = &qcrypto_gcrypt_driver; ++ } ++ + ctx = 
g_new0(QCryptoCipherGcrypt, 1); + ctx->base.driver = drv; + +-- +2.41.0.windows.1 + diff --git a/crypto-fix-error-check-on-gcry_md_open.patch b/crypto-fix-error-check-on-gcry_md_open.patch new file mode 100644 index 0000000000000000000000000000000000000000..a1e10e60ab28229777d3fba0c8a1cf9d80ad3f58 --- /dev/null +++ b/crypto-fix-error-check-on-gcry_md_open.patch @@ -0,0 +1,44 @@ +From 0029172c2c57c18d6aef61070c2471f40de6bb45 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Wed, 30 Oct 2024 10:08:12 +0000 +Subject: [PATCH] crypto: fix error check on gcry_md_open +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Gcrypt does not return negative values on error, it returns non-zero +values. This caused QEMU not to detect failure to open an unsupported +hash, resulting in a later crash trying to use a NULL context. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Daniel P. Berrangé +Signed-off-by: cheliequan +--- + crypto/hash-gcrypt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/crypto/hash-gcrypt.c b/crypto/hash-gcrypt.c +index d3bdfe5633..bf5d7ff9ba 100644 +--- a/crypto/hash-gcrypt.c ++++ b/crypto/hash-gcrypt.c +@@ -56,7 +56,7 @@ qcrypto_gcrypt_hash_bytesv(QCryptoHashAlgorithm alg, + size_t *resultlen, + Error **errp) + { +- int i, ret; ++ gcry_error_t ret; + gcry_md_hd_t md; + unsigned char *digest; + +@@ -69,7 +69,7 @@ qcrypto_gcrypt_hash_bytesv(QCryptoHashAlgorithm alg, + + ret = gcry_md_open(&md, qcrypto_hash_alg_map[alg], 0); + +- if (ret < 0) { ++ if (ret != 0) { + error_setg(errp, + "Unable to initialize hash algorithm: %s", + gcry_strerror(ret)); +-- +2.41.0.windows.1 + diff --git a/crypto-perform-runtime-check-for-hash-hmac-support-i.patch b/crypto-perform-runtime-check-for-hash-hmac-support-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..f153a1d54fc560fe4bd484283ebf8e1f2a4a88ee --- /dev/null +++ 
b/crypto-perform-runtime-check-for-hash-hmac-support-i.patch @@ -0,0 +1,48 @@ +From 17d589becc1a66934e55a4e2efffdd3876d56130 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Wed, 30 Oct 2024 10:09:30 +0000 +Subject: [PATCH] crypto: perform runtime check for hash/hmac support in gcrypt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +gcrypto has the ability to dynamically disable hash/hmac algorithms +at runtime, so QEMU must perform a runtime check. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Daniel P. Berrangé +Signed-off-by: cheliequan +--- + crypto/hash-gcrypt.c | 2 +- + crypto/hmac-gcrypt.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/crypto/hash-gcrypt.c b/crypto/hash-gcrypt.c +index d3bdfe5633..2b6dbd97bb 100644 +--- a/crypto/hash-gcrypt.c ++++ b/crypto/hash-gcrypt.c +@@ -42,7 +42,7 @@ gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) + { + if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) && + qcrypto_hash_alg_map[alg] != GCRY_MD_NONE) { +- return true; ++ return gcry_md_test_algo(qcrypto_hash_alg_map[alg]) == 0; + } + return false; + } +diff --git a/crypto/hmac-gcrypt.c b/crypto/hmac-gcrypt.c +index 888afb86ed..15926fccfa 100644 +--- a/crypto/hmac-gcrypt.c ++++ b/crypto/hmac-gcrypt.c +@@ -40,7 +40,7 @@ bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg) + { + if (alg < G_N_ELEMENTS(qcrypto_hmac_alg_map) && + qcrypto_hmac_alg_map[alg] != GCRY_MAC_NONE) { +- return true; ++ return gcry_mac_test_algo(qcrypto_hmac_alg_map[alg]) == 0; + } + + return false; +-- +2.41.0.windows.1 + diff --git a/crypto-run-qcrypto_pbkdf2_count_iters-in-a-new-threa.patch b/crypto-run-qcrypto_pbkdf2_count_iters-in-a-new-threa.patch new file mode 100644 index 0000000000000000000000000000000000000000..55fa8a3c9c47265bf8085edd1ee3f5445bd7aa8c --- /dev/null +++ b/crypto-run-qcrypto_pbkdf2_count_iters-in-a-new-threa.patch @@ -0,0 +1,117 @@ +From 
d199d3a9af9f5bd7877a6ace1243c77097264f1a Mon Sep 17 00:00:00 2001 +From: Tiago Pasqualini +Date: Wed, 4 Sep 2024 20:52:30 -0300 +Subject: [PATCH] crypto: run qcrypto_pbkdf2_count_iters in a new thread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +CPU time accounting in the kernel has been demonstrated to have a +sawtooth pattern[1][2]. This can cause the getrusage system call to +not be as accurate as we are expecting, which can cause this calculation +to stall. + +The kernel discussions shows that this inaccuracy happens when CPU time +gets big enough, so this patch changes qcrypto_pbkdf2_count_iters to run +in a fresh thread to avoid this inaccuracy. It also adds a sanity check +to fail the process if CPU time is not accounted. + +[1] https://lore.kernel.org/lkml/159231011694.16989.16351419333851309713.tip-bot2@tip-bot2/ +[2] https://lore.kernel.org/lkml/20221226031010.4079885-1-maxing.lan@bytedance.com/t/#m1c7f2fdc0ea742776a70fd1aa2a2e414c437f534 + +Resolves: #2398 +Signed-off-by: Tiago Pasqualini +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit c72cab5ad9f849bbcfcf4be7952b8b8946cc626e) +Signed-off-by: zhujun2 +--- + crypto/pbkdf.c | 53 +++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 46 insertions(+), 7 deletions(-) + +diff --git a/crypto/pbkdf.c b/crypto/pbkdf.c +index 8d198c152c..d1c06ef3ed 100644 +--- a/crypto/pbkdf.c ++++ b/crypto/pbkdf.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/thread.h" + #include "qapi/error.h" + #include "crypto/pbkdf.h" + #ifndef _WIN32 +@@ -85,12 +86,28 @@ static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms, + #endif + } + +-uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, +- const uint8_t *key, size_t nkey, +- const uint8_t *salt, size_t nsalt, +- size_t nout, +- Error **errp) ++typedef struct CountItersData { ++ QCryptoHashAlgorithm hash; ++ const uint8_t *key; ++ size_t nkey; ++ const uint8_t *salt; ++ size_t nsalt; ++ size_t nout; ++ uint64_t iterations; ++ Error **errp; ++} CountItersData; ++ ++static void *threaded_qcrypto_pbkdf2_count_iters(void *data) + { ++ CountItersData *iters_data = (CountItersData *) data; ++ QCryptoHashAlgorithm hash = iters_data->hash; ++ const uint8_t *key = iters_data->key; ++ size_t nkey = iters_data->nkey; ++ const uint8_t *salt = iters_data->salt; ++ size_t nsalt = iters_data->nsalt; ++ size_t nout = iters_data->nout; ++ Error **errp = iters_data->errp; ++ + uint64_t ret = -1; + g_autofree uint8_t *out = g_new(uint8_t, nout); + uint64_t iterations = (1 << 15); +@@ -114,7 +131,10 @@ uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, + + delta_ms = end_ms - start_ms; + +- if (delta_ms > 500) { ++ if (delta_ms == 0) { /* sanity check */ ++ error_setg(errp, "Unable to get accurate CPU usage"); ++ goto cleanup; ++ } else if (delta_ms > 500) { + break; + } else if (delta_ms < 100) { + iterations = iterations * 10; +@@ -129,5 +149,24 @@ uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, + + cleanup: + memset(out, 
0, nout); +- return ret; ++ iters_data->iterations = ret; ++ return NULL; ++} ++ ++uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, ++ const uint8_t *key, size_t nkey, ++ const uint8_t *salt, size_t nsalt, ++ size_t nout, ++ Error **errp) ++{ ++ CountItersData data = { ++ hash, key, nkey, salt, nsalt, nout, 0, errp ++ }; ++ QemuThread thread; ++ ++ qemu_thread_create(&thread, "pbkdf2", threaded_qcrypto_pbkdf2_count_iters, ++ &data, QEMU_THREAD_JOINABLE); ++ qemu_thread_join(&thread); ++ ++ return data.iterations; + } +-- +2.41.0.windows.1 + diff --git a/crypto-tlscredspsk-Free-username-on-finalize.patch b/crypto-tlscredspsk-Free-username-on-finalize.patch new file mode 100644 index 0000000000000000000000000000000000000000..3da983d169ab4db8117d6adff5235518644482c4 --- /dev/null +++ b/crypto-tlscredspsk-Free-username-on-finalize.patch @@ -0,0 +1,78 @@ +From ec07000764f578bb7cd21fe73c8e649a183d7674 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Mon, 26 Aug 2024 10:56:57 +0800 +Subject: [PATCH] crypto/tlscredspsk: Free username on finalize +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 87e012f29f2e47dcd8c385ff8bb8188f9e06d4ea + +When the creds->username property is set we allocate memory +for it in qcrypto_tls_creds_psk_prop_set_username(), but +we never free this when the QCryptoTLSCredsPSK is destroyed. +Free the memory in finalize. 
+ +This fixes a LeakSanitizer complaint in migration-test: + +$ (cd build/asan; ASAN_OPTIONS="fast_unwind_on_malloc=0" QTEST_QEMU_BINARY=./qemu-system-x86_64 ./tests/qtest/migration-test --tap -k -p /x86_64/migration/precopy/unix/tls/psk) + +================================================================= +==3867512==ERROR: LeakSanitizer: detected memory leaks + +Direct leak of 5 byte(s) in 1 object(s) allocated from: + #0 0x5624e5c99dee in malloc (/mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/qemu-system-x86_64+0x218edee) (BuildId: a9e623fa1009a9435c0142c037cd7b8c1ad04ce3) + #1 0x7fb199ae9738 in g_malloc debian/build/deb/../../../glib/gmem.c:128:13 + #2 0x7fb199afe583 in g_strdup debian/build/deb/../../../glib/gstrfuncs.c:361:17 + #3 0x5624e82ea919 in qcrypto_tls_creds_psk_prop_set_username /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../crypto/tlscredspsk.c:255:23 + #4 0x5624e812c6b5 in property_set_str /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../qom/object.c:2277:5 + #5 0x5624e8125ce5 in object_property_set /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../qom/object.c:1463:5 + #6 0x5624e8136e7c in object_set_properties_from_qdict /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../qom/object_interfaces.c:55:14 + #7 0x5624e81372d2 in user_creatable_add_type /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../qom/object_interfaces.c:112:5 + #8 0x5624e8137964 in user_creatable_add_qapi /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../qom/object_interfaces.c:157:11 + #9 0x5624e891ba3c in qmp_object_add /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../qom/qom-qmp-cmds.c:227:5 + #10 0x5624e8af9118 in qmp_marshal_object_add /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/qapi/qapi-commands-qom.c:337:5 + #11 0x5624e8bd1d49 in do_qmp_dispatch_bh /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../qapi/qmp-dispatch.c:128:5 + #12 0x5624e8cb2531 in aio_bh_call 
/mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../util/async.c:171:5 + #13 0x5624e8cb340c in aio_bh_poll /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../util/async.c:218:13 + #14 0x5624e8c0be98 in aio_dispatch /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../util/aio-posix.c:423:5 + #15 0x5624e8cba3ce in aio_ctx_dispatch /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../util/async.c:360:5 + #16 0x7fb199ae0d3a in g_main_dispatch debian/build/deb/../../../glib/gmain.c:3419:28 + #17 0x7fb199ae0d3a in g_main_context_dispatch debian/build/deb/../../../glib/gmain.c:4137:7 + #18 0x5624e8cbe1d9 in glib_pollfds_poll /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../util/main-loop.c:287:9 + #19 0x5624e8cbcb13 in os_host_main_loop_wait /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../util/main-loop.c:310:5 + #20 0x5624e8cbc6dc in main_loop_wait /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../util/main-loop.c:589:11 + #21 0x5624e6f3f917 in qemu_main_loop /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../system/runstate.c:801:9 + #22 0x5624e893379c in qemu_default_main /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../system/main.c:37:14 + #23 0x5624e89337e7 in main /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/../../system/main.c:48:12 + #24 0x7fb197972d8f in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16 + #25 0x7fb197972e3f in __libc_start_main csu/../csu/libc-start.c:392:3 + #26 0x5624e5c16fa4 in _start (/mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/asan/qemu-system-x86_64+0x210bfa4) (BuildId: a9e623fa1009a9435c0142c037cd7b8c1ad04ce3) + +SUMMARY: AddressSanitizer: 5 byte(s) leaked in 1 allocation(s). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Maydell +Reviewed-by: Daniel P. 
Berrangé +Message-ID: <20240819145021.38524-1-peter.maydell@linaro.org> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + crypto/tlscredspsk.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/crypto/tlscredspsk.c b/crypto/tlscredspsk.c +index 546cad1c5a..0d6b71a37c 100644 +--- a/crypto/tlscredspsk.c ++++ b/crypto/tlscredspsk.c +@@ -243,6 +243,7 @@ qcrypto_tls_creds_psk_finalize(Object *obj) + QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj); + + qcrypto_tls_creds_psk_unload(creds); ++ g_free(creds->username); + } + + static void +-- +2.41.0.windows.1 + diff --git a/crypto-use-consistent-error-reporting-pattern-for-un.patch b/crypto-use-consistent-error-reporting-pattern-for-un.patch new file mode 100644 index 0000000000000000000000000000000000000000..835e2790942190193f96acd1e797385d9e0da41f --- /dev/null +++ b/crypto-use-consistent-error-reporting-pattern-for-un.patch @@ -0,0 +1,78 @@ +From 7bd04536327357a97206d8048f5d9341780bbe5a Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 12 Oct 2024 11:26:16 +0800 +Subject: [PATCH] crypto: use consistent error reporting pattern for + unsupported cipher modes MIME-Version: 1.0 Content-Type: text/plain; + charset=UTF-8 Content-Transfer-Encoding: 8bit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Not all paths in qcrypto_cipher_ctx_new() were correctly distinguishing +between valid user input for cipher mode (which should report a user +facing error), vs program logic errors (which should assert). + +Reported-by: Peter Maydell +Signed-off-by: Daniel P. 
Berrangé +Signed-off-by: dinglimin +--- + crypto/cipher-nettle.c.inc | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/crypto/cipher-nettle.c.inc b/crypto/cipher-nettle.c.inc +index 766de036ba..2654b439c1 100644 +--- a/crypto/cipher-nettle.c.inc ++++ b/crypto/cipher-nettle.c.inc +@@ -525,8 +525,10 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + case QCRYPTO_CIPHER_MODE_CTR: + drv = &qcrypto_nettle_des_driver_ctr; + break; +- default: ++ case QCRYPTO_CIPHER_MODE_XTS: + goto bad_cipher_mode; ++ default: ++ g_assert_not_reached(); + } + + ctx = g_new0(QCryptoNettleDES, 1); +@@ -551,8 +553,10 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + case QCRYPTO_CIPHER_MODE_CTR: + drv = &qcrypto_nettle_des3_driver_ctr; + break; +- default: ++ case QCRYPTO_CIPHER_MODE_XTS: + goto bad_cipher_mode; ++ default: ++ g_assert_not_reached(); + } + + ctx = g_new0(QCryptoNettleDES3, 1); +@@ -663,8 +667,10 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + case QCRYPTO_CIPHER_MODE_CTR: + drv = &qcrypto_nettle_cast128_driver_ctr; + break; +- default: ++ case QCRYPTO_CIPHER_MODE_XTS: + goto bad_cipher_mode; ++ default: ++ g_assert_not_reached(); + } + + ctx = g_new0(QCryptoNettleCAST128, 1); +@@ -741,8 +747,12 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + case QCRYPTO_CIPHER_MODE_ECB: + drv = &qcrypto_nettle_sm4_driver_ecb; + break; +- default: ++ case QCRYPTO_CIPHER_MODE_CBC: ++ case QCRYPTO_CIPHER_MODE_CTR: ++ case QCRYPTO_CIPHER_MODE_XTS: + goto bad_cipher_mode; ++ default: ++ g_assert_not_reached(); + } + + ctx = g_new0(QCryptoNettleSm4, 1); +-- +2.41.0.windows.1 + diff --git a/cryptodev-Fix-error-handling-in-cryptodev_lkcf_execu.patch b/cryptodev-Fix-error-handling-in-cryptodev_lkcf_execu.patch new file mode 100644 index 0000000000000000000000000000000000000000..a942a46d9aac6849b779db6b46d6b7ae7f8adf3f --- /dev/null +++ 
b/cryptodev-Fix-error-handling-in-cryptodev_lkcf_execu.patch @@ -0,0 +1,52 @@ +From ca3f4fd234ea4b8f02a415b99b449e71d028c076 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Tue, 8 Apr 2025 07:27:47 -0400 +Subject: [PATCH] cryptodev: Fix error handling in + cryptodev_lkcf_execute_task() + +cheery-pick from 1c89dfefc4c33295126208225f202f39b5a234c3 + +When cryptodev_lkcf_set_op_desc() fails, we report an error, but +continue anyway. This is wrong. We then pass a non-null @local_error +to various functions, which could easily fail error_setv()'s assertion +on failure. + +Fail the function instead. + +When qcrypto_akcipher_new() fails, we fail the function without +reporting the error. This leaks the Error object. + +Add the missing error reporting. This also frees the Error object. + +Signed-off-by: Markus Armbruster +Message-ID: <20250312101131.1615777-1-armbru@redhat.com> +Reviewed-by: zhenwei pi +Signed-off-by: qihao_yewu +--- + backends/cryptodev-lkcf.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/backends/cryptodev-lkcf.c b/backends/cryptodev-lkcf.c +index 45aba1ff67..45b287a953 100644 +--- a/backends/cryptodev-lkcf.c ++++ b/backends/cryptodev-lkcf.c +@@ -330,6 +330,8 @@ static void cryptodev_lkcf_execute_task(CryptoDevLKCFTask *task) + cryptodev_lkcf_set_op_desc(&session->akcipher_opts, op_desc, + sizeof(op_desc), &local_error) != 0) { + error_report_err(local_error); ++ status = -VIRTIO_CRYPTO_ERR; ++ goto out; + } else { + key_id = add_key(KCTL_KEY_TYPE_PKEY, "lkcf-backend-priv-key", + p8info, p8info_len, KCTL_KEY_RING); +@@ -346,6 +348,7 @@ static void cryptodev_lkcf_execute_task(CryptoDevLKCFTask *task) + session->key, session->keylen, + &local_error); + if (!akcipher) { ++ error_report_err(local_error); + status = -VIRTIO_CRYPTO_ERR; + goto out; + } +-- +2.41.0.windows.1 + diff --git a/cvm-Add-support-for-TEE-based-national-encryption-ac.patch b/cvm-Add-support-for-TEE-based-national-encryption-ac.patch new file mode 100644 index 
0000000000000000000000000000000000000000..2afbccce5a83273f52a9bc012a51b4590e190e73 --- /dev/null +++ b/cvm-Add-support-for-TEE-based-national-encryption-ac.patch @@ -0,0 +1,294 @@ +From dffc0f55d93ececee55a8548d7dab227ee76b234 Mon Sep 17 00:00:00 2001 +From: liupingwei +Date: Thu, 24 Oct 2024 19:05:58 +0800 +Subject: [PATCH] cvm : Add support for TEE-based national encryption + acceleration. + +This commit enables the use of TEE for national encryption acceleration +in cvm and speeds up OpenSSL encrption /decryption operations. + +Signed-off-by: liupingwei +--- + hw/arm/virt.c | 61 ++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 1 + + linux-headers/asm-arm64/kvm.h | 10 ++++++ + qapi/qom.json | 1 + + target/arm/kvm-tmm.c | 68 +++++++++++++++++++++++++++++++++-- + target/arm/kvm_arm.h | 4 +++ + 6 files changed, 142 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e73a795d3d..248788db03 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1967,6 +1967,10 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + "kvm-type", &error_abort); + + if (!strcmp(kvm_type, "cvm")) { ++ /* support kae vf device tree nodes */ ++ vms->memmap[VIRT_PCIE_MMIO] = (MemMapEntry) { 0x10000000, 0x2edf0000 }; ++ vms->memmap[VIRT_KAE_DEVICE] = (MemMapEntry) { 0x3edf0000, 0x00200000 }; ++ + vms->memmap[VIRT_MEM].base = 3 * GiB; + vms->memmap[VIRT_MEM].size = ms->ram_size; + info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base), +@@ -2380,6 +2384,56 @@ out: + return; + } + ++static void fdt_add_hisi_sec_nodes(const VirtMachineState *vms, int dev_id) ++{ ++ const MachineState *ms = MACHINE(vms); ++ hwaddr size = 0x10000; ++ ++ /* ++ * Calculate the base address for the sec device node. ++ * Each device group contains one sec device and one hpre device,spaced by 2 * size. 
++ */ ++ hwaddr base = vms->memmap[VIRT_KAE_DEVICE].base + dev_id * 2 * size; ++ char *nodename; ++ ++ tmm_set_sec_addr(base, dev_id); ++ ++ nodename = g_strdup_printf("/hisi-sec@%" PRIx64, base); ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "hisilicon,hip07-sec-vf"); ++ qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 2, size); ++ g_free(nodename); ++} ++ ++static void fdt_add_hisi_hpre_nodes(const VirtMachineState *vms, int dev_id) ++{ ++ const MachineState *ms = MACHINE(vms); ++ hwaddr size = 0x10000; ++ ++ /* ++ * Calculate the base address for the hpre device node. ++ * Each hpre device follows the corresponding sec device by an additional offset of size. ++ */ ++ hwaddr base = vms->memmap[VIRT_KAE_DEVICE].base + dev_id * 2 * size + size; ++ char *nodename; ++ ++ tmm_set_hpre_addr(base, dev_id); ++ ++ nodename = g_strdup_printf("/hisi-hpre@%" PRIx64, base); ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "hisilicon,hip07-hpre-vf"); ++ qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 2, size); ++ g_free(nodename); ++} ++ ++static void fdt_add_all_hisi_nodes(const VirtMachineState *vms, int dev_id) ++{ ++ for (int i = 0; i < dev_id; i++) { ++ fdt_add_hisi_sec_nodes(vms, i); ++ fdt_add_hisi_hpre_nodes(vms, i); ++ } ++} ++ + static void machvirt_init(MachineState *machine) + { + VirtMachineState *vms = VIRT_MACHINE(machine); +@@ -2530,14 +2584,19 @@ static void machvirt_init(MachineState *machine) + } + } + ++ create_fdt(vms); ++ + if (virtcca_cvm_enabled()) { ++ int kae_num = tmm_get_kae_num(); ++ fdt_add_all_hisi_nodes(vms, kae_num); ++ + int ret = kvm_arm_tmm_init(machine->cgs, &error_fatal); + if (ret != 0) { + error_report("fail to initialize TMM"); + exit(1); + } + } +- create_fdt(vms); ++ + qemu_log("cpu init start\n"); + + cpu_class = object_class_by_name(machine->cpu_type); +diff --git a/include/hw/arm/virt.h 
b/include/hw/arm/virt.h +index 27f5333772..76a0d3fa5b 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -66,6 +66,7 @@ enum { + VIRT_FW_CFG, + VIRT_PCIE, + VIRT_PCIE_MMIO, ++ VIRT_KAE_DEVICE, + VIRT_PCIE_PIO, + VIRT_PCIE_ECAM, + VIRT_PLATFORM_BUS, +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index 2b040b5d60..552fdcb18f 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -541,6 +541,9 @@ struct reg_mask_range { + #define KVM_CAP_ARM_TMM_CFG_SVE 2 + #define KVM_CAP_ARM_TMM_CFG_DBG 3 + #define KVM_CAP_ARM_TMM_CFG_PMU 4 ++#define KVM_CAP_ARM_TMM_CFG_KAE 5 ++ ++#define KVM_ARM_TMM_MAX_KAE_VF_NUM 11 + + struct kvm_cap_arm_tmm_config_item { + __u32 cfg; +@@ -570,6 +573,13 @@ struct kvm_cap_arm_tmm_config_item { + struct { + __u32 num_pmu_cntrs; + }; ++ ++ /* cfg == KVM_CAP_ARM_TMM_CFG_KAE */ ++ struct { ++ __u32 kae_vf_num; ++ __u64 sec_addr[KVM_ARM_TMM_MAX_KAE_VF_NUM]; ++ __u64 hpre_addr[KVM_ARM_TMM_MAX_KAE_VF_NUM]; ++ }; + /* Fix the size of the union */ + __u8 reserved[256]; + }; +diff --git a/qapi/qom.json b/qapi/qom.json +index 213edd8db2..293d727a04 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -921,6 +921,7 @@ + { 'struct': 'TmmGuestProperties', + 'data': { '*sve-vector-length': 'uint32', + '*num-pmu-counters': 'uint32', ++ '*kae': 'uint32', + '*measurement-algo': 'TmmGuestMeasurementAlgo' } } + + ## +diff --git a/target/arm/kvm-tmm.c b/target/arm/kvm-tmm.c +index efe2ca0006..ea6bcc0f40 100644 +--- a/target/arm/kvm-tmm.c ++++ b/target/arm/kvm-tmm.c +@@ -19,13 +19,20 @@ + #include "sysemu/kvm.h" + #include "sysemu/runstate.h" + #include "hw/loader.h" ++#include "linux-headers/asm-arm64/kvm.h" + + #define TYPE_TMM_GUEST "tmm-guest" + OBJECT_DECLARE_SIMPLE_TYPE(TmmGuest, TMM_GUEST) + + #define TMM_PAGE_SIZE qemu_real_host_page_size() +-#define TMM_MAX_PMU_CTRS 0x20 +-#define TMM_MAX_CFG 5 ++#define TMM_MAX_PMU_CTRS 0x20 ++#define TMM_MAX_CFG 6 ++ ++typedef struct { ++ uint32_t 
kae_vf_num; ++ hwaddr sec_addr[KVM_ARM_TMM_MAX_KAE_VF_NUM]; ++ hwaddr hpre_addr[KVM_ARM_TMM_MAX_KAE_VF_NUM]; ++} KaeDeviceInfo; + + struct TmmGuest { + ConfidentialGuestSupport parent_obj; +@@ -33,6 +40,7 @@ struct TmmGuest { + TmmGuestMeasurementAlgo measurement_algo; + uint32_t sve_vl; + uint32_t num_pmu_cntrs; ++ KaeDeviceInfo kae_device_info; + }; + + typedef struct { +@@ -92,6 +100,17 @@ static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp) + args.num_pmu_cntrs = guest->num_pmu_cntrs; + cfg_str = "PMU"; + break; ++ case KVM_CAP_ARM_TMM_CFG_KAE: ++ if (!guest->kae_device_info.kae_vf_num) { ++ return 0; ++ } ++ args.kae_vf_num= guest->kae_device_info.kae_vf_num; ++ for (int i = 0; i < guest->kae_device_info.kae_vf_num; i++) { ++ args.sec_addr[i] = guest->kae_device_info.sec_addr[i]; ++ args.hpre_addr[i] = guest->kae_device_info.hpre_addr[i]; ++ } ++ cfg_str = "KAE"; ++ break; + default: + g_assert_not_reached(); + } +@@ -289,6 +308,47 @@ static void tmm_set_measurement_algo(Object *obj, int algo, Error **errp G_GNUC_ + guest->measurement_algo = algo; + } + ++static void tmm_get_kae_vf_num(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ ++ visit_type_uint32(v, name, &guest->kae_device_info.kae_vf_num, errp); ++} ++ ++static void tmm_set_kae_vf_num(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TmmGuest *guest = TMM_GUEST(obj); ++ uint32_t value; ++ ++ if (!visit_type_uint32(v, name, &value, errp)) { ++ return; ++ } ++ ++ if (value > KVM_ARM_TMM_MAX_KAE_VF_NUM) { ++ error_setg(errp, "invalid number of kae vfs"); ++ return; ++ } ++ ++ guest->kae_device_info.kae_vf_num = value; ++} ++ ++int tmm_get_kae_num(void) ++{ ++ return tmm_guest->kae_device_info.kae_vf_num; ++} ++ ++void tmm_set_sec_addr(hwaddr base, int num) ++{ ++ tmm_guest->kae_device_info.sec_addr[num] = base; ++} ++ ++void tmm_set_hpre_addr(hwaddr base, int num) ++{ ++ 
tmm_guest->kae_device_info.hpre_addr[num] = base; ++} ++ + static void tmm_guest_class_init(ObjectClass *oc, void *data) + { + object_class_property_add_enum(oc, "measurement-algo", +@@ -314,6 +374,10 @@ static void tmm_guest_class_init(ObjectClass *oc, void *data) + NULL, NULL); + object_class_property_set_description(oc, "num-pmu-counters", + "Number of PMU counters"); ++ object_class_property_add(oc, "kae", "uint32", tmm_get_kae_vf_num, ++ tmm_set_kae_vf_num, NULL, NULL); ++ object_class_property_set_description(oc, "kae", ++ "Number of KAE virtual functions. 0 disables KAE (the default)"); + } + + static void tmm_guest_instance_init(Object *obj) +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index d6c7139f4a..31457a57f7 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -390,6 +390,10 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + + void tmm_add_ram_region(hwaddr base1, hwaddr len1, hwaddr base2, hwaddr len2, bool populate); + ++int tmm_get_kae_num(void); ++void tmm_set_sec_addr(hwaddr base, int num); ++void tmm_set_hpre_addr(hwaddr base, int num); ++ + int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp); + bool kvm_arm_tmm_enabled(void); + +-- +2.41.0.windows.1 + diff --git a/cvm-Implement-command-blacklist-for-cvm-security-enh.patch b/cvm-Implement-command-blacklist-for-cvm-security-enh.patch new file mode 100644 index 0000000000000000000000000000000000000000..69b45e6ea3fa1aa1ff1895b2cde70cb855e84ca7 --- /dev/null +++ b/cvm-Implement-command-blacklist-for-cvm-security-enh.patch @@ -0,0 +1,118 @@ +From 384b3f41fd69ed6f5bf376ff1aac1a12deeea0fb Mon Sep 17 00:00:00 2001 +From: liupingwei +Date: Fri, 16 Aug 2024 18:06:10 +0800 +Subject: [PATCH] cvm : Implement command blacklist for cvm security + enhancement + +Added a new feature to intercept and block specific virsh commands(virsh +save,virsh restore,virsh dump,virsh suspend,virsh resume)that can impact +the security of cvm. 
+ +Signed-off-by: liupingwei +--- + dump/dump.c | 7 +++++++ + migration/migration-hmp-cmds.c | 6 ++++++ + migration/savevm.c | 6 ++++++ + monitor/qmp-cmds.c | 6 ++++++ + 4 files changed, 25 insertions(+) + +diff --git a/dump/dump.c b/dump/dump.c +index 4819050764..787059ac2c 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -20,6 +20,7 @@ + #include "sysemu/dump.h" + #include "sysemu/runstate.h" + #include "sysemu/cpus.h" ++#include "sysemu/kvm.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-dump.h" + #include "qapi/qapi-events-dump.h" +@@ -2065,6 +2066,12 @@ void qmp_dump_guest_memory(bool paging, const char *protocol, + Error **errp) + { + ERRP_GUARD(); ++ ++ if (virtcca_cvm_enabled()) { ++ error_setg(errp, "The dump-guest-memory command is temporarily unsupported in cvm."); ++ return; ++ } ++ + const char *p; + int fd; + DumpState *s; +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 1fa6a5f478..386ba7fc98 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -30,6 +30,7 @@ + #include "sysemu/runstate.h" + #include "ui/qemu-spice.h" + #include "sysemu/sysemu.h" ++#include "sysemu/kvm.h" + #include "options.h" + #include "migration.h" + +@@ -406,6 +407,11 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict) + const char *name = qdict_get_str(qdict, "name"); + Error *err = NULL; + ++ if (virtcca_cvm_enabled()) { ++ error_setg(&err, "The loadvm command is temporarily unsupported in cvm."); ++ return; ++ } ++ + vm_stop(RUN_STATE_RESTORE_VM); + + if (load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { +diff --git a/migration/savevm.c b/migration/savevm.c +index 477a19719f..cc65da605e 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -61,6 +61,7 @@ + #include "sysemu/replay.h" + #include "sysemu/runstate.h" + #include "sysemu/sysemu.h" ++#include "sysemu/kvm.h" + #include "sysemu/xen.h" + #include "migration/colo.h" + #include "qemu/bitmap.h" +@@ -3044,6 
+3045,11 @@ int qemu_loadvm_approve_switchover(void) + bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + bool has_devices, strList *devices, Error **errp) + { ++ if (virtcca_cvm_enabled()) { ++ error_setg(errp, "The savevm command is temporarily unsupported in cvm."); ++ return false; ++ } ++ + BlockDriverState *bs; + QEMUSnapshotInfo sn1, *sn = &sn1; + int ret = -1, ret2; +diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c +index e78462b857..c0b66f11bf 100644 +--- a/monitor/qmp-cmds.c ++++ b/monitor/qmp-cmds.c +@@ -23,6 +23,7 @@ + #include "sysemu/runstate.h" + #include "sysemu/runstate-action.h" + #include "sysemu/block-backend.h" ++#include "sysemu/kvm.h" + #include "qapi/error.h" + #include "qapi/qapi-init-commands.h" + #include "qapi/qapi-commands-control.h" +@@ -50,6 +51,11 @@ void qmp_quit(Error **errp) + + void qmp_stop(Error **errp) + { ++ if (virtcca_cvm_enabled()) { ++ error_setg(errp, "The stop command is temporarily unsupported in cvm."); ++ return; ++ } ++ + /* if there is a dump in background, we should wait until the dump + * finished */ + if (qemu_system_dump_in_progress()) { +-- +2.41.0.windows.1 + diff --git a/cvm-bug-fix-for-incorrect-device-name-check-for-vhos.patch b/cvm-bug-fix-for-incorrect-device-name-check-for-vhos.patch new file mode 100644 index 0000000000000000000000000000000000000000..f9237175ea579ea77407a7c085f496339b1263a9 --- /dev/null +++ b/cvm-bug-fix-for-incorrect-device-name-check-for-vhos.patch @@ -0,0 +1,34 @@ +From 282d63f9b5915f0529e9d0ae54b47c0ceacc58c3 Mon Sep 17 00:00:00 2001 +From: liupingwei +Date: Mon, 19 Aug 2024 15:38:23 +0800 +Subject: [PATCH] cvm : bug-fix for incorrect device name check for + vhost-user-fs + +The 'vhost-user-fs' was being parsed as 'virtio-user-fs' during the +compilation and this caused the device to erroneously trigger the error +branch. + +Fixes: 5db954cb188d3775aec053fad8a39bf4c26a2b92("Add support for the +virtcca cvm feature.) 
+ +Signed-off-by: liupingwei +--- + hw/virtio/virtio-bus.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index 7e750d073d..4f16e7ef77 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -83,7 +83,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + if (has_iommu) { + vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + +- if (virtcca_cvm_enabled() && (strcmp(vdev->name, "vhost-user-fs") == 0)) { ++ if (virtcca_cvm_enabled() && (strcmp(vdev->name, "virtio-user-fs") == 0)) { + vdev_has_iommu = true; + } + +-- +2.41.0.windows.1 + diff --git a/cvm-bug-fix-for-undefined-reference-to-virtcca_cvm_a.patch b/cvm-bug-fix-for-undefined-reference-to-virtcca_cvm_a.patch new file mode 100644 index 0000000000000000000000000000000000000000..87f5186e36f898269cb8ac879ac6b24742a9b39b --- /dev/null +++ b/cvm-bug-fix-for-undefined-reference-to-virtcca_cvm_a.patch @@ -0,0 +1,30 @@ +From 87dfbca72fe11b7a8d3f1afce52a7925be0e0b01 Mon Sep 17 00:00:00 2001 +From: liupingwei +Date: Wed, 4 Sep 2024 14:29:02 +0800 +Subject: [PATCH] cvm : bug fix for undefined reference to + 'virtcca_cvm_allowed' while compiling. + +Fixes a linking error due to an undefined reference to +'virtcca_cvm_allowed' when KVM is not enabled. 
+ +Signed-off-by: liupingwei +--- + accel/stubs/kvm-stub.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index 1b37d9a302..ad39a434c4 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -25,6 +25,8 @@ bool kvm_allowed; + bool kvm_readonly_mem_allowed; + bool kvm_msi_use_devid; + ++bool virtcca_cvm_allowed; ++ + void kvm_flush_coalesced_mmio_buffer(void) + { + } +-- +2.41.0.windows.1 + diff --git a/delete-the-in-tpm.txt.patch b/delete-the-in-tpm.txt.patch deleted file mode 100644 index 01ce3ace541aca115bccd47100f5dbd954643764..0000000000000000000000000000000000000000 --- a/delete-the-in-tpm.txt.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 3020ae141ef40f06b17eb0f16d2a3c6d5872ff89 Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Wed, 29 Jul 2020 08:45:50 +0000 -Subject: [PATCH 05/19] delete the in tpm.txt - -Signed-off-by: jiangfangjie ---- - docs/specs/tpm.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt -index 5d8c26b1..9c8cca04 100644 ---- a/docs/specs/tpm.txt -+++ b/docs/specs/tpm.txt -@@ -89,7 +89,7 @@ TPM upon reboot. The PPI specification defines the operation requests and the - actions the firmware has to take. The system administrator passes the operation - request number to the firmware through an ACPI interface which writes this - number to a memory location that the firmware knows. Upon reboot, the firmware --finds the number and sends commands to the the TPM. The firmware writes the TPM -+finds the number and sends commands to the TPM. The firmware writes the TPM - result code and the operation request number to a memory location that ACPI can - read from and pass the result on to the administrator. 
- --- -2.23.0 - diff --git a/disable-keyring-option.patch b/disable-keyring-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..a33b320bbc596ceb0491ddc0d75208c94012f859 --- /dev/null +++ b/disable-keyring-option.patch @@ -0,0 +1,28 @@ +From fe771abc365ba0cb62dd1726f1aa5274f1807876 Mon Sep 17 00:00:00 2001 +From: Jiabo Feng +Date: Sat, 30 Mar 2024 16:24:45 +0800 +Subject: [PATCH] disable keyring option + +Due to the default prohibition of some syscall(e.g. add_key) in the Docker compilation environment, the testcases in test-crypto-secret.c cannot pass. + +Signed-off-by: Jiabo Feng +--- + meson_options.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/meson_options.txt b/meson_options.txt +index c9baeda639..cf9706c411 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -121,7 +121,7 @@ option('avx512f', type: 'feature', value: 'disabled', + description: 'AVX512F optimizations') + option('avx512bw', type: 'feature', value: 'auto', + description: 'AVX512BW optimizations') +-option('keyring', type: 'feature', value: 'auto', ++option('keyring', type: 'feature', value: 'disabled', + description: 'Linux keyring support') + option('libkeyutils', type: 'feature', value: 'auto', + description: 'Linux keyutils support') +-- +2.41.0.windows.1 + diff --git a/display-bochs-display-fix-memory-leak.patch b/display-bochs-display-fix-memory-leak.patch deleted file mode 100644 index 4dd3aa61c2b2b1026e0065c708ead4aeb79b3c21..0000000000000000000000000000000000000000 --- a/display-bochs-display-fix-memory-leak.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 7edca67dc630e31043644e87ede2e05e504f845b Mon Sep 17 00:00:00 2001 -From: Cameron Esfahani -Date: Tue, 10 Dec 2019 13:27:54 -0800 -Subject: [PATCH 1/8] display/bochs-display: fix memory leak -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix memory leak in bochs_display_update(). Leaks 304 bytes per frame. 
- -Fixes: 33ebad54056 -Signed-off-by: Cameron Esfahani -Message-Id: -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Gerd Hoffmann ---- - hw/display/bochs-display.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c -index 8e83b51..b601b2f 100644 ---- a/hw/display/bochs-display.c -+++ b/hw/display/bochs-display.c -@@ -251,6 +251,8 @@ static void bochs_display_update(void *opaque) - dpy_gfx_update(s->con, 0, ys, - mode.width, y - ys); - } -+ -+ g_free(snap); - } - } - --- -1.8.3.1 - diff --git a/dma-Fix-function-names-in-documentation.patch b/dma-Fix-function-names-in-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..ae47cb2e841ede49aca3e1ce627d2962b9877100 --- /dev/null +++ b/dma-Fix-function-names-in-documentation.patch @@ -0,0 +1,61 @@ +From d490ccc1254c7d4dbe8ab40dd78e189108155ae0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E5=BC=A0=E6=A5=9A=E5=90=9B?= + +Date: Fri, 18 Oct 2024 10:10:17 +0800 +Subject: [PATCH] dma: Fix function names in documentation Ensure the function + names match. + +Signed-off-by: Akihiko Odaki +Message-id: 20241012-dma-v2-1-6afddf5f3c8d@daynix.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +Signed-off-by: Zhang Chujun +--- + include/sysemu/dma.h | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h +index a1ac5bc1b5..5a49a30628 100644 +--- a/include/sysemu/dma.h ++++ b/include/sysemu/dma.h +@@ -152,7 +152,7 @@ static inline MemTxResult dma_memory_read(AddressSpace *as, dma_addr_t addr, + } + + /** +- * address_space_write: Write to address space from DMA controller. ++ * dma_memory_write: Write to address space from DMA controller. 
+ * + * Return a MemTxResult indicating whether the operation succeeded + * or failed (eg unassigned memory, device rejected the transaction, +@@ -189,7 +189,7 @@ MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, + uint8_t c, dma_addr_t len, MemTxAttrs attrs); + + /** +- * address_space_map: Map a physical memory region into a host virtual address. ++ * dma_memory_map: Map a physical memory region into a host virtual address. + * + * May map a subset of the requested range, given by and returned in @plen. + * May return %NULL and set *@plen to zero(0), if resources needed to perform +@@ -216,16 +216,15 @@ static inline void *dma_memory_map(AddressSpace *as, + } + + /** +- * address_space_unmap: Unmaps a memory region previously mapped +- * by dma_memory_map() ++ * dma_memory_unmap: Unmaps a memory region previously mapped by dma_memory_map() + * + * Will also mark the memory as dirty if @dir == %DMA_DIRECTION_FROM_DEVICE. + * @access_len gives the amount of memory that was actually read or written + * by the caller. 
+ * + * @as: #AddressSpace used +- * @buffer: host pointer as returned by address_space_map() +- * @len: buffer length as returned by address_space_map() ++ * @buffer: host pointer as returned by dma_memory_map() ++ * @len: buffer length as returned by dma_memory_map() + * @dir: indicates the transfer direction + * @access_len: amount of data actually transferred + */ +-- +2.41.0.windows.1 + diff --git a/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch b/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch deleted file mode 100644 index c61c9fd848c4e1d68baa778388c8440a8d28ec32..0000000000000000000000000000000000000000 --- a/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch +++ /dev/null @@ -1,79 +0,0 @@ -From fbde196c30e4797a51bda046ba514b187963d4ba Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 29 Jul 2019 23:34:16 +0200 -Subject: [PATCH] dma-helpers: ensure AIO callback is invoked after - cancellation - -dma_aio_cancel unschedules the BH if there is one, which corresponds -to the reschedule_dma case of dma_blk_cb. This can stall the DMA -permanently, because dma_complete will never get invoked and therefore -nobody will ever invoke the original AIO callback in dbs->common.cb. - -Fix this by invoking the callback (which is ensured to happen after -a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and -add assertions to check that the DMA state machine is indeed waiting -for dma_complete or reschedule_dma, but never both. 
- -Reported-by: John Snow -Signed-off-by: Paolo Bonzini -Message-id: 20190729213416.1972-1-pbonzini@redhat.com -Signed-off-by: John Snow -(cherry picked from commit 539343c0a47e19d5dd64d846d64d084d9793681f) -Signed-off-by: Michael Roth ---- - dma-helpers.c | 13 +++++++++---- - 1 file changed, 9 insertions(+), 4 deletions(-) - -diff --git a/dma-helpers.c b/dma-helpers.c -index 2d7e02d35e..d3871dc61e 100644 ---- a/dma-helpers.c -+++ b/dma-helpers.c -@@ -90,6 +90,7 @@ static void reschedule_dma(void *opaque) - { - DMAAIOCB *dbs = (DMAAIOCB *)opaque; - -+ assert(!dbs->acb && dbs->bh); - qemu_bh_delete(dbs->bh); - dbs->bh = NULL; - dma_blk_cb(dbs, 0); -@@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret) - { - trace_dma_complete(dbs, ret, dbs->common.cb); - -+ assert(!dbs->acb && !dbs->bh); - dma_blk_unmap(dbs); - if (dbs->common.cb) { - dbs->common.cb(dbs->common.opaque, ret); - } - qemu_iovec_destroy(&dbs->iov); -- if (dbs->bh) { -- qemu_bh_delete(dbs->bh); -- dbs->bh = NULL; -- } - qemu_aio_unref(dbs); - } - -@@ -179,14 +177,21 @@ static void dma_aio_cancel(BlockAIOCB *acb) - - trace_dma_aio_cancel(dbs); - -+ assert(!(dbs->acb && dbs->bh)); - if (dbs->acb) { -+ /* This will invoke dma_blk_cb. 
*/ - blk_aio_cancel_async(dbs->acb); -+ return; - } -+ - if (dbs->bh) { - cpu_unregister_map_client(dbs->bh); - qemu_bh_delete(dbs->bh); - dbs->bh = NULL; - } -+ if (dbs->common.cb) { -+ dbs->common.cb(dbs->common.opaque, -ECANCELED); -+ } - } - - static AioContext *dma_get_aio_context(BlockAIOCB *acb) --- -2.23.0 diff --git a/doc-Update-multi-thread-compression-doc.patch b/doc-Update-multi-thread-compression-doc.patch index 33ef835acaf040ed3a404b6f0314a9e5c4af0673..e1f1db086dbf9a31213839897a47546ce331db1d 100644 --- a/doc-Update-multi-thread-compression-doc.patch +++ b/doc-Update-multi-thread-compression-doc.patch @@ -1,10 +1,11 @@ -From 642df85795097017e9370a9721f702cbec50c173 Mon Sep 17 00:00:00 2001 -From: Zeyu Jin +From 55e5f8cafda3c7d4a91e9d58c7b3259476e0dab9 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng Date: Sat, 30 Jan 2021 16:36:47 +0800 Subject: [PATCH] doc: Update multi-thread compression doc Modify the doc to fit the previous changes. +Signed-off-by: Chuan Zheng Signed-off-by: Zeyu Jin Signed-off-by: Ying Fang --- @@ -12,7 +13,7 @@ Signed-off-by: Ying Fang 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/docs/multi-thread-compression.txt b/docs/multi-thread-compression.txt -index bb88c6bdf1..d429963cb0 100644 +index 95b1556f67..450e5de469 100644 --- a/docs/multi-thread-compression.txt +++ b/docs/multi-thread-compression.txt @@ -33,14 +33,15 @@ thread compression can be used to accelerate the compression process. @@ -46,15 +47,15 @@ index bb88c6bdf1..d429963cb0 100644 + {qemu} migrate_set_parameter compress_method zstd + +4. Set the compression thread count on source: - {qemu} migrate_set_parameter compress_threads 12 + {qemu} migrate_set_parameter compress-threads 12 -4. Set the compression level on the source: +5. Set the compression level on the source: - {qemu} migrate_set_parameter compress_level 1 + {qemu} migrate_set_parameter compress-level 1 -5. Set the decompression thread count on destination: +6. 
Set the decompression thread count on destination: - {qemu} migrate_set_parameter decompress_threads 3 + {qemu} migrate_set_parameter decompress-threads 3 -6. Start outgoing migration: +7. Start outgoing migration: @@ -62,9 +63,9 @@ index bb88c6bdf1..d429963cb0 100644 {qemu} info migrate Capabilities: ... compress: on @@ -136,6 +140,7 @@ The following are the default settings: - compress_threads: 8 - decompress_threads: 2 - compress_level: 1 (which means best speed) + compress-threads: 8 + decompress-threads: 2 + compress-level: 1 (which means best speed) + compress_method: zlib So, only the first two steps are required to use the multiple diff --git a/doc-update-AMD-SEV-to-include-Live-migration-flow.patch b/doc-update-AMD-SEV-to-include-Live-migration-flow.patch new file mode 100644 index 0000000000000000000000000000000000000000..122a797b677f056c8ce1d7f1aefd0eb1d672d306 --- /dev/null +++ b/doc-update-AMD-SEV-to-include-Live-migration-flow.patch @@ -0,0 +1,69 @@ +From 2da2e7ebea456360cc41881ff2e4a81a03b6d10c Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 7 May 2020 22:26:17 +0000 +Subject: [PATCH] doc: update AMD SEV to include Live migration flow + +cherry-picked from https://github.com/AMDESE/qemu/commit/0e2b3d80e3. + +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Brijesh Singh +Signed-off-by: Ashish Kalra +Signed-off-by: hanliyang +--- + docs/system/i386/amd-memory-encryption.rst | 40 +++++++++++++++++++++- + 1 file changed, 39 insertions(+), 1 deletion(-) + +diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst +index e9bc142bc1..b7e3f46ff6 100644 +--- a/docs/system/i386/amd-memory-encryption.rst ++++ b/docs/system/i386/amd-memory-encryption.rst +@@ -177,7 +177,45 @@ TODO + Live Migration + --------------- + +-TODO ++AMD SEV encrypts the memory of VMs and because a different key is used ++in each VM, the hypervisor will be unable to simply copy the ++ciphertext from one VM to another to migrate the VM. Instead the AMD SEV Key ++Management API provides sets of function which the hypervisor can use ++to package a guest page for migration, while maintaining the confidentiality ++provided by AMD SEV. ++ ++SEV guest VMs have the concept of private and shared memory. The private ++memory is encrypted with the guest-specific key, while shared memory may ++be encrypted with the hypervisor key. The migration APIs provided by the ++SEV API spec should be used for migrating the private pages. The ++KVM_GET_PAGE_ENC_BITMAP ioctl can be used to get the guest page encryption ++bitmap. The bitmap can be used to check if the given guest page is ++private or shared. ++ ++Before initiating the migration, we need to know the targets machine's public ++Diffie-Hellman key (PDH) and certificate chain. It can be retrieved ++with the 'query-sev-capabilities' QMP command or using the sev-tool. The ++migrate-set-parameter can be used to pass the target machine's PDH and ++certificate chain. ++ ++During the migration flow, the SEND_START is called on the source hypervisor ++to create an outgoing encryption context. The SEV guest policy dictates whether ++the certificate passed through the migrate-sev-set-info command will be ++validated. 
SEND_UPDATE_DATA is called to encrypt the guest private pages. ++After migration is completed, SEND_FINISH is called to destroy the encryption ++context and make the VM non-runnable to protect it against cloning. ++ ++On the target machine, RECEIVE_START is called first to create an ++incoming encryption context. The RECEIVE_UPDATE_DATA is called to copy ++the received encrypted page into guest memory. After migration has ++completed, RECEIVE_FINISH is called to make the VM runnable. ++ ++For more information about the migration see SEV API Appendix A ++Usage flow (Live migration section). ++ ++NOTE: ++To protect against the memory clone SEV APIs are designed to make the VM ++unrunnable in case of the migration failure. + + References + ---------- +-- +2.41.0.windows.1 + diff --git a/docs-Add-GNR-SRF-and-CWF-CPU-models.patch b/docs-Add-GNR-SRF-and-CWF-CPU-models.patch new file mode 100644 index 0000000000000000000000000000000000000000..f59f818c17d48eb158df6ea08b2be8b906111e30 --- /dev/null +++ b/docs-Add-GNR-SRF-and-CWF-CPU-models.patch @@ -0,0 +1,119 @@ +From 2753607e8768002debb4608dacafe1309420a4dd Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan 2025 10:06:50 +0800 +Subject: [PATCH] docs: Add GNR, SRF and CWF CPU models + +commit 0a6dec6d11e5e392dcd6299548bf1514f1201707 upstream. + +Update GraniteRapids, SierraForest and ClearwaterForest CPU models in +section "Preferred CPU models for Intel x86 hosts". + +Also introduce bhi-no, gds-no and rfds-no in doc. + +Intel-SIG: commit 0a6dec6d11e5 docs: Add GNR, SRF and CWF CPU models. 
+ +Suggested-by: Zhao Liu +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-5-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + docs/system/cpu-models-x86.rst.inc | 50 +++++++++++++++++++++++++++--- + 1 file changed, 46 insertions(+), 4 deletions(-) + +diff --git a/docs/system/cpu-models-x86.rst.inc b/docs/system/cpu-models-x86.rst.inc +index 7f6368f999..37fe1d0ac8 100644 +--- a/docs/system/cpu-models-x86.rst.inc ++++ b/docs/system/cpu-models-x86.rst.inc +@@ -71,6 +71,16 @@ mixture of host CPU models between machines, if live migration + compatibility is required, use the newest CPU model that is compatible + across all desired hosts. + ++``ClearwaterForest`` ++ Intel Xeon Processor (ClearwaterForest, 2025) ++ ++``SierraForest``, ``SierraForest-v2`` ++ Intel Xeon Processor (SierraForest, 2024), SierraForest-v2 mitigates ++ the GDS and RFDS vulnerabilities with stepping 3. ++ ++``GraniteRapids``, ``GraniteRapids-v2`` ++ Intel Xeon Processor (GraniteRapids, 2024) ++ + ``Cascadelake-Server``, ``Cascadelake-Server-noTSX`` + Intel Xeon Processor (Cascade Lake, 2019), with "stepping" levels 6 + or 7 only. (The Cascade Lake Xeon processor with *stepping 5 is +@@ -181,7 +191,7 @@ features are included if using "Host passthrough" or "Host model". + CVE-2018-12127, [MSBDS] CVE-2018-12126). + + This is an MSR (Model-Specific Register) feature rather than a CPUID feature, +- so it will not appear in the Linux ``/proc/cpuinfo`` in the host or ++ therefore it will not appear in the Linux ``/proc/cpuinfo`` in the host or + guest. Instead, the host kernel uses it to populate the MDS + vulnerability file in ``sysfs``. + +@@ -189,10 +199,10 @@ features are included if using "Host passthrough" or "Host model". + affected} in the ``/sys/devices/system/cpu/vulnerabilities/mds`` file. 
+ + ``taa-no`` +- Recommended to inform that the guest that the host is ``not`` ++ Recommended to inform the guest that the host is ``not`` + vulnerable to CVE-2019-11135, TSX Asynchronous Abort (TAA). + +- This too is an MSR feature, so it does not show up in the Linux ++ This is also an MSR feature, therefore it does not show up in the Linux + ``/proc/cpuinfo`` in the host or guest. + + It should only be enabled for VMs if the host reports ``Not affected`` +@@ -214,7 +224,7 @@ features are included if using "Host passthrough" or "Host model". + By disabling TSX, KVM-based guests can avoid paying the price of + mitigating TSX-based attacks. + +- Note that ``tsx-ctrl`` too is an MSR feature, so it does not show ++ Note that ``tsx-ctrl`` is also an MSR feature, therefore it does not show + up in the Linux ``/proc/cpuinfo`` in the host or guest. + + To validate that Intel TSX is indeed disabled for the guest, there are +@@ -223,6 +233,38 @@ features are included if using "Host passthrough" or "Host model". + ``/sys/devices/system/cpu/vulnerabilities/tsx_async_abort`` file in + the guest should report ``Mitigation: TSX disabled``. + ++``bhi-no`` ++ Recommended to inform the guest that the host is ``not`` ++ vulnerable to CVE-2022-0001, Branch History Injection (BHI). ++ ++ This is also an MSR feature, therefore it does not show up in the Linux ++ ``/proc/cpuinfo`` in the host or guest. ++ ++ It should only be enabled for VMs if the host reports ++ ``BHI: Not affected`` in the ++ ``/sys/devices/system/cpu/vulnerabilities/spectre_v2`` file. ++ ++``gds-no`` ++ Recommended to inform the guest that the host is ``not`` ++ vulnerable to CVE-2022-40982, Gather Data Sampling (GDS). ++ ++ This is also an MSR feature, therefore it does not show up in the Linux ++ ``/proc/cpuinfo`` in the host or guest. ++ ++ It should only be enabled for VMs if the host reports ``Not affected`` ++ in the ``/sys/devices/system/cpu/vulnerabilities/gather_data_sampling`` ++ file. 
++ ++``rfds-no`` ++ Recommended to inform the guest that the host is ``not`` ++ vulnerable to CVE-2023-28746, Register File Data Sampling (RFDS). ++ ++ This is also an MSR feature, therefore it does not show up in the Linux ++ ``/proc/cpuinfo`` in the host or guest. ++ ++ It should only be enabled for VMs if the host reports ``Not affected`` ++ in the ``/sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling`` ++ file. + + Preferred CPU models for AMD x86 hosts + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +-- +2.41.0.windows.1 + diff --git a/docs-Add-generic-vhost-vdpa-device-documentation.patch b/docs-Add-generic-vhost-vdpa-device-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..3480791dfabf4f6641a57b423c8185e5b74c63da --- /dev/null +++ b/docs-Add-generic-vhost-vdpa-device-documentation.patch @@ -0,0 +1,78 @@ +From 28ed79b98f08b5701dcaab7c6ad1015602b28e02 Mon Sep 17 00:00:00 2001 +From: libai +Date: Sat, 12 Nov 2022 22:40:13 +0800 +Subject: [PATCH] docs: Add generic vhost-vdpa device documentation + +Add the description of the generic vhost-vdpa device + +Signed-off-by: libai +--- + docs/system/device-emulation.rst | 1 + + .../devices/vhost-vdpa-generic-device.rst | 46 +++++++++++++++++++ + 2 files changed, 47 insertions(+) + create mode 100644 docs/system/devices/vhost-vdpa-generic-device.rst + +diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst +index d1f3277cb0..e1b2d18fb1 100644 +--- a/docs/system/device-emulation.rst ++++ b/docs/system/device-emulation.rst +@@ -98,3 +98,4 @@ Emulated Devices + devices/canokey.rst + devices/usb-u2f.rst + devices/igb.rst ++ devices/vhost-vdpa-generic-device.rst +diff --git a/docs/system/devices/vhost-vdpa-generic-device.rst b/docs/system/devices/vhost-vdpa-generic-device.rst +new file mode 100644 +index 0000000000..25fbcac60e +--- /dev/null ++++ b/docs/system/devices/vhost-vdpa-generic-device.rst +@@ -0,0 +1,46 @@ ++ ++========================= 
++vhost-vDPA generic device ++========================= ++ ++This document explains the usage of the vhost-vDPA generic device. ++ ++Description ++----------- ++ ++vDPA (virtio data path acceleration) device is a device that uses a datapath ++which complies with the virtio specifications with vendor specific control ++path. ++ ++QEMU provides two types of vhost-vDPA devices to enable the vDPA device, one ++is type sensitive which means QEMU needs to know the actual device type ++(e.g. net, blk, scsi) and another is called "vhost-vDPA generic device" which ++is type insensitive. ++ ++The vhost-vDPA generic device builds on the vhost-vdpa subsystem and virtio ++subsystem. It is quite small, but it can support any type of virtio device. ++ ++Examples ++-------- ++ ++Prepare the vhost-vDPA backends first: ++ ++:: ++ host# ls -l /dev/vhost-vdpa-* ++ crw------- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 ++ ++Start QEMU with virtio-mmio bus: ++ ++:: ++ host# qemu-system \ ++ -M microvm -m 512 -smp 2 -kernel ... -initrd ... \ ++ -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ ++ ...
++ ++Start QEMU with virtio-pci bus: ++ ++:: ++ host# qemu-system \ ++ -M pc -m 512 -smp 2 \ ++ -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ ++ ...\ +-- +2.27.0 + diff --git a/docs-devel-Add-VFIO-iommufd-backend-documentation.patch b/docs-devel-Add-VFIO-iommufd-backend-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..f15a1f8709b2e478e69212219d494196d9f2df66 --- /dev/null +++ b/docs-devel-Add-VFIO-iommufd-backend-documentation.patch @@ -0,0 +1,220 @@ +From fd1d6d64803a052adcab8c7993ca40cabc9c926d Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:53:03 +0800 +Subject: [PATCH] docs/devel: Add VFIO iommufd backend documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Suggested-by: Cédric Le Goater +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + MAINTAINERS | 1 + + docs/devel/index-internals.rst | 1 + + docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++ + 3 files changed, 168 insertions(+) + create mode 100644 docs/devel/vfio-iommufd.rst + +diff --git a/MAINTAINERS b/MAINTAINERS +index ca70bb4e64..0ddb20a35f 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2176,6 +2176,7 @@ F: backends/iommufd.c + F: include/sysemu/iommufd.h + F: include/qemu/chardev_open.h + F: util/chardev_open.c ++F: docs/devel/vfio-iommufd.rst + + vhost + M: Michael S. Tsirkin +diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst +index 6f81df92bc..3def4a138b 100644 +--- a/docs/devel/index-internals.rst ++++ b/docs/devel/index-internals.rst +@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them. 
+ s390-dasd-ipl + tracing + vfio-migration ++ vfio-iommufd + writing-monitor-commands + virtio-backends +diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst +new file mode 100644 +index 0000000000..3d1c11f175 +--- /dev/null ++++ b/docs/devel/vfio-iommufd.rst +@@ -0,0 +1,166 @@ ++=============================== ++IOMMUFD BACKEND usage with VFIO ++=============================== ++ ++(Same meaning for backend/container/BE) ++ ++With the introduction of iommufd, the Linux kernel provides a generic ++interface for user space drivers to propagate their DMA mappings to kernel ++for assigned devices. While the legacy kernel interface is group-centric, ++the new iommufd interface is device-centric, relying on device fd and iommufd. ++ ++To support both interfaces in the QEMU VFIO device, introduce a base container ++to abstract the common part of VFIO legacy and iommufd container. So that the ++generic VFIO code can use either container. ++ ++The base container implements generic functions such as memory_listener and ++address space management whereas the derived container implements callbacks ++specific to either legacy or iommufd. Each container has its own way to setup ++secure context and dma management interface. The below diagram shows how it ++looks like with both containers. 
++ ++:: ++ ++ VFIO AddressSpace/Memory ++ +-------+ +----------+ +-----+ +-----+ ++ | pci | | platform | | ap | | ccw | ++ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+ ++ | | | | | AddressSpace | ++ | | | | +------------+---------+ ++ +---V-----------V-----------V--------V----+ / ++ | VFIOAddressSpace | <------------+ ++ | | | MemoryListener ++ | VFIOContainerBase list | ++ +-------+----------------------------+----+ ++ | | ++ | | ++ +-------V------+ +--------V----------+ ++ | iommufd | | vfio legacy | ++ | container | | container | ++ +-------+------+ +--------+----------+ ++ | | ++ | /dev/iommu | /dev/vfio/vfio ++ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id ++ Userspace | | ++ ============+============================+=========================== ++ Kernel | device fd | ++ +---------------+ | group/container fd ++ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU) ++ | ATTACH_IOAS) | | device fd ++ | | | ++ | +-------V------------V-----------------+ ++ iommufd | | vfio | ++ (map/unmap | +---------+--------------------+-------+ ++ ioas_copy) | | | map/unmap ++ | | | ++ +------V------+ +-----V------+ +------V--------+ ++ | iommfd core | | device | | vfio iommu | ++ +-------------+ +------------+ +---------------+ ++ ++* Secure Context setup ++ ++ - iommufd BE: uses device fd and iommufd to setup secure context ++ (bind_iommufd, attach_ioas) ++ - vfio legacy BE: uses group fd and container fd to setup secure context ++ (set_container, set_iommu) ++ ++* Device access ++ ++ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX`` ++ - vfio legacy BE: device fd is retrieved from group fd ioctl ++ ++* DMA Mapping flow ++ ++ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener ++ 2. 
VFIO populates DMA map/unmap via the container BEs ++ * iommufd BE: uses iommufd ++ * vfio legacy BE: uses container fd ++ ++Example configuration ++===================== ++ ++Step 1: configure the host device ++--------------------------------- ++ ++It's exactly same as the VFIO device with legacy VFIO container. ++ ++Step 2: configure QEMU ++---------------------- ++ ++Interactions with the ``/dev/iommu`` are abstracted by a new iommufd ++object (compiled in with the ``CONFIG_IOMMUFD`` option). ++ ++Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must ++be linked with an iommufd object. It gets a new optional property ++named iommufd which allows to pass an iommufd object. Take ``vfio-pci`` ++device for example: ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0 ++ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 ++ ++Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a ++management layer. In such a case the fd is passed, the fd supports a ++string naming the fd or a number, for example: ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0,fd=22 ++ -device vfio-pci,iommufd=iommufd0,fd=23 ++ ++If the ``fd`` property is not passed, the fd is opened by QEMU. ++ ++If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd ++is not used and the user gets the behavior based on the legacy VFIO ++container: ++ ++.. code-block:: bash ++ ++ -device vfio-pci,host=0000:02:00.0 ++ ++Supported platform ++================== ++ ++Supports x86, ARM and s390x currently. ++ ++Caveats ++======= ++ ++Dirty page sync ++--------------- ++ ++Dirty page sync with iommufd backend is unsupported yet, live migration is ++disabled by default. But it can be force enabled like below, low efficient ++though. ++ ++.. 
code-block:: bash ++ ++ -object iommufd,id=iommufd0 ++ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on ++ ++P2P DMA ++------- ++ ++PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI ++BAR region yet. Below warning shows for assigned PCI device, it's not a bug. ++ ++.. code-block:: none ++ ++ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR? ++ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address) ++ ++FD passing with mdev ++-------------------- ++ ++``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev. ++If FD passing is used, there is no way to know that and the mdev is treated ++like a real PCI device. There is an error as below if user wants to enable ++RAM discarding for mdev. ++ ++.. code-block:: none ++ ++ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices ++ ++``vfio-ap`` and ``vfio-ccw`` devices don't have same issue as their backend ++devices are always mdev and RAM discarding is force enabled. +-- +2.41.0.windows.1 + diff --git a/docs-interop-firmware.json-Add-arm-rme-firmware-feat.patch b/docs-interop-firmware.json-Add-arm-rme-firmware-feat.patch new file mode 100644 index 0000000000000000000000000000000000000000..abfcc38c6d405babce8f2187729dd8d5353590d7 --- /dev/null +++ b/docs-interop-firmware.json-Add-arm-rme-firmware-feat.patch @@ -0,0 +1,53 @@ +From e8055696aa1d0ee3fab298fb3605473f285c9cc6 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 16 Apr 2025 13:40:08 +0100 +Subject: [PATCH] docs/interop/firmware.json: Add arm-rme firmware feature + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b547ad23843a33030968a51e547d0e2ff875086b + +Some distributions provide packages containing firmware to be run under +QEMU, such as "qemu-efi-aarch64" or "edk2-aarch64".
Those packages also +contain descriptors in /usr/share/qemu/firmware/*.json listing the +firmware features, so that environments like libvirt can figure out +which firmware they can load. + +Define an optional feature for arm64 firmware to indicate that a +firmware supports running in a Realm. Firmware implementations need +extra support for running in a Realm, in particular to distinguish +shared from private guest memory. + +Signed-off-by: Jean-Philippe Brucker +Conflicts: + docs/interop/firmware.json +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + docs/interop/firmware.json | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json +index cc8f869186..08c2fbabe7 100644 +--- a/docs/interop/firmware.json ++++ b/docs/interop/firmware.json +@@ -127,6 +127,9 @@ + # options related to this feature are documented in + # "docs/system/i386/amd-memory-encryption.rst". + # ++# @arm-rme: The firmware supports running in a Realm, under the Arm Realm ++# Management Extension (RME). ++# + # @intel-tdx: The firmware supports running under Intel Trust Domain + # Extensions (TDX). 
+ # +@@ -196,7 +199,7 @@ + { 'enum' : 'FirmwareFeature', + 'data' : [ 'acpi-s3', 'acpi-s4', + 'amd-sev', 'amd-sev-es', 'amd-sev-snp', +- 'intel-tdx', ++ 'arm-rme', 'intel-tdx', + 'enrolled-keys', 'requires-smm', 'secure-boot', + 'verbose-dynamic', 'verbose-static' ] } + +-- +2.33.0 + diff --git a/docs-migration-Convert-virtio.txt-into-rST.patch b/docs-migration-Convert-virtio.txt-into-rST.patch new file mode 100644 index 0000000000000000000000000000000000000000..a635eec6daa56111645862105e5cc2029e2379d8 --- /dev/null +++ b/docs-migration-Convert-virtio.txt-into-rST.patch @@ -0,0 +1,271 @@ +From 689a0e1d7e3fea78bc90ded9b17ccbf66b5e91ad Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:21 +0800 +Subject: [17/99] docs/migration: Convert virtio.txt into rST +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 4d7a691bcfeb5580e3f7457e1f1c2fbd64572161 upstream. + +Convert the plain old .txt into .rst, add it into migration/index.rst. + +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-4-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/index.rst | 1 + + docs/devel/migration/virtio.rst | 115 ++++++++++++++++++++++++++++++++ + docs/devel/migration/virtio.txt | 108 ------------------------------ + 3 files changed, 116 insertions(+), 108 deletions(-) + create mode 100644 docs/devel/migration/virtio.rst + delete mode 100644 docs/devel/migration/virtio.txt + +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 02cfdcc969..2cb701c77c 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -9,3 +9,4 @@ QEMU live migration works. 
+ + main + vfio ++ virtio +diff --git a/docs/devel/migration/virtio.rst b/docs/devel/migration/virtio.rst +new file mode 100644 +index 0000000000..611a18b821 +--- /dev/null ++++ b/docs/devel/migration/virtio.rst +@@ -0,0 +1,115 @@ ++======================= ++Virtio device migration ++======================= ++ ++Copyright 2015 IBM Corp. ++ ++This work is licensed under the terms of the GNU GPL, version 2 or later. See ++the COPYING file in the top-level directory. ++ ++Saving and restoring the state of virtio devices is a bit of a twisty maze, ++for several reasons: ++ ++- state is distributed between several parts: ++ ++ - virtio core, for common fields like features, number of queues, ... ++ ++ - virtio transport (pci, ccw, ...), for the different proxy devices and ++ transport specific state (msix vectors, indicators, ...) ++ ++ - virtio device (net, blk, ...), for the different device types and their ++ state (mac address, request queue, ...) ++ ++- most fields are saved via the stream interface; subsequently, subsections ++ have been added to make cross-version migration possible ++ ++This file attempts to document the current procedure and point out some ++caveats. 
++ ++Save state procedure ++==================== ++ ++:: ++ ++ virtio core virtio transport virtio device ++ ----------- ---------------- ------------- ++ ++ save() function registered ++ via VMState wrapper on ++ device class ++ virtio_save() <---------- ++ ------> save_config() ++ - save proxy device ++ - save transport-specific ++ device fields ++ - save common device ++ fields ++ - save common virtqueue ++ fields ++ ------> save_queue() ++ - save transport-specific ++ virtqueue fields ++ ------> save_device() ++ - save device-specific ++ fields ++ - save subsections ++ - device endianness, ++ if changed from ++ default endianness ++ - 64 bit features, if ++ any high feature bit ++ is set ++ - virtio-1 virtqueue ++ fields, if VERSION_1 ++ is set ++ ++Load state procedure ++==================== ++ ++:: ++ ++ virtio core virtio transport virtio device ++ ----------- ---------------- ------------- ++ ++ load() function registered ++ via VMState wrapper on ++ device class ++ virtio_load() <---------- ++ ------> load_config() ++ - load proxy device ++ - load transport-specific ++ device fields ++ - load common device ++ fields ++ - load common virtqueue ++ fields ++ ------> load_queue() ++ - load transport-specific ++ virtqueue fields ++ - notify guest ++ ------> load_device() ++ - load device-specific ++ fields ++ - load subsections ++ - device endianness ++ - 64 bit features ++ - virtio-1 virtqueue ++ fields ++ - sanitize endianness ++ - sanitize features ++ - virtqueue index sanity ++ check ++ - feature-dependent setup ++ ++Implications of this setup ++========================== ++ ++Devices need to be careful in their state processing during load: The ++load_device() procedure is invoked by the core before subsections have ++been loaded. Any code that depends on information transmitted in subsections ++therefore has to be invoked in the device's load() function _after_ ++virtio_load() returned (like e.g. code depending on features). 
++ ++Any extension of the state being migrated should be done in subsections ++added to the core for compatibility reasons. If transport or device specific ++state is added, core needs to invoke a callback from the new subsection. +diff --git a/docs/devel/migration/virtio.txt b/docs/devel/migration/virtio.txt +deleted file mode 100644 +index 98a6b0ffb5..0000000000 +--- a/docs/devel/migration/virtio.txt ++++ /dev/null +@@ -1,108 +0,0 @@ +-Virtio devices and migration +-============================ +- +-Copyright 2015 IBM Corp. +- +-This work is licensed under the terms of the GNU GPL, version 2 or later. See +-the COPYING file in the top-level directory. +- +-Saving and restoring the state of virtio devices is a bit of a twisty maze, +-for several reasons: +-- state is distributed between several parts: +- - virtio core, for common fields like features, number of queues, ... +- - virtio transport (pci, ccw, ...), for the different proxy devices and +- transport specific state (msix vectors, indicators, ...) +- - virtio device (net, blk, ...), for the different device types and their +- state (mac address, request queue, ...) +-- most fields are saved via the stream interface; subsequently, subsections +- have been added to make cross-version migration possible +- +-This file attempts to document the current procedure and point out some +-caveats. 
+- +- +-Save state procedure +-==================== +- +-virtio core virtio transport virtio device +------------ ---------------- ------------- +- +- save() function registered +- via VMState wrapper on +- device class +-virtio_save() <---------- +- ------> save_config() +- - save proxy device +- - save transport-specific +- device fields +-- save common device +- fields +-- save common virtqueue +- fields +- ------> save_queue() +- - save transport-specific +- virtqueue fields +- ------> save_device() +- - save device-specific +- fields +-- save subsections +- - device endianness, +- if changed from +- default endianness +- - 64 bit features, if +- any high feature bit +- is set +- - virtio-1 virtqueue +- fields, if VERSION_1 +- is set +- +- +-Load state procedure +-==================== +- +-virtio core virtio transport virtio device +------------ ---------------- ------------- +- +- load() function registered +- via VMState wrapper on +- device class +-virtio_load() <---------- +- ------> load_config() +- - load proxy device +- - load transport-specific +- device fields +-- load common device +- fields +-- load common virtqueue +- fields +- ------> load_queue() +- - load transport-specific +- virtqueue fields +-- notify guest +- ------> load_device() +- - load device-specific +- fields +-- load subsections +- - device endianness +- - 64 bit features +- - virtio-1 virtqueue +- fields +-- sanitize endianness +-- sanitize features +-- virtqueue index sanity +- check +- - feature-dependent setup +- +- +-Implications of this setup +-========================== +- +-Devices need to be careful in their state processing during load: The +-load_device() procedure is invoked by the core before subsections have +-been loaded. Any code that depends on information transmitted in subsections +-therefore has to be invoked in the device's load() function _after_ +-virtio_load() returned (like e.g. code depending on features). 
+- +-Any extension of the state being migrated should be done in subsections +-added to the core for compatibility reasons. If transport or device specific +-state is added, core needs to invoke a callback from the new subsection. +-- +2.33.0 + diff --git a/docs-migration-Create-index-page.patch b/docs-migration-Create-index-page.patch new file mode 100644 index 0000000000000000000000000000000000000000..402e4f563dab1c1ffff80bbf7d2ba99750a9590b --- /dev/null +++ b/docs-migration-Create-index-page.patch @@ -0,0 +1,94 @@ +From d91782d895b71e416f66bc7e42797d50699839bb Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:20 +0800 +Subject: [16/99] docs/migration: Create index page +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit f6bbac985e6df492f2c6be94fb893ada75ffdefa upstream. + +Create an index page for migration module. Move VFIO migration there too. +A trivial touch-up on the title to use lower case there. + +Since then we'll have "migration" as the top title, make the main doc file +renamed to "migration framework". + +Cc: Alex Williamson +Cc: Cédric Le Goater +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-3-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/index-internals.rst | 3 +-- + docs/devel/migration/index.rst | 11 +++++++++++ + docs/devel/migration/main.rst | 6 +++--- + docs/devel/migration/vfio.rst | 2 +- + 4 files changed, 16 insertions(+), 6 deletions(-) + create mode 100644 docs/devel/migration/index.rst + +diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst +index a41d62c1eb..5636e9cf1d 100644 +--- a/docs/devel/index-internals.rst ++++ b/docs/devel/index-internals.rst +@@ -11,13 +11,12 @@ Details about QEMU's various subsystems including how to add features to them. 
+ block-coroutine-wrapper + clocks + ebpf_rss +- migration/main ++ migration/index + multi-process + reset + s390-cpu-topology + s390-dasd-ipl + tracing +- vfio-migration + vfio-iommufd + writing-monitor-commands + virtio-backends +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +new file mode 100644 +index 0000000000..02cfdcc969 +--- /dev/null ++++ b/docs/devel/migration/index.rst +@@ -0,0 +1,11 @@ ++Migration ++========= ++ ++This is the main entry for QEMU migration documentations. It explains how ++QEMU live migration works. ++ ++.. toctree:: ++ :maxdepth: 2 ++ ++ main ++ vfio +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index ec55089b25..82cdb420bf 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -1,6 +1,6 @@ +-========= +-Migration +-========= ++=================== ++Migration framework ++=================== + + QEMU has code to load/save the state of the guest that it is running. + These are two complementary operations. 
Saving the state just does +diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst +index 605fe60e96..c49482eab6 100644 +--- a/docs/devel/migration/vfio.rst ++++ b/docs/devel/migration/vfio.rst +@@ -1,5 +1,5 @@ + ===================== +-VFIO device Migration ++VFIO device migration + ===================== + + Migration of virtual machine involves saving the state for each device that +-- +2.33.0 + diff --git a/docs-migration-Create-migration-directory.patch b/docs-migration-Create-migration-directory.patch new file mode 100644 index 0000000000000000000000000000000000000000..9734695f1cc86a60884025b4da8745f59fcbc58f --- /dev/null +++ b/docs-migration-Create-migration-directory.patch @@ -0,0 +1,65 @@ +From 830cfda7df1e63448c916492ce6be497511d6fb7 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:19 +0800 +Subject: [15/99] docs/migration: Create migration/ directory +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 8cb2f8b172e74a7279fabb5d5c20aee32b5b98cd upstream. + +Migration documentation is growing into a single file too large. Create a +sub-directory for it for a split. + +We also already have separate vfio/virtio documentations, move it all over +into the directory. + +Note that the virtio one is still not yet converted to rST. That is a job +for later. + +Cc: "Michael S. 
Tsirkin" +Cc: Jason Wang +Cc: Alex Williamson +Cc: Cédric Le Goater +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-2-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/index-internals.rst | 2 +- + docs/devel/{migration.rst => migration/main.rst} | 0 + docs/devel/{vfio-migration.rst => migration/vfio.rst} | 0 + docs/devel/{virtio-migration.txt => migration/virtio.txt} | 0 + 4 files changed, 1 insertion(+), 1 deletion(-) + rename docs/devel/{migration.rst => migration/main.rst} (100%) + rename docs/devel/{vfio-migration.rst => migration/vfio.rst} (100%) + rename docs/devel/{virtio-migration.txt => migration/virtio.txt} (100%) + +diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst +index 3def4a138b..a41d62c1eb 100644 +--- a/docs/devel/index-internals.rst ++++ b/docs/devel/index-internals.rst +@@ -11,7 +11,7 @@ Details about QEMU's various subsystems including how to add features to them. + block-coroutine-wrapper + clocks + ebpf_rss +- migration ++ migration/main + multi-process + reset + s390-cpu-topology +diff --git a/docs/devel/migration.rst b/docs/devel/migration/main.rst +similarity index 100% +rename from docs/devel/migration.rst +rename to docs/devel/migration/main.rst +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/migration/vfio.rst +similarity index 100% +rename from docs/devel/vfio-migration.rst +rename to docs/devel/migration/vfio.rst +diff --git a/docs/devel/virtio-migration.txt b/docs/devel/migration/virtio.txt +similarity index 100% +rename from docs/devel/virtio-migration.txt +rename to docs/devel/migration/virtio.txt +-- +2.33.0 + diff --git a/docs-migration-Further-move-vfio-to-be-feature-of-mi.patch b/docs-migration-Further-move-vfio-to-be-feature-of-mi.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffc17bf9b0abc571c9ec098bd0157e58f5f0603f --- /dev/null +++ 
b/docs-migration-Further-move-vfio-to-be-feature-of-mi.patch @@ -0,0 +1,47 @@ +From e9614f86ff43d0417ddaa3eab8be67c565e561b9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:27 +0800 +Subject: [23/99] docs/migration: Further move vfio to be feature of migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 66fd3b1a7ab02f7d8c84f92eba23e3ddc955204d upstream. + +Move it one layer down, so taking VFIO-migration as a feature for +migration. + +Cc: Alex Williamson +Cc: Cédric Le Goater +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-10-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/index.rst | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index e257d0d100..dea016f707 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -8,3 +8,4 @@ Migration has plenty of features to support different use cases. + + postcopy + dirty-limit ++ vfio +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 21ad58b189..b1357309e1 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -10,6 +10,5 @@ QEMU live migration works. 
+ main + features + compatibility +- vfio + virtio + best-practices +-- +2.33.0 + diff --git a/docs-migration-Further-move-virtio-to-be-feature-of-.patch b/docs-migration-Further-move-virtio-to-be-feature-of-.patch new file mode 100644 index 0000000000000000000000000000000000000000..231fcf1c407800ef0a1b56df2bb4bb9d1c323852 --- /dev/null +++ b/docs-migration-Further-move-virtio-to-be-feature-of-.patch @@ -0,0 +1,47 @@ +From a8d5d9425ddec134a9e9c164a80b0bf1ba29381b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:28 +0800 +Subject: [24/99] docs/migration: Further move virtio to be feature of + migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit eb9f6daae49c06bb91e9660908587cc55265e43a upstream. + +Move it one layer down, so taking Virtio-migration as a feature for +migration. + +Cc: "Michael S. Tsirkin" +Cc: Jason Wang +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-11-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/index.rst | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index dea016f707..a9acaf618e 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -9,3 +9,4 @@ Migration has plenty of features to support different use cases. + postcopy + dirty-limit + vfio ++ virtio +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index b1357309e1..2aa294d631 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -10,5 +10,4 @@ QEMU live migration works. 
+ main + features + compatibility +- virtio + best-practices +-- +2.33.0 + diff --git a/docs-migration-Organize-Postcopy-page.patch b/docs-migration-Organize-Postcopy-page.patch new file mode 100644 index 0000000000000000000000000000000000000000..eac0d909d6e9bb09e40c4fd438a0ed85f413c2c9 --- /dev/null +++ b/docs-migration-Organize-Postcopy-page.patch @@ -0,0 +1,229 @@ +From b15ee6a2f82aa810cfed0401d0843f33f5761d48 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:26 +0800 +Subject: [22/99] docs/migration: Organize "Postcopy" page +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 21b17cd011c959c3fd3fdad994389410a02df901 upstream. + +Reorganize the page, moving things around, and add a few +headlines ("Postcopy internals", "Postcopy features") to cover sub-areas. + +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-9-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/postcopy.rst | 159 ++++++++++++++++-------------- + 1 file changed, 84 insertions(+), 75 deletions(-) + +diff --git a/docs/devel/migration/postcopy.rst b/docs/devel/migration/postcopy.rst +index d60eec06ab..6c51e96d79 100644 +--- a/docs/devel/migration/postcopy.rst ++++ b/docs/devel/migration/postcopy.rst +@@ -1,6 +1,9 @@ ++======== + Postcopy + ======== + ++.. contents:: ++ + 'Postcopy' migration is a way to deal with migrations that refuse to converge + (or take too long to converge) its plus side is that there is an upper bound on + the amount of migration traffic and time it takes, the down side is that during +@@ -14,7 +17,7 @@ Postcopy can be combined with precopy (i.e. normal migration) so that if precopy + doesn't finish in a given time the switch is made to postcopy. 
+ + Enabling postcopy +------------------ ++================= + + To enable postcopy, issue this command on the monitor (both source and + destination) prior to the start of migration: +@@ -49,8 +52,71 @@ time per vCPU. + ``migrate_set_parameter`` is ignored (to avoid delaying requested pages that + the destination is waiting for). + +-Postcopy device transfer +------------------------- ++Postcopy internals ++================== ++ ++State machine ++------------- ++ ++Postcopy moves through a series of states (see postcopy_state) from ++ADVISE->DISCARD->LISTEN->RUNNING->END ++ ++ - Advise ++ ++ Set at the start of migration if postcopy is enabled, even ++ if it hasn't had the start command; here the destination ++ checks that its OS has the support needed for postcopy, and performs ++ setup to ensure the RAM mappings are suitable for later postcopy. ++ The destination will fail early in migration at this point if the ++ required OS support is not present. ++ (Triggered by reception of POSTCOPY_ADVISE command) ++ ++ - Discard ++ ++ Entered on receipt of the first 'discard' command; prior to ++ the first Discard being performed, hugepages are switched off ++ (using madvise) to ensure that no new huge pages are created ++ during the postcopy phase, and to cause any huge pages that ++ have discards on them to be broken. ++ ++ - Listen ++ ++ The first command in the package, POSTCOPY_LISTEN, switches ++ the destination state to Listen, and starts a new thread ++ (the 'listen thread') which takes over the job of receiving ++ pages off the migration stream, while the main thread carries ++ on processing the blob. With this thread able to process page ++ reception, the destination now 'sensitises' the RAM to detect ++ any access to missing pages (on Linux using the 'userfault' ++ system). ++ ++ - Running ++ ++ POSTCOPY_RUN causes the destination to synchronise all ++ state and start the CPUs and IO devices running. 
The main ++ thread now finishes processing the migration package and ++ now carries on as it would for normal precopy migration ++ (although it can't do the cleanup it would do as it ++ finishes a normal migration). ++ ++ - Paused ++ ++ Postcopy can run into a paused state (normally on both sides when ++ happens), where all threads will be temporarily halted mostly due to ++ network errors. When reaching paused state, migration will make sure ++ the qemu binary on both sides maintain the data without corrupting ++ the VM. To continue the migration, the admin needs to fix the ++ migration channel using the QMP command 'migrate-recover' on the ++ destination node, then resume the migration using QMP command 'migrate' ++ again on source node, with resume=true flag set. ++ ++ - End ++ ++ The listen thread can now quit, and perform the cleanup of migration ++ state, the migration is now complete. ++ ++Device transfer ++--------------- + + Loading of device data may cause the device emulation to access guest RAM + that may trigger faults that have to be resolved by the source, as such +@@ -130,7 +196,20 @@ processing. + is no longer used by migration, while the listen thread carries on servicing + page data until the end of migration. + +-Postcopy Recovery ++Source side page bitmap ++----------------------- ++ ++The 'migration bitmap' in postcopy is basically the same as in the precopy, ++where each of the bit to indicate that page is 'dirty' - i.e. needs ++sending. During the precopy phase this is updated as the CPU dirties ++pages, however during postcopy the CPUs are stopped and nothing should ++dirty anything any more. Instead, dirty bits are cleared when the relevant ++pages are sent during postcopy. ++ ++Postcopy features ++================= ++ ++Postcopy recovery + ----------------- + + Comparing to precopy, postcopy is special on error handlings. When any +@@ -166,76 +245,6 @@ configurations of the guest. 
For example, when with async page fault + enabled, logically the guest can proactively schedule out the threads + accessing missing pages. + +-Postcopy states +---------------- +- +-Postcopy moves through a series of states (see postcopy_state) from +-ADVISE->DISCARD->LISTEN->RUNNING->END +- +- - Advise +- +- Set at the start of migration if postcopy is enabled, even +- if it hasn't had the start command; here the destination +- checks that its OS has the support needed for postcopy, and performs +- setup to ensure the RAM mappings are suitable for later postcopy. +- The destination will fail early in migration at this point if the +- required OS support is not present. +- (Triggered by reception of POSTCOPY_ADVISE command) +- +- - Discard +- +- Entered on receipt of the first 'discard' command; prior to +- the first Discard being performed, hugepages are switched off +- (using madvise) to ensure that no new huge pages are created +- during the postcopy phase, and to cause any huge pages that +- have discards on them to be broken. +- +- - Listen +- +- The first command in the package, POSTCOPY_LISTEN, switches +- the destination state to Listen, and starts a new thread +- (the 'listen thread') which takes over the job of receiving +- pages off the migration stream, while the main thread carries +- on processing the blob. With this thread able to process page +- reception, the destination now 'sensitises' the RAM to detect +- any access to missing pages (on Linux using the 'userfault' +- system). +- +- - Running +- +- POSTCOPY_RUN causes the destination to synchronise all +- state and start the CPUs and IO devices running. The main +- thread now finishes processing the migration package and +- now carries on as it would for normal precopy migration +- (although it can't do the cleanup it would do as it +- finishes a normal migration). 
+- +- - Paused +- +- Postcopy can run into a paused state (normally on both sides when +- happens), where all threads will be temporarily halted mostly due to +- network errors. When reaching paused state, migration will make sure +- the qemu binary on both sides maintain the data without corrupting +- the VM. To continue the migration, the admin needs to fix the +- migration channel using the QMP command 'migrate-recover' on the +- destination node, then resume the migration using QMP command 'migrate' +- again on source node, with resume=true flag set. +- +- - End +- +- The listen thread can now quit, and perform the cleanup of migration +- state, the migration is now complete. +- +-Source side page map +--------------------- +- +-The 'migration bitmap' in postcopy is basically the same as in the precopy, +-where each of the bit to indicate that page is 'dirty' - i.e. needs +-sending. During the precopy phase this is updated as the CPU dirties +-pages, however during postcopy the CPUs are stopped and nothing should +-dirty anything any more. Instead, dirty bits are cleared when the relevant +-pages are sent during postcopy. +- + Postcopy with hugepages + ----------------------- + +@@ -293,7 +302,7 @@ Retro-fitting postcopy to existing clients is possible: + guest memory access is made while holding a lock then all other + threads waiting for that lock will also be blocked. 
+ +-Postcopy Preemption Mode ++Postcopy preemption mode + ------------------------ + + Postcopy preempt is a new capability introduced in 8.0 QEMU release, it +-- +2.33.0 + diff --git a/docs-migration-Split-Backwards-compatibility-separat.patch b/docs-migration-Split-Backwards-compatibility-separat.patch new file mode 100644 index 0000000000000000000000000000000000000000..e02fdadf8958cdc8427743e3653159ed179da8c2 --- /dev/null +++ b/docs-migration-Split-Backwards-compatibility-separat.patch @@ -0,0 +1,1088 @@ +From ed43780ea13b581be42a154890bdcc8e58919dd9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:22 +0800 +Subject: [18/99] docs/migration: Split "Backwards compatibility" separately +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 6cc6a7b98b88f1a7d1d5ed99db0d373a46606aac upstream. + +Split the section from main.rst into a separate file. Reference it in the +index.rst. + +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-5-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/compatibility.rst | 517 ++++++++++++++++++++++++ + docs/devel/migration/index.rst | 1 + + docs/devel/migration/main.rst | 519 ------------------------- + 3 files changed, 518 insertions(+), 519 deletions(-) + create mode 100644 docs/devel/migration/compatibility.rst + +diff --git a/docs/devel/migration/compatibility.rst b/docs/devel/migration/compatibility.rst +new file mode 100644 +index 0000000000..5a5417ef06 +--- /dev/null ++++ b/docs/devel/migration/compatibility.rst +@@ -0,0 +1,517 @@ ++Backwards compatibility ++======================= ++ ++How backwards compatibility works ++--------------------------------- ++ ++When we do migration, we have two QEMU processes: the source and the ++target. There are two cases, they are the same version or they are ++different versions. The easy case is when they are the same version. 
++The difficult one is when they are different versions. ++ ++There are two things that are different, but they have very similar ++names and sometimes get confused: ++ ++- QEMU version ++- machine type version ++ ++Let's start with a practical example, we start with: ++ ++- qemu-system-x86_64 (v5.2), from now on qemu-5.2. ++- qemu-system-x86_64 (v5.1), from now on qemu-5.1. ++ ++Related to this are the "latest" machine types defined on each of ++them: ++ ++- pc-q35-5.2 (newer one in qemu-5.2) from now on pc-5.2 ++- pc-q35-5.1 (newer one in qemu-5.1) from now on pc-5.1 ++ ++First of all, migration is only supposed to work if you use the same ++machine type in both source and destination. The QEMU hardware ++configuration needs to be the same also on source and destination. ++Most aspects of the backend configuration can be changed at will, ++except for a few cases where the backend features influence frontend ++device feature exposure. But that is not relevant for this section. ++ ++I am going to list the number of combinations that we can have. Let's ++start with the trivial ones, QEMU is the same on source and ++destination: ++ ++1 - qemu-5.2 -M pc-5.2 -> migrates to -> qemu-5.2 -M pc-5.2 ++ ++ This is the latest QEMU with the latest machine type. ++ This have to work, and if it doesn't work it is a bug. ++ ++2 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 ++ ++ Exactly the same case than the previous one, but for 5.1. ++ Nothing to see here either. ++ ++This are the easiest ones, we will not talk more about them in this ++section. ++ ++Now we start with the more interesting cases. Consider the case where ++we have the same QEMU version in both sides (qemu-5.2) but we are using ++the latest machine type for that version (pc-5.2) but one of an older ++QEMU version, in this case pc-5.1. 
++ ++3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ It needs to use the definition of pc-5.1 and the devices as they ++ were configured on 5.1, but this should be easy in the sense that ++ both sides are the same QEMU and both sides have exactly the same ++ idea of what the pc-5.1 machine is. ++ ++4 - qemu-5.1 -M pc-5.2 -> migrates to -> qemu-5.1 -M pc-5.2 ++ ++ This combination is not possible as the qemu-5.1 doesn't understand ++ pc-5.2 machine type. So nothing to worry here. ++ ++Now it comes the interesting ones, when both QEMU processes are ++different. Notice also that the machine type needs to be pc-5.1, ++because we have the limitation than qemu-5.1 doesn't know pc-5.2. So ++the possible cases are: ++ ++5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 ++ ++ This migration is known as newer to older. We need to make sure ++ when we are developing 5.2 we need to take care about not to break ++ migration to qemu-5.1. Notice that we can't make updates to ++ qemu-5.1 to understand whatever qemu-5.2 decides to change, so it is ++ in qemu-5.2 side to make the relevant changes. ++ ++6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ This migration is known as older to newer. We need to make sure ++ than we are able to receive migrations from qemu-5.1. The problem is ++ similar to the previous one. ++ ++If qemu-5.1 and qemu-5.2 were the same, there will not be any ++compatibility problems. But the reason that we create qemu-5.2 is to ++get new features, devices, defaults, etc. ++ ++If we get a device that has a new feature, or change a default value, ++we have a problem when we try to migrate between different QEMU ++versions. ++ ++So we need a way to tell qemu-5.2 that when we are using machine type ++pc-5.1, it needs to **not** use the feature, to be able to migrate to ++real qemu-5.1. ++ ++And the equivalent part when migrating from qemu-5.1 to qemu-5.2. 
++qemu-5.2 has to expect that it is not going to get data for the new ++feature, because qemu-5.1 doesn't know about it. ++ ++How do we tell QEMU about these device feature changes? In ++hw/core/machine.c:hw_compat_X_Y arrays. ++ ++If we change a default value, we need to put back the old value on ++that array. And the device, during initialization, needs to look at ++that array to see what value it needs to get for that feature. And ++what are we going to put in that array? The value of a property. ++ ++To create a property for a device, we need to use one of the ++DEFINE_PROP_*() macros. See include/hw/qdev-properties.h to find the ++macros that exist. With it, we set the default value for that ++property, and that is what it is going to get in the latest released ++version. But if we want a different value for a previous version, we ++can change that in the hw_compat_X_Y arrays. ++ ++hw_compat_X_Y is an array of entries that have the format: ++ ++- name_device ++- name_property ++- value ++ ++Let's see a practical example. ++ ++In qemu-5.2 virtio-blk-device got multi queue support. This is a ++change that is not backward compatible. In qemu-5.1 it has one ++queue. In qemu-5.2 it has the same number of queues as the number of ++cpus in the system. ++ ++When we are doing migration, if we migrate from a device that has 4 ++queues to a device that has only one queue, we don't know where to ++put the extra information for the other 3 queues, and we fail ++migration. ++ ++Similar problem when we migrate from qemu-5.1 that has only one queue ++to qemu-5.2, we only send information for one queue, but destination ++has 4, and we have 3 queues that are not properly initialized and ++anything can happen. ++ ++So, how can we address this problem? Easy, just convince qemu-5.2 ++that when it is running pc-5.1, it needs to set the number of queues ++for virtio-blk-devices to 1. ++ ++That way we fix the cases 5 and 6.
++ ++5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 ++ ++ qemu-5.2 -M pc-5.1 sets number of queues to be 1. ++ qemu-5.1 -M pc-5.1 expects number of queues to be 1. ++ ++ correct. migration works. ++ ++6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ qemu-5.1 -M pc-5.1 sets number of queues to be 1. ++ qemu-5.2 -M pc-5.1 expects number of queues to be 1. ++ ++ correct. migration works. ++ ++And now the other interesting case, case 3. In this case we have: ++ ++3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ Here we have the same QEMU in both sides. So it doesn't matter a ++ lot if we have set the number of queues to 1 or not, because ++ they are the same. ++ ++ WRONG! ++ ++ Think what happens if we do one of this double migrations: ++ ++ A -> migrates -> B -> migrates -> C ++ ++ where: ++ ++ A: qemu-5.1 -M pc-5.1 ++ B: qemu-5.2 -M pc-5.1 ++ C: qemu-5.2 -M pc-5.1 ++ ++ migration A -> B is case 6, so number of queues needs to be 1. ++ ++ migration B -> C is case 3, so we don't care. But actually we ++ care because we haven't started the guest in qemu-5.2, it came ++ migrated from qemu-5.1. So to be in the safe place, we need to ++ always use number of queues 1 when we are using pc-5.1. ++ ++Now, how was this done in reality? 
The following commit shows how it ++was done:: ++ ++ commit 9445e1e15e66c19e42bea942ba810db28052cd05 ++ Author: Stefan Hajnoczi ++ Date: Tue Aug 18 15:33:47 2020 +0100 ++ ++ virtio-blk-pci: default num_queues to -smp N ++ ++The relevant parts for migration are:: ++ ++ @@ -1281,7 +1284,8 @@ static Property virtio_blk_properties[] = { ++ #endif ++ DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, ++ true), ++ - DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1), ++ + DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, ++ + VIRTIO_BLK_AUTO_NUM_QUEUES), ++ DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), ++ ++It changes the default value of num_queues. But it fixes it for old ++machine types to have the right value:: ++ ++ @@ -31,6 +31,7 @@ ++ GlobalProperty hw_compat_5_1[] = { ++ ... ++ + { "virtio-blk-device", "num-queues", "1"}, ++ ... ++ }; ++ ++A device with different features on both sides ++---------------------------------------------- ++ ++Let's assume that we are using the same QEMU binary on both sides, ++just to make things easier. But we have a device that has ++different features on both sides of the migration. That can be ++because the devices are different, because the kernel driver of both ++devices have different features, whatever. ++ ++How can we get this to work with migration? The way to do that is ++"theoretically" easy. You have to get the features that the device ++has in the source of the migration and the features that the device has ++on the target of the migration; you get the intersection of the ++features of both sides, and that is the way that you should launch ++QEMU. ++ ++Notice that this is not completely related to QEMU. The most ++important thing here is that this should be handled by the managing ++application that launches QEMU. If QEMU is configured correctly, the ++migration will succeed. ++ ++That said, actually doing it is complicated.
Almost all devices are ++bad at being able to be launched with only some features enabled. ++With one big exception: cpus. ++ ++You can read the documentation for QEMU x86 cpu models here: ++ ++https://qemu-project.gitlab.io/qemu/system/qemu-cpu-models.html ++ ++See when they talk about migration they recommend that one chooses the ++newest cpu model that is supported for all cpus. ++ ++Let's say that we have: ++ ++Host A: ++ ++Device X has the feature Y ++ ++Host B: ++ ++Device X has not the feature Y ++ ++If we try to migrate without any care from host A to host B, it will ++fail because when migration tries to load the feature Y on ++destination, it will find that the hardware is not there. ++ ++Doing this would be the equivalent of doing with cpus: ++ ++Host A: ++ ++$ qemu-system-x86_64 -cpu host ++ ++Host B: ++ ++$ qemu-system-x86_64 -cpu host ++ ++When both hosts have different cpu features this is guaranteed to ++fail. Especially if Host B has less features than host A. If host A ++has less features than host B, sometimes it works. Important word of ++last sentence is "sometimes". ++ ++So, forgetting about cpu models and continuing with the -cpu host ++example, let's see that the differences of the cpus is that Host A and ++B have the following features: ++ ++Features: 'pcid' 'stibp' 'taa-no' ++Host A: X X ++Host B: X ++ ++And we want to migrate between them, the way configure both QEMU cpu ++will be: ++ ++Host A: ++ ++$ qemu-system-x86_64 -cpu host,pcid=off,stibp=off ++ ++Host B: ++ ++$ qemu-system-x86_64 -cpu host,taa-no=off ++ ++And you would be able to migrate between them. It is responsibility ++of the management application or of the user to make sure that the ++configuration is correct. QEMU doesn't know how to look at this kind ++of features in general. ++ ++Notice that we don't recommend to use -cpu host for migration. It is ++used in this example because it makes the example simpler. ++ ++Other devices have worse control about individual features. 
If they ++want to be able to migrate between hosts that show different features, ++the device needs a way to configure which ones it is going to use. ++ ++In this section we have considered that we are using the same QEMU ++binary in both sides of the migration. If we use different QEMU ++process versions, then we need to take into account all other ++differences and the examples become even more complicated. ++ ++How to mitigate when we have a backward compatibility error ++----------------------------------------------------------- ++ ++We break migration for old machine types continuously during ++development. But as soon as we find that there is a problem, we fix ++it. The problem is what happens when we detect after we have done a ++release that something has gone wrong. ++ ++Let's see how it worked with one example. ++ ++After the release of qemu-8.0 we found a problem when doing migration ++of the machine type pc-7.2. ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++ This migration works ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++ ++ This migration works ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++ This migration fails ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++ ++ This migration fails ++ ++So clearly something fails when migrating between qemu-7.2 and ++qemu-8.0 with machine type pc-7.2. The error messages, and git bisect ++pointed to this commit.
++ ++In qemu-8.0 we got this commit:: ++ ++ commit 010746ae1db7f52700cb2e2c46eb94f299cfa0d2 ++ Author: Jonathan Cameron ++ Date: Thu Mar 2 13:37:02 2023 +0000 ++ ++ hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register ++ ++ ++The relevant bits of the commit for our example are this ones:: ++ ++ --- a/hw/pci/pcie_aer.c ++ +++ b/hw/pci/pcie_aer.c ++ @@ -112,6 +112,10 @@ int pcie_aer_init(PCIDevice *dev, ++ ++ pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, ++ PCI_ERR_UNC_SUPPORTED); ++ + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_MASK_DEFAULT); ++ + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_SUPPORTED); ++ ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, ++ PCI_ERR_UNC_SEVERITY_DEFAULT); ++ ++The patch changes how we configure PCI space for AER. But QEMU fails ++when the PCI space configuration is different between source and ++destination. ++ ++The following commit shows how this got fixed:: ++ ++ commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f ++ Author: Leonardo Bras ++ Date: Tue May 2 21:27:02 2023 -0300 ++ ++ hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 ++ ++ [...] ++ ++The relevant parts of the fix in QEMU are as follow: ++ ++First, we create a new property for the device to be able to configure ++the old behaviour or the new behaviour:: ++ ++ diff --git a/hw/pci/pci.c b/hw/pci/pci.c ++ index 8a87ccc8b0..5153ad63d6 100644 ++ --- a/hw/pci/pci.c ++ +++ b/hw/pci/pci.c ++ @@ -79,6 +79,8 @@ static Property pci_props[] = { ++ DEFINE_PROP_STRING("failover_pair_id", PCIDevice, ++ failover_pair_id), ++ DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), ++ + DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, ++ + QEMU_PCIE_ERR_UNC_MASK_BITNR, true), ++ DEFINE_PROP_END_OF_LIST() ++ }; ++ ++Notice that we enable the feature for new machine types. ++ ++Now we see how the fix is done. 
This is going to depend on what kind ++of breakage happens, but in this case it is quite simple:: ++ ++ diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c ++ index 103667c368..374d593ead 100644 ++ --- a/hw/pci/pcie_aer.c ++ +++ b/hw/pci/pcie_aer.c ++ @@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, ++ uint16_t offset, ++ ++ pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, ++ PCI_ERR_UNC_SUPPORTED); ++ - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ - PCI_ERR_UNC_MASK_DEFAULT); ++ - pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ - PCI_ERR_UNC_SUPPORTED); ++ + ++ + if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { ++ + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_MASK_DEFAULT); ++ + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_SUPPORTED); ++ + } ++ ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, ++ PCI_ERR_UNC_SEVERITY_DEFAULT); ++ ++I.e. If the property bit is enabled, we configure it as we did for ++qemu-8.0. If the property bit is not set, we configure it as it was in 7.2. ++ ++And now, everything that is missing is disabling the feature for old ++machine types:: ++ ++ diff --git a/hw/core/machine.c b/hw/core/machine.c ++ index 47a34841a5..07f763eb2e 100644 ++ --- a/hw/core/machine.c ++ +++ b/hw/core/machine.c ++ @@ -48,6 +48,7 @@ GlobalProperty hw_compat_7_2[] = { ++ { "e1000e", "migrate-timadj", "off" }, ++ { "virtio-mem", "x-early-migration", "false" }, ++ { "migration", "x-preempt-pre-7-2", "true" }, ++ + { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, ++ }; ++ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); ++ ++And now, when qemu-8.0.1 is released with this fix, all combinations ++are going to work as supposed. 
++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) ++ ++So normality has been restored and everything is ok, no? ++ ++Not really, now our matrix is much bigger. We started with the easy ++cases, migration from the same version to the same version always ++works: ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++ ++Now the interesting ones: when the QEMU process versions are ++different. For the 1st set, they fail and we can do nothing, both ++versions are released and we can't change anything. ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++These two are the ones that work. The whole point of making the ++change in qemu-8.0.1 release was to fix this issue: ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++But now we found that qemu-8.0 can migrate neither to qemu-7.2 nor to ++qemu-8.0.1. ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++ ++So, if we start a pc-7.2 machine in qemu-8.0 we can't migrate it to ++anything except to qemu-8.0. ++ ++Can we do better? ++ ++Yes. If we know that we are going to do this migration: ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++ ++We can launch the appropriate devices with:: ++ ++ --device...,x-pcie-err-unc-mask=on ++ ++And now we can receive a migration from 8.0. And from now on, we can ++do that migration to new machine types if we remember to enable that ++property for pc-7.2. Notice that we need to remember, it is not ++enough to know that the source of the migration is qemu-8.0.
Think of ++this example: ++ ++$ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 -> qemu-8.2 -M pc-7.2 ++ ++In the second migration, the source is not qemu-8.0, but we still have ++that "problem" and have that property enabled. Notice that we need to ++continue having this mark/property until we have this machine ++rebooted. But it is not a normal reboot (that don't reload QEMU) we ++need the machine to poweroff/poweron on a fixed QEMU. And from now ++on we can use the proper real machine. +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 2cb701c77c..7fc02b9520 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -8,5 +8,6 @@ QEMU live migration works. + :maxdepth: 2 + + main ++ compatibility + vfio + virtio +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 82cdb420bf..04194414af 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -993,522 +993,3 @@ In some cases it may be best to tie specific firmware versions to specific + versioned machine types to cut down on the combinations that will need + support. This is also useful when newer versions of firmware outgrow + the padding. +- +- +-Backwards compatibility +-======================= +- +-How backwards compatibility works +---------------------------------- +- +-When we do migration, we have two QEMU processes: the source and the +-target. There are two cases, they are the same version or they are +-different versions. The easy case is when they are the same version. +-The difficult one is when they are different versions. +- +-There are two things that are different, but they have very similar +-names and sometimes get confused: +- +-- QEMU version +-- machine type version +- +-Let's start with a practical example, we start with: +- +-- qemu-system-x86_64 (v5.2), from now on qemu-5.2. +-- qemu-system-x86_64 (v5.1), from now on qemu-5.1. 
+- +-Related to this are the "latest" machine types defined on each of +-them: +- +-- pc-q35-5.2 (newer one in qemu-5.2) from now on pc-5.2 +-- pc-q35-5.1 (newer one in qemu-5.1) from now on pc-5.1 +- +-First of all, migration is only supposed to work if you use the same +-machine type in both source and destination. The QEMU hardware +-configuration needs to be the same also on source and destination. +-Most aspects of the backend configuration can be changed at will, +-except for a few cases where the backend features influence frontend +-device feature exposure. But that is not relevant for this section. +- +-I am going to list the number of combinations that we can have. Let's +-start with the trivial ones, QEMU is the same on source and +-destination: +- +-1 - qemu-5.2 -M pc-5.2 -> migrates to -> qemu-5.2 -M pc-5.2 +- +- This is the latest QEMU with the latest machine type. +- This have to work, and if it doesn't work it is a bug. +- +-2 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 +- +- Exactly the same case than the previous one, but for 5.1. +- Nothing to see here either. +- +-This are the easiest ones, we will not talk more about them in this +-section. +- +-Now we start with the more interesting cases. Consider the case where +-we have the same QEMU version in both sides (qemu-5.2) but we are using +-the latest machine type for that version (pc-5.2) but one of an older +-QEMU version, in this case pc-5.1. +- +-3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- It needs to use the definition of pc-5.1 and the devices as they +- were configured on 5.1, but this should be easy in the sense that +- both sides are the same QEMU and both sides have exactly the same +- idea of what the pc-5.1 machine is. +- +-4 - qemu-5.1 -M pc-5.2 -> migrates to -> qemu-5.1 -M pc-5.2 +- +- This combination is not possible as the qemu-5.1 doesn't understand +- pc-5.2 machine type. So nothing to worry here. 
+- +-Now it comes the interesting ones, when both QEMU processes are +-different. Notice also that the machine type needs to be pc-5.1, +-because we have the limitation than qemu-5.1 doesn't know pc-5.2. So +-the possible cases are: +- +-5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 +- +- This migration is known as newer to older. We need to make sure +- when we are developing 5.2 we need to take care about not to break +- migration to qemu-5.1. Notice that we can't make updates to +- qemu-5.1 to understand whatever qemu-5.2 decides to change, so it is +- in qemu-5.2 side to make the relevant changes. +- +-6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- This migration is known as older to newer. We need to make sure +- than we are able to receive migrations from qemu-5.1. The problem is +- similar to the previous one. +- +-If qemu-5.1 and qemu-5.2 were the same, there will not be any +-compatibility problems. But the reason that we create qemu-5.2 is to +-get new features, devices, defaults, etc. +- +-If we get a device that has a new feature, or change a default value, +-we have a problem when we try to migrate between different QEMU +-versions. +- +-So we need a way to tell qemu-5.2 that when we are using machine type +-pc-5.1, it needs to **not** use the feature, to be able to migrate to +-real qemu-5.1. +- +-And the equivalent part when migrating from qemu-5.1 to qemu-5.2. +-qemu-5.2 has to expect that it is not going to get data for the new +-feature, because qemu-5.1 doesn't know about it. +- +-How do we tell QEMU about these device feature changes? In +-hw/core/machine.c:hw_compat_X_Y arrays. +- +-If we change a default value, we need to put back the old value on +-that array. And the device, during initialization needs to look at +-that array to see what value it needs to get for that feature. And +-what are we going to put in that array, the value of a property. 
+- +-To create a property for a device, we need to use one of the +-DEFINE_PROP_*() macros. See include/hw/qdev-properties.h to find the +-macros that exist. With it, we set the default value for that +-property, and that is what it is going to get in the latest released +-version. But if we want a different value for a previous version, we +-can change that in the hw_compat_X_Y arrays. +- +-hw_compat_X_Y is an array of registers that have the format: +- +-- name_device +-- name_property +-- value +- +-Let's see a practical example. +- +-In qemu-5.2 virtio-blk-device got multi queue support. This is a +-change that is not backward compatible. In qemu-5.1 it has one +-queue. In qemu-5.2 it has the same number of queues as the number of +-cpus in the system. +- +-When we are doing migration, if we migrate from a device that has 4 +-queues to a device that have only one queue, we don't know where to +-put the extra information for the other 3 queues, and we fail +-migration. +- +-Similar problem when we migrate from qemu-5.1 that has only one queue +-to qemu-5.2, we only sent information for one queue, but destination +-has 4, and we have 3 queues that are not properly initialized and +-anything can happen. +- +-So, how can we address this problem. Easy, just convince qemu-5.2 +-that when it is running pc-5.1, it needs to set the number of queues +-for virtio-blk-devices to 1. +- +-That way we fix the cases 5 and 6. +- +-5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 +- +- qemu-5.2 -M pc-5.1 sets number of queues to be 1. +- qemu-5.1 -M pc-5.1 expects number of queues to be 1. +- +- correct. migration works. +- +-6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- qemu-5.1 -M pc-5.1 sets number of queues to be 1. +- qemu-5.2 -M pc-5.1 expects number of queues to be 1. +- +- correct. migration works. +- +-And now the other interesting case, case 3. 
In this case we have: +- +-3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- Here we have the same QEMU in both sides. So it doesn't matter a +- lot if we have set the number of queues to 1 or not, because +- they are the same. +- +- WRONG! +- +- Think what happens if we do one of this double migrations: +- +- A -> migrates -> B -> migrates -> C +- +- where: +- +- A: qemu-5.1 -M pc-5.1 +- B: qemu-5.2 -M pc-5.1 +- C: qemu-5.2 -M pc-5.1 +- +- migration A -> B is case 6, so number of queues needs to be 1. +- +- migration B -> C is case 3, so we don't care. But actually we +- care because we haven't started the guest in qemu-5.2, it came +- migrated from qemu-5.1. So to be in the safe place, we need to +- always use number of queues 1 when we are using pc-5.1. +- +-Now, how was this done in reality? The following commit shows how it +-was done:: +- +- commit 9445e1e15e66c19e42bea942ba810db28052cd05 +- Author: Stefan Hajnoczi +- Date: Tue Aug 18 15:33:47 2020 +0100 +- +- virtio-blk-pci: default num_queues to -smp N +- +-The relevant parts for migration are:: +- +- @@ -1281,7 +1284,8 @@ static Property virtio_blk_properties[] = { +- #endif +- DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, +- true), +- - DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1), +- + DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, +- + VIRTIO_BLK_AUTO_NUM_QUEUES), +- DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), +- +-It changes the default value of num_queues. But it fishes it for old +-machine types to have the right value:: +- +- @@ -31,6 +31,7 @@ +- GlobalProperty hw_compat_5_1[] = { +- ... +- + { "virtio-blk-device", "num-queues", "1"}, +- ... +- }; +- +-A device with different features on both sides +----------------------------------------------- +- +-Let's assume that we are using the same QEMU binary on both sides, +-just to make the things easier. 
But we have a device that has +-different features on both sides of the migration. That can be +-because the devices are different, because the kernel driver of both +-devices have different features, whatever. +- +-How can we get this to work with migration. The way to do that is +-"theoretically" easy. You have to get the features that the device +-has in the source of the migration. The features that the device has +-on the target of the migration, you get the intersection of the +-features of both sides, and that is the way that you should launch +-QEMU. +- +-Notice that this is not completely related to QEMU. The most +-important thing here is that this should be handled by the managing +-application that launches QEMU. If QEMU is configured correctly, the +-migration will succeed. +- +-That said, actually doing it is complicated. Almost all devices are +-bad at being able to be launched with only some features enabled. +-With one big exception: cpus. +- +-You can read the documentation for QEMU x86 cpu models here: +- +-https://qemu-project.gitlab.io/qemu/system/qemu-cpu-models.html +- +-See when they talk about migration they recommend that one chooses the +-newest cpu model that is supported for all cpus. +- +-Let's say that we have: +- +-Host A: +- +-Device X has the feature Y +- +-Host B: +- +-Device X has not the feature Y +- +-If we try to migrate without any care from host A to host B, it will +-fail because when migration tries to load the feature Y on +-destination, it will find that the hardware is not there. +- +-Doing this would be the equivalent of doing with cpus: +- +-Host A: +- +-$ qemu-system-x86_64 -cpu host +- +-Host B: +- +-$ qemu-system-x86_64 -cpu host +- +-When both hosts have different cpu features this is guaranteed to +-fail. Especially if Host B has less features than host A. If host A +-has less features than host B, sometimes it works. Important word of +-last sentence is "sometimes". 
+- +-So, forgetting about cpu models and continuing with the -cpu host +-example, let's see that the differences of the cpus is that Host A and +-B have the following features: +- +-Features: 'pcid' 'stibp' 'taa-no' +-Host A: X X +-Host B: X +- +-And we want to migrate between them, the way configure both QEMU cpu +-will be: +- +-Host A: +- +-$ qemu-system-x86_64 -cpu host,pcid=off,stibp=off +- +-Host B: +- +-$ qemu-system-x86_64 -cpu host,taa-no=off +- +-And you would be able to migrate between them. It is responsibility +-of the management application or of the user to make sure that the +-configuration is correct. QEMU doesn't know how to look at this kind +-of features in general. +- +-Notice that we don't recommend to use -cpu host for migration. It is +-used in this example because it makes the example simpler. +- +-Other devices have worse control about individual features. If they +-want to be able to migrate between hosts that show different features, +-the device needs a way to configure which ones it is going to use. +- +-In this section we have considered that we are using the same QEMU +-binary in both sides of the migration. If we use different QEMU +-versions process, then we need to have into account all other +-differences and the examples become even more complicated. +- +-How to mitigate when we have a backward compatibility error +------------------------------------------------------------ +- +-We broke migration for old machine types continuously during +-development. But as soon as we find that there is a problem, we fix +-it. The problem is what happens when we detect after we have done a +-release that something has gone wrong. +- +-Let see how it worked with one example. +- +-After the release of qemu-8.0 we found a problem when doing migration +-of the machine type pc-7.2. 
+- +-- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +- This migration works +- +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +- +- This migration works +- +-- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +- This migration fails +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +- +- This migration fails +- +-So clearly something fails when migration between qemu-7.2 and +-qemu-8.0 with machine type pc-7.2. The error messages, and git bisect +-pointed to this commit. +- +-In qemu-8.0 we got this commit:: +- +- commit 010746ae1db7f52700cb2e2c46eb94f299cfa0d2 +- Author: Jonathan Cameron +- Date: Thu Mar 2 13:37:02 2023 +0000 +- +- hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register +- +- +-The relevant bits of the commit for our example are this ones:: +- +- --- a/hw/pci/pcie_aer.c +- +++ b/hw/pci/pcie_aer.c +- @@ -112,6 +112,10 @@ int pcie_aer_init(PCIDevice *dev, +- +- pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, +- PCI_ERR_UNC_SUPPORTED); +- + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_MASK_DEFAULT); +- + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_SUPPORTED); +- +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, +- PCI_ERR_UNC_SEVERITY_DEFAULT); +- +-The patch changes how we configure PCI space for AER. But QEMU fails +-when the PCI space configuration is different between source and +-destination. +- +-The following commit shows how this got fixed:: +- +- commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f +- Author: Leonardo Bras +- Date: Tue May 2 21:27:02 2023 -0300 +- +- hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 +- +- [...] 
+- +-The relevant parts of the fix in QEMU are as follow: +- +-First, we create a new property for the device to be able to configure +-the old behaviour or the new behaviour:: +- +- diff --git a/hw/pci/pci.c b/hw/pci/pci.c +- index 8a87ccc8b0..5153ad63d6 100644 +- --- a/hw/pci/pci.c +- +++ b/hw/pci/pci.c +- @@ -79,6 +79,8 @@ static Property pci_props[] = { +- DEFINE_PROP_STRING("failover_pair_id", PCIDevice, +- failover_pair_id), +- DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), +- + DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, +- + QEMU_PCIE_ERR_UNC_MASK_BITNR, true), +- DEFINE_PROP_END_OF_LIST() +- }; +- +-Notice that we enable the feature for new machine types. +- +-Now we see how the fix is done. This is going to depend on what kind +-of breakage happens, but in this case it is quite simple:: +- +- diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c +- index 103667c368..374d593ead 100644 +- --- a/hw/pci/pcie_aer.c +- +++ b/hw/pci/pcie_aer.c +- @@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, +- uint16_t offset, +- +- pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, +- PCI_ERR_UNC_SUPPORTED); +- - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- - PCI_ERR_UNC_MASK_DEFAULT); +- - pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- - PCI_ERR_UNC_SUPPORTED); +- + +- + if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { +- + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_MASK_DEFAULT); +- + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_SUPPORTED); +- + } +- +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, +- PCI_ERR_UNC_SEVERITY_DEFAULT); +- +-I.e. If the property bit is enabled, we configure it as we did for +-qemu-8.0. If the property bit is not set, we configure it as it was in 7.2. 
+- +-And now, everything that is missing is disabling the feature for old +-machine types:: +- +- diff --git a/hw/core/machine.c b/hw/core/machine.c +- index 47a34841a5..07f763eb2e 100644 +- --- a/hw/core/machine.c +- +++ b/hw/core/machine.c +- @@ -48,6 +48,7 @@ GlobalProperty hw_compat_7_2[] = { +- { "e1000e", "migrate-timadj", "off" }, +- { "virtio-mem", "x-early-migration", "false" }, +- { "migration", "x-preempt-pre-7-2", "true" }, +- + { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, +- }; +- const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); +- +-And now, when qemu-8.0.1 is released with this fix, all combinations +-are going to work as supposed. +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) +- +-So the normality has been restored and everything is ok, no? +- +-Not really, now our matrix is much bigger. We started with the easy +-cases, migration from the same version to the same version always +-works: +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +- +-Now the interesting ones. When the QEMU processes versions are +-different. For the 1st set, their fail and we can do nothing, both +-versions are released and we can't change anything. +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +-- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +-This two are the ones that work. The whole point of making the +-change in qemu-8.0.1 release was to fix this issue: +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +-But now we found that qemu-8.0 neither can migrate to qemu-7.2 not +-qemu-8.0.1. 
+- +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +- +-So, if we start a pc-7.2 machine in qemu-8.0 we can't migrate it to +-anything except to qemu-8.0. +- +-Can we do better? +- +-Yeap. If we know that we are going to do this migration: +- +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +- +-We can launch the appropriate devices with:: +- +- --device...,x-pci-e-err-unc-mask=on +- +-And now we can receive a migration from 8.0. And from now on, we can +-do that migration to new machine types if we remember to enable that +-property for pc-7.2. Notice that we need to remember, it is not +-enough to know that the source of the migration is qemu-8.0. Think of +-this example: +- +-$ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 -> qemu-8.2 -M pc-7.2 +- +-In the second migration, the source is not qemu-8.0, but we still have +-that "problem" and have that property enabled. Notice that we need to +-continue having this mark/property until we have this machine +-rebooted. But it is not a normal reboot (that don't reload QEMU) we +-need the machine to poweroff/poweron on a fixed QEMU. And from now +-on we can use the proper real machine. +-- +2.33.0 + diff --git a/docs-migration-Split-Debugging-and-Firmware.patch b/docs-migration-Split-Debugging-and-Firmware.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2c6db4a448f8323551f202346772c04ebd1a7ed --- /dev/null +++ b/docs-migration-Split-Debugging-and-Firmware.patch @@ -0,0 +1,149 @@ +From 4d6c041c7c43372921b96446d9731a4797468555 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:23 +0800 +Subject: [19/99] docs/migration: Split "Debugging" and "Firmware" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 774ad6b53b9449223115ffa8851eb93de92b0ce7 upstream. + +Move the two sections into a separate file called "best-practices.rst". +Add the entry into index. 
+ +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-6-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/best-practices.rst | 48 +++++++++++++++++++++++++ + docs/devel/migration/index.rst | 1 + + docs/devel/migration/main.rst | 44 ----------------------- + 3 files changed, 49 insertions(+), 44 deletions(-) + create mode 100644 docs/devel/migration/best-practices.rst + +diff --git a/docs/devel/migration/best-practices.rst b/docs/devel/migration/best-practices.rst +new file mode 100644 +index 0000000000..d7c34a3014 +--- /dev/null ++++ b/docs/devel/migration/best-practices.rst +@@ -0,0 +1,48 @@ ++============== ++Best practices ++============== ++ ++Debugging ++========= ++ ++The migration stream can be analyzed thanks to ``scripts/analyze-migration.py``. ++ ++Example usage: ++ ++.. code-block:: shell ++ ++ $ qemu-system-x86_64 -display none -monitor stdio ++ (qemu) migrate "exec:cat > mig" ++ (qemu) q ++ $ ./scripts/analyze-migration.py -f mig ++ { ++ "ram (3)": { ++ "section sizes": { ++ "pc.ram": "0x0000000008000000", ++ ... ++ ++See also ``analyze-migration.py -h`` help for more options. ++ ++Firmware ++======== ++ ++Migration migrates the copies of RAM and ROM, and thus when running ++on the destination it includes the firmware from the source. Even after ++resetting a VM, the old firmware is used. Only once QEMU has been restarted ++is the new firmware in use. ++ ++- Changes in firmware size can cause changes in the required RAMBlock size ++ to hold the firmware and thus migration can fail. In practice it's best ++ to pad firmware images to convenient powers of 2 with plenty of space ++ for growth. ++ ++- Care should be taken with device emulation code so that newer ++ emulation code can work with older firmware to allow forward migration. ++ ++- Care should be taken with newer firmware so that backward migration ++ to older systems with older device emulation code will work. 
++ ++In some cases it may be best to tie specific firmware versions to specific ++versioned machine types to cut down on the combinations that will need ++support. This is also useful when newer versions of firmware outgrow ++the padding. +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 7fc02b9520..9a8fd1ead7 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -11,3 +11,4 @@ QEMU live migration works. + compatibility + vfio + virtio ++ best-practices +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 04194414af..7ca3b4dd3f 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -52,27 +52,6 @@ All these migration protocols use the same infrastructure to + save/restore state devices. This infrastructure is shared with the + savevm/loadvm functionality. + +-Debugging +-========= +- +-The migration stream can be analyzed thanks to ``scripts/analyze-migration.py``. +- +-Example usage: +- +-.. code-block:: shell +- +- $ qemu-system-x86_64 -display none -monitor stdio +- (qemu) migrate "exec:cat > mig" +- (qemu) q +- $ ./scripts/analyze-migration.py -f mig +- { +- "ram (3)": { +- "section sizes": { +- "pc.ram": "0x0000000008000000", +- ... +- +-See also ``analyze-migration.py -h`` help for more options. +- + Common infrastructure + ===================== + +@@ -970,26 +949,3 @@ the background migration channel. Anyone who cares about latencies of page + faults during a postcopy migration should enable this feature. By default, + it's not enabled. + +-Firmware +-======== +- +-Migration migrates the copies of RAM and ROM, and thus when running +-on the destination it includes the firmware from the source. Even after +-resetting a VM, the old firmware is used. Only once QEMU has been restarted +-is the new firmware in use. 
+- +-- Changes in firmware size can cause changes in the required RAMBlock size +- to hold the firmware and thus migration can fail. In practice it's best +- to pad firmware images to convenient powers of 2 with plenty of space +- for growth. +- +-- Care should be taken with device emulation code so that newer +- emulation code can work with older firmware to allow forward migration. +- +-- Care should be taken with newer firmware so that backward migration +- to older systems with older device emulation code will work. +- +-In some cases it may be best to tie specific firmware versions to specific +-versioned machine types to cut down on the combinations that will need +-support. This is also useful when newer versions of firmware outgrow +-the padding. +-- +2.33.0 + diff --git a/docs-migration-Split-Postcopy.patch b/docs-migration-Split-Postcopy.patch new file mode 100644 index 0000000000000000000000000000000000000000..a5f99baf0744bad63c5afb74545aea95b134692f --- /dev/null +++ b/docs-migration-Split-Postcopy.patch @@ -0,0 +1,679 @@ +From f335519e759500adc05157fc0399335a3646461d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:24 +0800 +Subject: [20/99] docs/migration: Split "Postcopy" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit bfb4c7cd99f1c39dedf33381954d03b9f8f244ec upstream. + +Split postcopy into a separate file. Introduce a head page "features.rst" +to keep all the features on top of migration framework. 
+ +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-7-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 9 + + docs/devel/migration/index.rst | 1 + + docs/devel/migration/main.rst | 305 ------------------------------ + docs/devel/migration/postcopy.rst | 304 +++++++++++++++++++++++++++++ + 4 files changed, 314 insertions(+), 305 deletions(-) + create mode 100644 docs/devel/migration/features.rst + create mode 100644 docs/devel/migration/postcopy.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +new file mode 100644 +index 0000000000..0054e0c900 +--- /dev/null ++++ b/docs/devel/migration/features.rst +@@ -0,0 +1,9 @@ ++Migration features ++================== ++ ++Migration has plenty of features to support different use cases. ++ ++.. toctree:: ++ :maxdepth: 2 ++ ++ postcopy +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 9a8fd1ead7..21ad58b189 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -8,6 +8,7 @@ QEMU live migration works. + :maxdepth: 2 + + main ++ features + compatibility + vfio + virtio +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 7ca3b4dd3f..1e98e9e40c 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -644,308 +644,3 @@ algorithm will restrict virtual CPUs as needed to keep their dirty page + rate inside the limit. This leads to more steady reading performance during + live migration and can aid in improving large guest responsiveness. 
+ +-Postcopy +-======== +- +-'Postcopy' migration is a way to deal with migrations that refuse to converge +-(or take too long to converge) its plus side is that there is an upper bound on +-the amount of migration traffic and time it takes, the down side is that during +-the postcopy phase, a failure of *either* side causes the guest to be lost. +- +-In postcopy the destination CPUs are started before all the memory has been +-transferred, and accesses to pages that are yet to be transferred cause +-a fault that's translated by QEMU into a request to the source QEMU. +- +-Postcopy can be combined with precopy (i.e. normal migration) so that if precopy +-doesn't finish in a given time the switch is made to postcopy. +- +-Enabling postcopy +------------------ +- +-To enable postcopy, issue this command on the monitor (both source and +-destination) prior to the start of migration: +- +-``migrate_set_capability postcopy-ram on`` +- +-The normal commands are then used to start a migration, which is still +-started in precopy mode. Issuing: +- +-``migrate_start_postcopy`` +- +-will now cause the transition from precopy to postcopy. +-It can be issued immediately after migration is started or any +-time later on. Issuing it after the end of a migration is harmless. +- +-Blocktime is a postcopy live migration metric, intended to show how +-long the vCPU was in state of interruptible sleep due to pagefault. +-That metric is calculated both for all vCPUs as overlapped value, and +-separately for each vCPU. These values are calculated on destination +-side. To enable postcopy blocktime calculation, enter following +-command on destination monitor: +- +-``migrate_set_capability postcopy-blocktime on`` +- +-Postcopy blocktime can be retrieved by query-migrate qmp command. +-postcopy-blocktime value of qmp command will show overlapped blocking +-time for all vCPU, postcopy-vcpu-blocktime will show list of blocking +-time per vCPU. +- +-.. 
note:: +- During the postcopy phase, the bandwidth limits set using +- ``migrate_set_parameter`` is ignored (to avoid delaying requested pages that +- the destination is waiting for). +- +-Postcopy device transfer +------------------------- +- +-Loading of device data may cause the device emulation to access guest RAM +-that may trigger faults that have to be resolved by the source, as such +-the migration stream has to be able to respond with page data *during* the +-device load, and hence the device data has to be read from the stream completely +-before the device load begins to free the stream up. This is achieved by +-'packaging' the device data into a blob that's read in one go. +- +-Source behaviour +----------------- +- +-Until postcopy is entered the migration stream is identical to normal +-precopy, except for the addition of a 'postcopy advise' command at +-the beginning, to tell the destination that postcopy might happen. +-When postcopy starts the source sends the page discard data and then +-forms the 'package' containing: +- +- - Command: 'postcopy listen' +- - The device state +- +- A series of sections, identical to the precopy streams device state stream +- containing everything except postcopiable devices (i.e. RAM) +- - Command: 'postcopy run' +- +-The 'package' is sent as the data part of a Command: ``CMD_PACKAGED``, and the +-contents are formatted in the same way as the main migration stream. +- +-During postcopy the source scans the list of dirty pages and sends them +-to the destination without being requested (in much the same way as precopy), +-however when a page request is received from the destination, the dirty page +-scanning restarts from the requested location. This causes requested pages +-to be sent quickly, and also causes pages directly after the requested page +-to be sent quickly in the hope that those pages are likely to be used +-by the destination soon. 
+- +-Destination behaviour +---------------------- +- +-Initially the destination looks the same as precopy, with a single thread +-reading the migration stream; the 'postcopy advise' and 'discard' commands +-are processed to change the way RAM is managed, but don't affect the stream +-processing. +- +-:: +- +- ------------------------------------------------------------------------------ +- 1 2 3 4 5 6 7 +- main -----DISCARD-CMD_PACKAGED ( LISTEN DEVICE DEVICE DEVICE RUN ) +- thread | | +- | (page request) +- | \___ +- v \ +- listen thread: --- page -- page -- page -- page -- page -- +- +- a b c +- ------------------------------------------------------------------------------ +- +-- On receipt of ``CMD_PACKAGED`` (1) +- +- All the data associated with the package - the ( ... ) section in the diagram - +- is read into memory, and the main thread recurses into qemu_loadvm_state_main +- to process the contents of the package (2) which contains commands (3,6) and +- devices (4...) +- +-- On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package) +- +- a new thread (a) is started that takes over servicing the migration stream, +- while the main thread carries on loading the package. It loads normal +- background page data (b) but if during a device load a fault happens (5) +- the returned page (c) is loaded by the listen thread allowing the main +- threads device load to carry on. +- +-- The last thing in the ``CMD_PACKAGED`` is a 'RUN' command (6) +- +- letting the destination CPUs start running. At the end of the +- ``CMD_PACKAGED`` (7) the main thread returns to normal running behaviour and +- is no longer used by migration, while the listen thread carries on servicing +- page data until the end of migration. +- +-Postcopy Recovery +------------------ +- +-Comparing to precopy, postcopy is special on error handlings. 
When any +-error happens (in this case, mostly network errors), QEMU cannot easily +-fail a migration because VM data resides in both source and destination +-QEMU instances. On the other hand, when issue happens QEMU on both sides +-will go into a paused state. It'll need a recovery phase to continue a +-paused postcopy migration. +- +-The recovery phase normally contains a few steps: +- +- - When network issue occurs, both QEMU will go into PAUSED state +- +- - When the network is recovered (or a new network is provided), the admin +- can setup the new channel for migration using QMP command +- 'migrate-recover' on destination node, preparing for a resume. +- +- - On source host, the admin can continue the interrupted postcopy +- migration using QMP command 'migrate' with resume=true flag set. +- +- - After the connection is re-established, QEMU will continue the postcopy +- migration on both sides. +- +-During a paused postcopy migration, the VM can logically still continue +-running, and it will not be impacted from any page access to pages that +-were already migrated to destination VM before the interruption happens. +-However, if any of the missing pages got accessed on destination VM, the VM +-thread will be halted waiting for the page to be migrated, it means it can +-be halted until the recovery is complete. +- +-The impact of accessing missing pages can be relevant to different +-configurations of the guest. For example, when with async page fault +-enabled, logically the guest can proactively schedule out the threads +-accessing missing pages. 
+- +-Postcopy states +---------------- +- +-Postcopy moves through a series of states (see postcopy_state) from +-ADVISE->DISCARD->LISTEN->RUNNING->END +- +- - Advise +- +- Set at the start of migration if postcopy is enabled, even +- if it hasn't had the start command; here the destination +- checks that its OS has the support needed for postcopy, and performs +- setup to ensure the RAM mappings are suitable for later postcopy. +- The destination will fail early in migration at this point if the +- required OS support is not present. +- (Triggered by reception of POSTCOPY_ADVISE command) +- +- - Discard +- +- Entered on receipt of the first 'discard' command; prior to +- the first Discard being performed, hugepages are switched off +- (using madvise) to ensure that no new huge pages are created +- during the postcopy phase, and to cause any huge pages that +- have discards on them to be broken. +- +- - Listen +- +- The first command in the package, POSTCOPY_LISTEN, switches +- the destination state to Listen, and starts a new thread +- (the 'listen thread') which takes over the job of receiving +- pages off the migration stream, while the main thread carries +- on processing the blob. With this thread able to process page +- reception, the destination now 'sensitises' the RAM to detect +- any access to missing pages (on Linux using the 'userfault' +- system). +- +- - Running +- +- POSTCOPY_RUN causes the destination to synchronise all +- state and start the CPUs and IO devices running. The main +- thread now finishes processing the migration package and +- now carries on as it would for normal precopy migration +- (although it can't do the cleanup it would do as it +- finishes a normal migration). +- +- - Paused +- +- Postcopy can run into a paused state (normally on both sides when +- happens), where all threads will be temporarily halted mostly due to +- network errors. 
When reaching paused state, migration will make sure +- the qemu binary on both sides maintain the data without corrupting +- the VM. To continue the migration, the admin needs to fix the +- migration channel using the QMP command 'migrate-recover' on the +- destination node, then resume the migration using QMP command 'migrate' +- again on source node, with resume=true flag set. +- +- - End +- +- The listen thread can now quit, and perform the cleanup of migration +- state, the migration is now complete. +- +-Source side page map +--------------------- +- +-The 'migration bitmap' in postcopy is basically the same as in the precopy, +-where each of the bit to indicate that page is 'dirty' - i.e. needs +-sending. During the precopy phase this is updated as the CPU dirties +-pages, however during postcopy the CPUs are stopped and nothing should +-dirty anything any more. Instead, dirty bits are cleared when the relevant +-pages are sent during postcopy. +- +-Postcopy with hugepages +------------------------ +- +-Postcopy now works with hugetlbfs backed memory: +- +- a) The linux kernel on the destination must support userfault on hugepages. +- b) The huge-page configuration on the source and destination VMs must be +- identical; i.e. RAMBlocks on both sides must use the same page size. +- c) Note that ``-mem-path /dev/hugepages`` will fall back to allocating normal +- RAM if it doesn't have enough hugepages, triggering (b) to fail. +- Using ``-mem-prealloc`` enforces the allocation using hugepages. +- d) Care should be taken with the size of hugepage used; postcopy with 2MB +- hugepages works well, however 1GB hugepages are likely to be problematic +- since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, +- and until the full page is transferred the destination thread is blocked. 
+- +-Postcopy with shared memory +---------------------------- +- +-Postcopy migration with shared memory needs explicit support from the other +-processes that share memory and from QEMU. There are restrictions on the type of +-memory that userfault can support shared. +- +-The Linux kernel userfault support works on ``/dev/shm`` memory and on ``hugetlbfs`` +-(although the kernel doesn't provide an equivalent to ``madvise(MADV_DONTNEED)`` +-for hugetlbfs which may be a problem in some configurations). +- +-The vhost-user code in QEMU supports clients that have Postcopy support, +-and the ``vhost-user-bridge`` (in ``tests/``) and the DPDK package have changes +-to support postcopy. +- +-The client needs to open a userfaultfd and register the areas +-of memory that it maps with userfault. The client must then pass the +-userfaultfd back to QEMU together with a mapping table that allows +-fault addresses in the clients address space to be converted back to +-RAMBlock/offsets. The client's userfaultfd is added to the postcopy +-fault-thread and page requests are made on behalf of the client by QEMU. +-QEMU performs 'wake' operations on the client's userfaultfd to allow it +-to continue after a page has arrived. +- +-.. note:: +- There are two future improvements that would be nice: +- a) Some way to make QEMU ignorant of the addresses in the clients +- address space +- b) Avoiding the need for QEMU to perform ufd-wake calls after the +- pages have arrived +- +-Retro-fitting postcopy to existing clients is possible: +- a) A mechanism is needed for the registration with userfault as above, +- and the registration needs to be coordinated with the phases of +- postcopy. In vhost-user extra messages are added to the existing +- control channel. 
+- b) Any thread that can block due to guest memory accesses must be +- identified and the implication understood; for example if the +- guest memory access is made while holding a lock then all other +- threads waiting for that lock will also be blocked. +- +-Postcopy Preemption Mode +------------------------- +- +-Postcopy preempt is a new capability introduced in 8.0 QEMU release, it +-allows urgent pages (those got page fault requested from destination QEMU +-explicitly) to be sent in a separate preempt channel, rather than queued in +-the background migration channel. Anyone who cares about latencies of page +-faults during a postcopy migration should enable this feature. By default, +-it's not enabled. +- +diff --git a/docs/devel/migration/postcopy.rst b/docs/devel/migration/postcopy.rst +new file mode 100644 +index 0000000000..d60eec06ab +--- /dev/null ++++ b/docs/devel/migration/postcopy.rst +@@ -0,0 +1,304 @@ ++Postcopy ++======== ++ ++'Postcopy' migration is a way to deal with migrations that refuse to converge ++(or take too long to converge) its plus side is that there is an upper bound on ++the amount of migration traffic and time it takes, the down side is that during ++the postcopy phase, a failure of *either* side causes the guest to be lost. ++ ++In postcopy the destination CPUs are started before all the memory has been ++transferred, and accesses to pages that are yet to be transferred cause ++a fault that's translated by QEMU into a request to the source QEMU. ++ ++Postcopy can be combined with precopy (i.e. normal migration) so that if precopy ++doesn't finish in a given time the switch is made to postcopy. ++ ++Enabling postcopy ++----------------- ++ ++To enable postcopy, issue this command on the monitor (both source and ++destination) prior to the start of migration: ++ ++``migrate_set_capability postcopy-ram on`` ++ ++The normal commands are then used to start a migration, which is still ++started in precopy mode. 
Issuing: ++ ++``migrate_start_postcopy`` ++ ++will now cause the transition from precopy to postcopy. ++It can be issued immediately after migration is started or any ++time later on. Issuing it after the end of a migration is harmless. ++ ++Blocktime is a postcopy live migration metric, intended to show how ++long the vCPU was in state of interruptible sleep due to pagefault. ++That metric is calculated both for all vCPUs as overlapped value, and ++separately for each vCPU. These values are calculated on destination ++side. To enable postcopy blocktime calculation, enter following ++command on destination monitor: ++ ++``migrate_set_capability postcopy-blocktime on`` ++ ++Postcopy blocktime can be retrieved by query-migrate qmp command. ++postcopy-blocktime value of qmp command will show overlapped blocking ++time for all vCPU, postcopy-vcpu-blocktime will show list of blocking ++time per vCPU. ++ ++.. note:: ++ During the postcopy phase, the bandwidth limits set using ++ ``migrate_set_parameter`` is ignored (to avoid delaying requested pages that ++ the destination is waiting for). ++ ++Postcopy device transfer ++------------------------ ++ ++Loading of device data may cause the device emulation to access guest RAM ++that may trigger faults that have to be resolved by the source, as such ++the migration stream has to be able to respond with page data *during* the ++device load, and hence the device data has to be read from the stream completely ++before the device load begins to free the stream up. This is achieved by ++'packaging' the device data into a blob that's read in one go. ++ ++Source behaviour ++---------------- ++ ++Until postcopy is entered the migration stream is identical to normal ++precopy, except for the addition of a 'postcopy advise' command at ++the beginning, to tell the destination that postcopy might happen. 
++When postcopy starts the source sends the page discard data and then ++forms the 'package' containing: ++ ++ - Command: 'postcopy listen' ++ - The device state ++ ++ A series of sections, identical to the precopy streams device state stream ++ containing everything except postcopiable devices (i.e. RAM) ++ - Command: 'postcopy run' ++ ++The 'package' is sent as the data part of a Command: ``CMD_PACKAGED``, and the ++contents are formatted in the same way as the main migration stream. ++ ++During postcopy the source scans the list of dirty pages and sends them ++to the destination without being requested (in much the same way as precopy), ++however when a page request is received from the destination, the dirty page ++scanning restarts from the requested location. This causes requested pages ++to be sent quickly, and also causes pages directly after the requested page ++to be sent quickly in the hope that those pages are likely to be used ++by the destination soon. ++ ++Destination behaviour ++--------------------- ++ ++Initially the destination looks the same as precopy, with a single thread ++reading the migration stream; the 'postcopy advise' and 'discard' commands ++are processed to change the way RAM is managed, but don't affect the stream ++processing. ++ ++:: ++ ++ ------------------------------------------------------------------------------ ++ 1 2 3 4 5 6 7 ++ main -----DISCARD-CMD_PACKAGED ( LISTEN DEVICE DEVICE DEVICE RUN ) ++ thread | | ++ | (page request) ++ | \___ ++ v \ ++ listen thread: --- page -- page -- page -- page -- page -- ++ ++ a b c ++ ------------------------------------------------------------------------------ ++ ++- On receipt of ``CMD_PACKAGED`` (1) ++ ++ All the data associated with the package - the ( ... ) section in the diagram - ++ is read into memory, and the main thread recurses into qemu_loadvm_state_main ++ to process the contents of the package (2) which contains commands (3,6) and ++ devices (4...) 
++ ++- On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package) ++ ++ a new thread (a) is started that takes over servicing the migration stream, ++ while the main thread carries on loading the package. It loads normal ++ background page data (b) but if during a device load a fault happens (5) ++ the returned page (c) is loaded by the listen thread allowing the main ++ threads device load to carry on. ++ ++- The last thing in the ``CMD_PACKAGED`` is a 'RUN' command (6) ++ ++ letting the destination CPUs start running. At the end of the ++ ``CMD_PACKAGED`` (7) the main thread returns to normal running behaviour and ++ is no longer used by migration, while the listen thread carries on servicing ++ page data until the end of migration. ++ ++Postcopy Recovery ++----------------- ++ ++Comparing to precopy, postcopy is special on error handlings. When any ++error happens (in this case, mostly network errors), QEMU cannot easily ++fail a migration because VM data resides in both source and destination ++QEMU instances. On the other hand, when issue happens QEMU on both sides ++will go into a paused state. It'll need a recovery phase to continue a ++paused postcopy migration. ++ ++The recovery phase normally contains a few steps: ++ ++ - When network issue occurs, both QEMU will go into PAUSED state ++ ++ - When the network is recovered (or a new network is provided), the admin ++ can setup the new channel for migration using QMP command ++ 'migrate-recover' on destination node, preparing for a resume. ++ ++ - On source host, the admin can continue the interrupted postcopy ++ migration using QMP command 'migrate' with resume=true flag set. ++ ++ - After the connection is re-established, QEMU will continue the postcopy ++ migration on both sides. 
++ ++During a paused postcopy migration, the VM can logically still continue ++running, and it will not be impacted from any page access to pages that ++were already migrated to destination VM before the interruption happens. ++However, if any of the missing pages got accessed on destination VM, the VM ++thread will be halted waiting for the page to be migrated, it means it can ++be halted until the recovery is complete. ++ ++The impact of accessing missing pages can be relevant to different ++configurations of the guest. For example, when with async page fault ++enabled, logically the guest can proactively schedule out the threads ++accessing missing pages. ++ ++Postcopy states ++--------------- ++ ++Postcopy moves through a series of states (see postcopy_state) from ++ADVISE->DISCARD->LISTEN->RUNNING->END ++ ++ - Advise ++ ++ Set at the start of migration if postcopy is enabled, even ++ if it hasn't had the start command; here the destination ++ checks that its OS has the support needed for postcopy, and performs ++ setup to ensure the RAM mappings are suitable for later postcopy. ++ The destination will fail early in migration at this point if the ++ required OS support is not present. ++ (Triggered by reception of POSTCOPY_ADVISE command) ++ ++ - Discard ++ ++ Entered on receipt of the first 'discard' command; prior to ++ the first Discard being performed, hugepages are switched off ++ (using madvise) to ensure that no new huge pages are created ++ during the postcopy phase, and to cause any huge pages that ++ have discards on them to be broken. ++ ++ - Listen ++ ++ The first command in the package, POSTCOPY_LISTEN, switches ++ the destination state to Listen, and starts a new thread ++ (the 'listen thread') which takes over the job of receiving ++ pages off the migration stream, while the main thread carries ++ on processing the blob. 
With this thread able to process page ++ reception, the destination now 'sensitises' the RAM to detect ++ any access to missing pages (on Linux using the 'userfault' ++ system). ++ ++ - Running ++ ++ POSTCOPY_RUN causes the destination to synchronise all ++ state and start the CPUs and IO devices running. The main ++ thread now finishes processing the migration package and ++ now carries on as it would for normal precopy migration ++ (although it can't do the cleanup it would do as it ++ finishes a normal migration). ++ ++ - Paused ++ ++ Postcopy can run into a paused state (normally on both sides when ++ happens), where all threads will be temporarily halted mostly due to ++ network errors. When reaching paused state, migration will make sure ++ the qemu binary on both sides maintain the data without corrupting ++ the VM. To continue the migration, the admin needs to fix the ++ migration channel using the QMP command 'migrate-recover' on the ++ destination node, then resume the migration using QMP command 'migrate' ++ again on source node, with resume=true flag set. ++ ++ - End ++ ++ The listen thread can now quit, and perform the cleanup of migration ++ state, the migration is now complete. ++ ++Source side page map ++-------------------- ++ ++The 'migration bitmap' in postcopy is basically the same as in the precopy, ++where each of the bit to indicate that page is 'dirty' - i.e. needs ++sending. During the precopy phase this is updated as the CPU dirties ++pages, however during postcopy the CPUs are stopped and nothing should ++dirty anything any more. Instead, dirty bits are cleared when the relevant ++pages are sent during postcopy. ++ ++Postcopy with hugepages ++----------------------- ++ ++Postcopy now works with hugetlbfs backed memory: ++ ++ a) The linux kernel on the destination must support userfault on hugepages. ++ b) The huge-page configuration on the source and destination VMs must be ++ identical; i.e. 
RAMBlocks on both sides must use the same page size. ++ c) Note that ``-mem-path /dev/hugepages`` will fall back to allocating normal ++ RAM if it doesn't have enough hugepages, triggering (b) to fail. ++ Using ``-mem-prealloc`` enforces the allocation using hugepages. ++ d) Care should be taken with the size of hugepage used; postcopy with 2MB ++ hugepages works well, however 1GB hugepages are likely to be problematic ++ since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, ++ and until the full page is transferred the destination thread is blocked. ++ ++Postcopy with shared memory ++--------------------------- ++ ++Postcopy migration with shared memory needs explicit support from the other ++processes that share memory and from QEMU. There are restrictions on the type of ++memory that userfault can support shared. ++ ++The Linux kernel userfault support works on ``/dev/shm`` memory and on ``hugetlbfs`` ++(although the kernel doesn't provide an equivalent to ``madvise(MADV_DONTNEED)`` ++for hugetlbfs which may be a problem in some configurations). ++ ++The vhost-user code in QEMU supports clients that have Postcopy support, ++and the ``vhost-user-bridge`` (in ``tests/``) and the DPDK package have changes ++to support postcopy. ++ ++The client needs to open a userfaultfd and register the areas ++of memory that it maps with userfault. The client must then pass the ++userfaultfd back to QEMU together with a mapping table that allows ++fault addresses in the clients address space to be converted back to ++RAMBlock/offsets. The client's userfaultfd is added to the postcopy ++fault-thread and page requests are made on behalf of the client by QEMU. ++QEMU performs 'wake' operations on the client's userfaultfd to allow it ++to continue after a page has arrived. ++ ++.. 
note:: ++ There are two future improvements that would be nice: ++ a) Some way to make QEMU ignorant of the addresses in the client's ++ address space ++ b) Avoiding the need for QEMU to perform ufd-wake calls after the ++ pages have arrived ++ ++Retro-fitting postcopy to existing clients is possible: ++ a) A mechanism is needed for the registration with userfault as above, ++ and the registration needs to be coordinated with the phases of ++ postcopy. In vhost-user extra messages are added to the existing ++ control channel. ++ b) Any thread that can block due to guest memory accesses must be ++ identified and the implication understood; for example if the ++ guest memory access is made while holding a lock then all other ++ threads waiting for that lock will also be blocked. ++ ++Postcopy Preemption Mode ++------------------------ ++ ++Postcopy preempt is a new capability introduced in the 8.0 QEMU release. It ++allows urgent pages (those explicitly requested by the destination QEMU ++because of a page fault) to be sent in a separate preempt channel, rather than queued in ++the background migration channel. Anyone who cares about latencies of page ++faults during a postcopy migration should enable this feature. By default, ++it's not enabled. +-- +2.33.0 + diff --git a/docs-migration-Split-dirty-limit.patch b/docs-migration-Split-dirty-limit.patch new file mode 100644 index 0000000000000000000000000000000000000000..0947d1e49e0b6f018c0f5efac800a794f863e89f --- /dev/null +++ b/docs-migration-Split-dirty-limit.patch @@ -0,0 +1,192 @@ +From 10545ddb8797505ac298960171afaebc327c926c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:25 +0800 +Subject: [21/99] docs/migration: Split "dirty limit" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 4c6f8a79ae539eeb1f86af6522e4000edde3638b upstream. + +Split that into a separate file, put under "features".
+ +Cc: Yong Huang +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-8-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/dirty-limit.rst | 71 ++++++++++++++++++++++++++++ + docs/devel/migration/features.rst | 1 + + docs/devel/migration/main.rst | 71 ---------------------------- + 3 files changed, 72 insertions(+), 71 deletions(-) + create mode 100644 docs/devel/migration/dirty-limit.rst + +diff --git a/docs/devel/migration/dirty-limit.rst b/docs/devel/migration/dirty-limit.rst +new file mode 100644 +index 0000000000..8f32329d5f +--- /dev/null ++++ b/docs/devel/migration/dirty-limit.rst +@@ -0,0 +1,71 @@ ++Dirty limit ++=========== ++ ++The dirty limit, short for dirty page rate upper limit, is a new capability ++introduced in the 8.1 QEMU release that uses a new algorithm based on the KVM ++dirty ring to throttle down the guest during live migration. ++ ++The algorithm framework is as follows: ++ ++:: ++ ++ ------------------------------------------------------------------------------ ++ main --------------> throttle thread ------------> PREPARE(1) <-------- ++ thread \ | | ++ \ | | ++ \ V | ++ -\ CALCULATE(2) | ++ \ | | ++ \ | | ++ \ V | ++ \ SET PENALTY(3) ----- ++ -\ | ++ \ | ++ \ V ++ -> virtual CPU thread -------> ACCEPT PENALTY(4) ++ ------------------------------------------------------------------------------ ++ ++When the qmp command qmp_set_vcpu_dirty_limit is called for the first time, ++the QEMU main thread starts the throttle thread. The throttle thread, once ++launched, executes the loop, which consists of three steps: ++ ++ - PREPARE (1) ++ ++ The entire work of PREPARE (1) is preparation for the second stage, ++ CALCULATE(2), as the name implies. 
It involves preparing the dirty ++ page rate value and the corresponding upper limit of the VM: ++ The dirty page rate is calculated via the KVM dirty ring mechanism, ++ which tells QEMU how many dirty pages a virtual CPU has had since the ++ last KVM_EXIT_DIRTY_RING_FULL exception; The dirty page rate upper ++ limit is specified by caller, therefore fetch it directly. ++ ++ - CALCULATE (2) ++ ++ Calculate a suitable sleep period for each virtual CPU, which will be ++ used to determine the penalty for the target virtual CPU. The ++ computation must be done carefully in order to reduce the dirty page ++ rate progressively down to the upper limit without oscillation. To ++ achieve this, two strategies are provided: the first is to add or ++ subtract sleep time based on the ratio of the current dirty page rate ++ to the limit, which is used when the current dirty page rate is far ++ from the limit; the second is to add or subtract a fixed time when ++ the current dirty page rate is close to the limit. ++ ++ - SET PENALTY (3) ++ ++ Set the sleep time for each virtual CPU that should be penalized based ++ on the results of the calculation supplied by step CALCULATE (2). ++ ++After completing the three above stages, the throttle thread loops back ++to step PREPARE (1) until the dirty limit is reached. ++ ++On the other hand, each virtual CPU thread reads the sleep duration and ++sleeps in the path of the KVM_EXIT_DIRTY_RING_FULL exception handler, that ++is ACCEPT PENALTY (4). Virtual CPUs tied with writing processes will ++obviously exit to the path and get penalized, whereas virtual CPUs involved ++with read processes will not. ++ ++In summary, thanks to the KVM dirty ring technology, the dirty limit ++algorithm will restrict virtual CPUs as needed to keep their dirty page ++rate inside the limit. This leads to more steady reading performance during ++live migration and can aid in improving large guest responsiveness. 
+diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index 0054e0c900..e257d0d100 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -7,3 +7,4 @@ Migration has plenty of features to support different use cases. + :maxdepth: 2 + + postcopy ++ dirty-limit +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 1e98e9e40c..396c7c51ca 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -573,74 +573,3 @@ path. + Return path - opened by main thread, written by main thread AND postcopy + thread (protected by rp_mutex) + +-Dirty limit +-===================== +-The dirty limit, short for dirty page rate upper limit, is a new capability +-introduced in the 8.1 QEMU release that uses a new algorithm based on the KVM +-dirty ring to throttle down the guest during live migration. +- +-The algorithm framework is as follows: +- +-:: +- +- ------------------------------------------------------------------------------ +- main --------------> throttle thread ------------> PREPARE(1) <-------- +- thread \ | | +- \ | | +- \ V | +- -\ CALCULATE(2) | +- \ | | +- \ | | +- \ V | +- \ SET PENALTY(3) ----- +- -\ | +- \ | +- \ V +- -> virtual CPU thread -------> ACCEPT PENALTY(4) +- ------------------------------------------------------------------------------ +- +-When the qmp command qmp_set_vcpu_dirty_limit is called for the first time, +-the QEMU main thread starts the throttle thread. The throttle thread, once +-launched, executes the loop, which consists of three steps: +- +- - PREPARE (1) +- +- The entire work of PREPARE (1) is preparation for the second stage, +- CALCULATE(2), as the name implies. 
It involves preparing the dirty +- page rate value and the corresponding upper limit of the VM: +- The dirty page rate is calculated via the KVM dirty ring mechanism, +- which tells QEMU how many dirty pages a virtual CPU has had since the +- last KVM_EXIT_DIRTY_RING_FULL exception; The dirty page rate upper +- limit is specified by caller, therefore fetch it directly. +- +- - CALCULATE (2) +- +- Calculate a suitable sleep period for each virtual CPU, which will be +- used to determine the penalty for the target virtual CPU. The +- computation must be done carefully in order to reduce the dirty page +- rate progressively down to the upper limit without oscillation. To +- achieve this, two strategies are provided: the first is to add or +- subtract sleep time based on the ratio of the current dirty page rate +- to the limit, which is used when the current dirty page rate is far +- from the limit; the second is to add or subtract a fixed time when +- the current dirty page rate is close to the limit. +- +- - SET PENALTY (3) +- +- Set the sleep time for each virtual CPU that should be penalized based +- on the results of the calculation supplied by step CALCULATE (2). +- +-After completing the three above stages, the throttle thread loops back +-to step PREPARE (1) until the dirty limit is reached. +- +-On the other hand, each virtual CPU thread reads the sleep duration and +-sleeps in the path of the KVM_EXIT_DIRTY_RING_FULL exception handler, that +-is ACCEPT PENALTY (4). Virtual CPUs tied with writing processes will +-obviously exit to the path and get penalized, whereas virtual CPUs involved +-with read processes will not. +- +-In summary, thanks to the KVM dirty ring technology, the dirty limit +-algorithm will restrict virtual CPUs as needed to keep their dirty page +-rate inside the limit. This leads to more steady reading performance during +-live migration and can aid in improving large guest responsiveness. 
+- +-- +2.33.0 + diff --git a/docs-migration-add-qatzip-compression-feature.patch b/docs-migration-add-qatzip-compression-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..b300a314ebfab23b1ad0b4c7a24826ecf0fce607 --- /dev/null +++ b/docs-migration-add-qatzip-compression-feature.patch @@ -0,0 +1,206 @@ +From 5fa111eb3e3d73a0500d33d0b81638c579476845 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Fri, 30 Aug 2024 16:27:18 -0700 +Subject: [88/99] docs/migration: add qatzip compression feature + +commit 85da4cbe6e5eb6ba6f31c8b30ee4582625546da7 upstream. + +add Intel QATzip compression method introduction + +Reviewed-by: Nanhai Zou +Reviewed-by: Peter Xu +Reviewed-by: Fabiano Rosas +Signed-off-by: Yuan Liu +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-2-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/qatzip-compression.rst | 165 ++++++++++++++++++++ + 2 files changed, 166 insertions(+) + create mode 100644 docs/devel/migration/qatzip-compression.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index 0c9cb3dd6c..7c5ce9e79d 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -12,3 +12,4 @@ Migration has plenty of features to support different use cases. 
+ virtio + qpl-compression + uadk-compression ++ qatzip-compression +diff --git a/docs/devel/migration/qatzip-compression.rst b/docs/devel/migration/qatzip-compression.rst +new file mode 100644 +index 0000000000..862b383164 +--- /dev/null ++++ b/docs/devel/migration/qatzip-compression.rst +@@ -0,0 +1,165 @@ ++================== ++QATzip Compression ++================== ++In scenarios with limited network bandwidth, the ``QATzip`` solution can help ++users save a lot of host CPU resources by accelerating compression and ++decompression through the Intel QuickAssist Technology(``QAT``) hardware. ++ ++ ++The following test was conducted using 8 multifd channels and 10Gbps network ++bandwidth. The results show that, compared to zstd, ``QATzip`` significantly ++saves CPU resources on the sender and reduces migration time. Compared to the ++uncompressed solution, ``QATzip`` greatly improves the dirty page processing ++capability, indicated by the Pages per Second metric, and also reduces the ++total migration time. ++ ++:: ++ ++ VM Configuration: 16 vCPU and 64G memory ++ VM Workload: all vCPUs are idle and 54G memory is filled with Silesia data. 
++ QAT Devices: 4 ++ |-----------|--------|---------|----------|----------|------|------| ++ |8 Channels |Total |down |throughput|pages per | send | recv | ++ | |time(ms)|time(ms) |(mbps) |second | cpu %| cpu% | ++ |-----------|--------|---------|----------|----------|------|------| ++ |qatzip | 16630| 28| 10467| 2940235| 160| 360| ++ |-----------|--------|---------|----------|----------|------|------| ++ |zstd | 20165| 24| 8579| 2391465| 810| 340| ++ |-----------|--------|---------|----------|----------|------|------| ++ |none | 46063| 40| 10848| 330240| 45| 85| ++ |-----------|--------|---------|----------|----------|------|------| ++ ++ ++QATzip Compression Framework ++============================ ++ ++``QATzip`` is a user space library which builds on top of the Intel QuickAssist ++Technology to provide extended accelerated compression and decompression ++services. ++ ++For more ``QATzip`` introduction, please refer to `QATzip Introduction ++`_ ++ ++:: ++ ++ +----------------+ ++ | MultiFd Thread | ++ +-------+--------+ ++ | ++ | compress/decompress ++ +-------+--------+ ++ | QATzip library | ++ +-------+--------+ ++ | ++ +-------+--------+ ++ | QAT library | ++ +-------+--------+ ++ | user space ++ --------+--------------------- ++ | kernel space ++ +------+-------+ ++ | QAT Driver | ++ +------+-------+ ++ | ++ +------+-------+ ++ | QAT Devices | ++ +--------------+ ++ ++ ++QATzip Installation ++------------------- ++ ++The ``QATzip`` installation package has been integrated into some Linux ++distributions and can be installed directly. For example, the Ubuntu Server ++24.04 LTS system can be installed using below command ++ ++.. 
code-block:: shell ++ ++ #apt search qatzip ++ libqatzip-dev/noble 1.2.0-0ubuntu3 amd64 ++ Intel QuickAssist user space library development files ++ ++ libqatzip3/noble 1.2.0-0ubuntu3 amd64 ++ Intel QuickAssist user space library ++ ++ qatzip/noble,now 1.2.0-0ubuntu3 amd64 [installed] ++ Compression user-space tool for Intel QuickAssist Technology ++ ++ #sudo apt install libqatzip-dev libqatzip3 qatzip ++ ++If your system does not support the ``QATzip`` installation package, you can ++use the source code to build and install, please refer to `QATzip source code installation ++`_ ++ ++QAT Hardware Deployment ++----------------------- ++ ++``QAT`` supports physical functions(PFs) and virtual functions(VFs) for ++deployment, and users can configure ``QAT`` resources for migration according ++to actual needs. For more details about ``QAT`` deployment, please refer to ++`Intel QuickAssist Technology Documentation ++`_ ++ ++For more ``QAT`` hardware introduction, please refer to `intel-quick-assist-technology-overview ++`_ ++ ++How To Use QATzip Compression ++============================= ++ ++1 - Install ``QATzip`` library ++ ++2 - Build ``QEMU`` with ``--enable-qatzip`` parameter ++ ++ E.g. configure --target-list=x86_64-softmmu --enable-kvm ``--enable-qatzip`` ++ ++3 - Set ``migrate_set_parameter multifd-compression qatzip`` ++ ++4 - Set ``migrate_set_parameter multifd-qatzip-level comp_level``, the default ++comp_level value is 1, and it supports levels from 1 to 9 ++ ++QAT Memory Requirements ++======================= ++ ++The user needs to reserve system memory for the QAT memory management to ++allocate DMA memory. The size of the reserved system memory depends on the ++number of devices used for migration and the number of multifd channels. ++ ++Because memory usage depends on QAT configuration, please refer to `QAT Memory ++Driver Queries ++`_ ++for memory usage calculation. ++ ++.. 
list-table:: An example of a PF used for migration ++ :header-rows: 1 ++ ++ * - Number of channels ++ - Sender memory usage ++ - Receiver memory usage ++ * - 2 ++ - 10M ++ - 10M ++ * - 4 ++ - 12M ++ - 14M ++ * - 8 ++ - 16M ++ - 20M ++ ++How To Choose Between QATzip and QPL ++==================================== ++Starting from 4th Gen Intel Xeon Scalable processors, codenamed Sapphire Rapids ++processor(``SPR``), multiple built-in accelerators are supported including ++``QAT`` and ``IAA``. The former can accelerate ``QATzip`` and the latter is ++used to accelerate ``QPL``. ++ ++Here are some suggestions: ++ ++1 - If the live migration scenario is limited by network bandwidth and ``QAT`` ++hardware resources exceed ``IAA``, use the ``QATzip`` method, which can save a ++lot of host CPU resources for compression. ++ ++2 - If the system cannot support shared virtual memory (SVM) technology, use ++the ``QATzip`` method because ``QPL`` performance is not good without SVM ++support. ++ ++3 - For other scenarios, use the ``QPL`` method first. +-- +2.33.0 + diff --git a/docs-migration-add-qpl-compression-feature.patch b/docs-migration-add-qpl-compression-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..15587ec4a6e4f1ce2ac4a4b9a6e0b9222b4689c2 --- /dev/null +++ b/docs-migration-add-qpl-compression-feature.patch @@ -0,0 +1,304 @@ +From 4c4e9830f3bee7313f3ac49fe4887f040fd85f7a Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:04 +0800 +Subject: [72/99] docs/migration: add qpl compression feature + +commit 0d40b3d76ced77c1c82c77a636af703fabdb407c upstream. 
+ +add Intel Query Processing Library (QPL) compression method +introduction + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Acked-by: Peter Xu +Signed-off-by: Fabiano Rosas + + Conflicts: + docs/devel/migration/features.rst +[jz: resolve simple context conflict] +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/qpl-compression.rst | 260 +++++++++++++++++++++++ + 2 files changed, 261 insertions(+) + create mode 100644 docs/devel/migration/qpl-compression.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index a9acaf618e..9819393c12 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -10,3 +10,4 @@ Migration has plenty of features to support different use cases. + dirty-limit + vfio + virtio ++ qpl-compression +diff --git a/docs/devel/migration/qpl-compression.rst b/docs/devel/migration/qpl-compression.rst +new file mode 100644 +index 0000000000..990992d786 +--- /dev/null ++++ b/docs/devel/migration/qpl-compression.rst +@@ -0,0 +1,260 @@ ++=============== ++QPL Compression ++=============== ++The Intel Query Processing Library (Intel ``QPL``) is an open-source library to ++provide compression and decompression features and it is based on deflate ++compression algorithm (RFC 1951). ++ ++The ``QPL`` compression relies on Intel In-Memory Analytics Accelerator(``IAA``) ++and Shared Virtual Memory(``SVM``) technology, they are new features supported ++from Intel 4th Gen Intel Xeon Scalable processors, codenamed Sapphire Rapids ++processor(``SPR``). 
++ ++For more ``QPL`` introduction, please refer to `QPL Introduction ++`_ ++ ++QPL Compression Framework ++========================= ++ ++:: ++ ++ +----------------+ +------------------+ ++ | MultiFD Thread | |accel-config tool | ++ +-------+--------+ +--------+---------+ ++ | | ++ | | ++ |compress/decompress | ++ +-------+--------+ | Setup IAA ++ | QPL library | | Resources ++ +-------+---+----+ | ++ | | | ++ | +-------------+-------+ ++ | Open IAA | ++ | Devices +-----+-----+ ++ | |idxd driver| ++ | +-----+-----+ ++ | | ++ | | ++ | +-----+-----+ ++ +-----------+IAA Devices| ++ Submit jobs +-----------+ ++ via enqcmd ++ ++ ++QPL Build And Installation ++-------------------------- ++ ++.. code-block:: shell ++ ++ $git clone --recursive https://github.com/intel/qpl.git qpl ++ $mkdir qpl/build ++ $cd qpl/build ++ $cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DQPL_LIBRARY_TYPE=SHARED .. ++ $sudo cmake --build . --target install ++ ++For more details about ``QPL`` installation, please refer to `QPL Installation ++`_ ++ ++IAA Device Management ++--------------------- ++ ++The number of ``IAA`` devices will vary depending on the Xeon product model. ++On a ``SPR`` server, there can be a maximum of 8 ``IAA`` devices, with up to ++4 devices per socket. ++ ++By default, all ``IAA`` devices are disabled and need to be configured and ++enabled by users manually. ++ ++Check the number of devices through the following command ++ ++.. 
code-block:: shell ++ ++ #lspci -d 8086:0cfe ++ 6a:02.0 System peripheral: Intel Corporation Device 0cfe ++ 6f:02.0 System peripheral: Intel Corporation Device 0cfe ++ 74:02.0 System peripheral: Intel Corporation Device 0cfe ++ 79:02.0 System peripheral: Intel Corporation Device 0cfe ++ e7:02.0 System peripheral: Intel Corporation Device 0cfe ++ ec:02.0 System peripheral: Intel Corporation Device 0cfe ++ f1:02.0 System peripheral: Intel Corporation Device 0cfe ++ f6:02.0 System peripheral: Intel Corporation Device 0cfe ++ ++IAA Device Configuration And Enabling ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++The ``accel-config`` tool is used to enable ``IAA`` devices and configure ++``IAA`` hardware resources(work queues and engines). One ``IAA`` device ++has 8 work queues and 8 processing engines, multiple engines can be assigned ++to a work queue via ``group`` attribute. ++ ++For ``accel-config`` installation, please refer to `accel-config installation ++`_ ++ ++One example of configuring and enabling an ``IAA`` device. ++ ++.. code-block:: shell ++ ++ #accel-config config-engine iax1/engine1.0 -g 0 ++ #accel-config config-engine iax1/engine1.1 -g 0 ++ #accel-config config-engine iax1/engine1.2 -g 0 ++ #accel-config config-engine iax1/engine1.3 -g 0 ++ #accel-config config-engine iax1/engine1.4 -g 0 ++ #accel-config config-engine iax1/engine1.5 -g 0 ++ #accel-config config-engine iax1/engine1.6 -g 0 ++ #accel-config config-engine iax1/engine1.7 -g 0 ++ #accel-config config-wq iax1/wq1.0 -g 0 -s 128 -p 10 -b 1 -t 128 -m shared -y user -n app1 -d user ++ #accel-config enable-device iax1 ++ #accel-config enable-wq iax1/wq1.0 ++ ++.. note:: ++ IAX is an early name for IAA ++ ++- The ``IAA`` device index is 1, use ``ls -lh /sys/bus/dsa/devices/iax*`` ++ command to query the ``IAA`` device index. ++ ++- 8 engines and 1 work queue are configured in group 0, so all compression jobs ++ submitted to this work queue can be processed by all engines at the same time. 
++ ++- Set work queue attributes including the work mode, work queue size and so on. ++ ++- Enable the ``IAA1`` device and work queue 1.0 ++ ++.. note:: ++ ++ Set work queue mode to shared mode, since ``QPL`` library only supports ++ shared mode ++ ++For more detailed configuration, please refer to `IAA Configuration Samples ++`_ ++ ++IAA Unit Test ++^^^^^^^^^^^^^ ++ ++- For enabling ``IAA`` devices on Xeon platform, please refer to `IAA User Guide ++ `_ ++ ++- ``IAA`` device driver is Intel Data Accelerator Driver (idxd); it is ++ recommended that the minimum version of Linux kernel is 5.18. ++ ++- Add ``"intel_iommu=on,sm_on"`` parameter to kernel command line ++ for ``SVM`` feature enabling. ++ ++Here is an easy way to verify ``IAA`` device driver and ``SVM`` with `iaa_test ++`_ ++ ++.. code-block:: shell ++ ++ #./test/iaa_test ++ [ info] alloc wq 0 shared size 128 addr 0x7f26cebe5000 batch sz 0xfffffffe xfer sz 0x80000000 ++ [ info] test noop: tflags 0x1 num_desc 1 ++ [ info] preparing descriptor for noop ++ [ info] Submitted all noop jobs ++ [ info] verifying task result for 0x16f7e20 ++ [ info] test with op 0 passed ++ ++ ++IAA Resources Allocation For Migration ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++There are no ``IAA`` resource configuration parameters for migration and ++``accel-config`` tool configuration cannot directly specify the ``IAA`` ++resources used for migration. ++ ++The multifd migration with ``QPL`` compression method will use all work ++queues that are enabled and in shared mode. ++ ++.. note:: ++ ++ Accessing IAA resources requires ``sudo`` command or ``root`` privileges ++ by default. Administrators can modify the IAA device node ownership ++ so that QEMU can use IAA with specified user permissions.
++ ++ For example ++ ++ #chown -R qemu /dev/iax ++ ++Shared Virtual Memory(SVM) Introduction ++======================================= ++ ++An ability for an accelerator I/O device to operate in the same virtual ++memory space of applications on host processors. It also implies the ++ability to operate from pageable memory, avoiding functional requirements ++to pin memory for DMA operations. ++ ++When using ``SVM`` technology, users do not need to reserve memory for the ++``IAA`` device and perform pin memory operation. The ``IAA`` device can ++directly access data using the virtual address of the process. ++ ++For more ``SVM`` technology, please refer to ++`Shared Virtual Addressing (SVA) with ENQCMD ++`_ ++ ++ ++How To Use QPL Compression In Migration ++======================================= ++ ++1 - Installation of ``QPL`` library and ``accel-config`` library if using IAA ++ ++2 - Configure and enable ``IAA`` devices and work queues via ``accel-config`` ++ ++3 - Build ``QEMU`` with ``--enable-qpl`` parameter ++ ++ E.g. configure --target-list=x86_64-softmmu --enable-kvm ``--enable-qpl`` ++ ++4 - Enable ``QPL`` compression during migration ++ ++ Set ``migrate_set_parameter multifd-compression qpl`` when migrating, the ++ ``QPL`` compression does not support configuring the compression level, it ++ only supports one compression level. ++ ++The Difference Between QPL And ZLIB ++=================================== ++ ++Although both ``QPL`` and ``ZLIB`` are based on the deflate compression ++algorithm, and ``QPL`` can support the header and tail of ``ZLIB``, ``QPL`` ++is still not fully compatible with the ``ZLIB`` compression in the migration. ++ ++``QPL`` only supports 4K history buffer, and ``ZLIB`` is 32K by default. ++``ZLIB`` compresses data that ``QPL`` may not decompress correctly and ++vice versa. 
++ ++``QPL`` does not support the ``Z_SYNC_FLUSH`` operation in ``ZLIB`` streaming ++compression, current ``ZLIB`` implementation uses ``Z_SYNC_FLUSH``, so each ++``multifd`` thread has a ``ZLIB`` streaming context, and all page compression ++and decompression are based on this stream. ``QPL`` cannot decompress such data ++and vice versa. ++ ++The introduction for ``Z_SYNC_FLUSH``, please refer to `Zlib Manual ++`_ ++ ++The Best Practices ++================== ++When user enables the IAA device for ``QPL`` compression, it is recommended ++to add ``-mem-prealloc`` parameter to the destination boot parameters. This ++parameter can avoid the occurrence of I/O page fault and reduce the overhead ++of IAA compression and decompression. ++ ++The example of booting with ``-mem-prealloc`` parameter ++ ++.. code-block:: shell ++ ++ $qemu-system-x86_64 --enable-kvm -cpu host --mem-prealloc ... ++ ++ ++An example about I/O page fault measurement of destination without ++``-mem-prealloc``, the ``svm_prq`` indicates the number of I/O page fault ++occurrences and processing time. ++ ++.. 
code-block:: shell ++ ++ #echo 1 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #echo 2 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #echo 3 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #echo 4 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #cat /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ IOMMU: dmar18 Register Base Address: c87fc000 ++ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms 1ms-10ms >=10ms min(us) max(us) average(us) ++ inv_iotlb 0 286 123 0 0 0 0 0 1 0 ++ inv_devtlb 0 276 133 0 0 0 0 0 2 0 ++ inv_iec 0 0 0 0 0 0 0 0 0 0 ++ svm_prq 0 0 25206 364 395 0 0 1 556 9 +-- +2.33.0 + diff --git a/docs-migration-add-uadk-compression-feature.patch b/docs-migration-add-uadk-compression-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..7972bfd771a59609b3c9d6190ead42eceba5ed52 --- /dev/null +++ b/docs-migration-add-uadk-compression-feature.patch @@ -0,0 +1,183 @@ +From 2d8e0ef9947bdb82ce70acd7d0605795bf775153 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:04 +0100 +Subject: [80/99] docs/migration: add uadk compression feature + +commit 3ae9bd97829213808298ae6d35ea26f8def15dc1 upstream. + +Document UADK(User Space Accelerator Development Kit) library details +and how to use that for migration. + +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +[s/Qemu/QEMU in docs] +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/uadk-compression.rst | 144 ++++++++++++++++++++++ + 2 files changed, 145 insertions(+) + create mode 100644 docs/devel/migration/uadk-compression.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index 9819393c12..0c9cb3dd6c 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -11,3 +11,4 @@ Migration has plenty of features to support different use cases. 
+ vfio + virtio + qpl-compression ++ uadk-compression +diff --git a/docs/devel/migration/uadk-compression.rst b/docs/devel/migration/uadk-compression.rst +new file mode 100644 +index 0000000000..3f73345dd5 +--- /dev/null ++++ b/docs/devel/migration/uadk-compression.rst +@@ -0,0 +1,144 @@ ++========================================================= ++User Space Accelerator Development Kit (UADK) Compression ++========================================================= ++UADK is a general-purpose user space accelerator framework that uses shared ++virtual addressing (SVA) to provide a unified programming interface for ++hardware acceleration of cryptographic and compression algorithms. ++ ++UADK includes Unified/User-space-access-intended Accelerator Framework (UACCE), ++which enables hardware accelerators from different vendors that support SVA to ++adapt to UADK. ++ ++Currently, HiSilicon Kunpeng hardware accelerators have been registered with ++UACCE. Through the UADK framework, users can run cryptographic and compression ++algorithms using hardware accelerators instead of CPUs, freeing up CPU ++computing power and improving computing performance. ++ ++https://github.com/Linaro/uadk/tree/master/docs ++ ++UADK Framework ++============== ++UADK consists of UACCE, vendors' drivers, and an algorithm layer. UADK requires ++the hardware accelerator to support SVA, and the operating system to support ++IOMMU and SVA. Hardware accelerators from different vendors are registered as ++different character devices with UACCE by using kernel-mode drivers of the ++vendors. A user can access the hardware accelerators by performing user-mode ++operations on the character devices. 
++ ++:: ++ ++ +----------------------------------+ ++ | apps | ++ +----+------------------------+----+ ++ | | ++ | | ++ +-------+--------+ +-------+-------+ ++ | scheduler | | alg libraries | ++ +-------+--------+ +-------+-------+ ++ | | ++ | | ++ | | ++ | +--------+------+ ++ | | vendor drivers| ++ | +-+-------------+ ++ | | ++ | | ++ +--+------------------+--+ ++ | libwd | ++ User +----+-------------+-----+ ++ -------------------------------------------------- ++ Kernel +--+-----+ +------+ ++ | uacce | | smmu | ++ +---+----+ +------+ ++ | ++ +---+------------------+ ++ | vendor kernel driver | ++ +----------------------+ ++ -------------------------------------------------- ++ +----------------------+ ++ | HW Accelerators | ++ +----------------------+ ++ ++UADK Installation ++----------------- ++Build UADK ++^^^^^^^^^^ ++ ++.. code-block:: shell ++ ++ git clone https://github.com/Linaro/uadk.git ++ cd uadk ++ mkdir build ++ ./autogen.sh ++ ./configure --prefix=$PWD/build ++ make ++ make install ++ ++Without --prefix, UADK will be installed to /usr/local/lib by default. ++If get error:"cannot find -lnuma", please install the libnuma-dev ++ ++Run pkg-config libwd to ensure env is setup correctly ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++* export PKG_CONFIG_PATH=$PWD/build/lib/pkgconfig ++* pkg-config libwd --cflags --libs ++ -I/usr/local/include -L/usr/local/lib -lwd ++ ++* export PKG_CONFIG_PATH is required on demand. ++ Not required if UADK is installed to /usr/local/lib ++ ++UADK Host Kernel Requirements ++----------------------------- ++User needs to make sure that ``UACCE`` is already supported in Linux kernel. ++The kernel version should be at least v5.9 with SVA (Shared Virtual ++Addressing) enabled. ++ ++Kernel Configuration ++^^^^^^^^^^^^^^^^^^^^ ++ ++``UACCE`` could be built as module or built-in. ++ ++Here's an example to enable UACCE with hardware accelerator in HiSilicon ++Kunpeng platform. 
++ ++* CONFIG_IOMMU_SVA_LIB=y ++* CONFIG_ARM_SMMU=y ++* CONFIG_ARM_SMMU_V3=y ++* CONFIG_ARM_SMMU_V3_SVA=y ++* CONFIG_PCI_PASID=y ++* CONFIG_UACCE=y ++* CONFIG_CRYPTO_DEV_HISI_QM=y ++* CONFIG_CRYPTO_DEV_HISI_ZIP=y ++ ++Make sure all of the above kernel configurations are selected. ++ ++Accelerator dev node permissions ++-------------------------------- ++Hardware accelerators (e.g. HiSilicon Kunpeng Zip accelerator) get registered to ++UADK and char devices are created in the /dev directory. In order to access resources ++on hardware accelerator devices, write permission should be provided to the user. ++ ++.. code-block:: shell ++ ++ $ sudo chmod 777 /dev/hisi_zip-* ++ ++How To Use UADK Compression In QEMU Migration ++--------------------------------------------- ++* Make sure UADK is installed as above ++* Build ``QEMU`` with ``--enable-uadk`` parameter ++ ++ E.g. configure --target-list=aarch64-softmmu --enable-kvm ``--enable-uadk`` ++ ++* Enable ``UADK`` compression during migration ++ ++ Set ``migrate_set_parameter multifd-compression uadk`` ++ ++Since UADK uses Shared Virtual Addressing (SVA) and devices access virtual memory ++directly, it is possible that SMMUv3 may encounter page faults while walking the ++IO page tables. This may impact the performance. In order to mitigate this, ++please make sure to add the ``-mem-prealloc`` parameter to the destination VM ++boot parameters. ++ ++Though both UADK and ZLIB are based on the deflate compression algorithm, UADK ++is not fully compatible with ZLIB. Hence, please make sure to use ``uadk`` on ++both source and destination during migration. 
+-- +2.33.0 + diff --git a/docs-specs-Add-ACPI-GED-documentation.patch b/docs-specs-Add-ACPI-GED-documentation.patch deleted file mode 100644 index 46e8c17483ba33af2b75e954233c3cbdc5c7cddc..0000000000000000000000000000000000000000 --- a/docs-specs-Add-ACPI-GED-documentation.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 9c1752703fb8a5b70985cf4c9caabc3388c5953b Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:31 +0100 -Subject: [PATCH] docs/specs: Add ACPI GED documentation - -Documents basic concepts of ACPI Generic Event device(GED) -and interface between QEMU and the ACPI BIOS. - -Signed-off-by: Shameer Kolothum -Reviewed-by: Eric Auger -Message-Id: <20190918130633.4872-10-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - docs/specs/acpi_hw_reduced_hotplug.rst | 70 ++++++++++++++++++++++++++ - docs/specs/index.rst | 1 + - 2 files changed, 71 insertions(+) - create mode 100644 docs/specs/acpi_hw_reduced_hotplug.rst - -diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst -new file mode 100644 -index 0000000000..911a98255b ---- /dev/null -+++ b/docs/specs/acpi_hw_reduced_hotplug.rst -@@ -0,0 +1,70 @@ -+================================================== -+QEMU and ACPI BIOS Generic Event Device interface -+================================================== -+ -+The ACPI *Generic Event Device* (GED) is a HW reduced platform -+specific device introduced in ACPI v6.1 that handles all platform -+events, including the hotplug ones. GED is modelled as a device -+in the namespace with a _HID defined to be ACPI0013. This document -+describes the interface between QEMU and the ACPI BIOS. -+ -+GED allows HW reduced platforms to handle interrupts in ACPI ASL -+statements. It follows a very similar approach to the _EVT method -+from GPIO events. All interrupts are listed in _CRS and the handler -+is written in _EVT method. 
However, the QEMU implementation uses a -+single interrupt for the GED device, relying on an IO memory region -+to communicate the type of device affected by the interrupt. This way, -+we can support up to 32 events with a unique interrupt. -+ -+**Here is an example,** -+ -+:: -+ -+ Device (\_SB.GED) -+ { -+ Name (_HID, "ACPI0013") -+ Name (_UID, Zero) -+ Name (_CRS, ResourceTemplate () -+ { -+ Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, ) -+ { -+ 0x00000029, -+ } -+ }) -+ OperationRegion (EREG, SystemMemory, 0x09080000, 0x04) -+ Field (EREG, DWordAcc, NoLock, WriteAsZeros) -+ { -+ ESEL, 32 -+ } -+ Method (_EVT, 1, Serialized) -+ { -+ Local0 = ESEL // ESEL = IO memory region which specifies the -+ // device type. -+ If (((Local0 & One) == One)) -+ { -+ MethodEvent1() -+ } -+ If ((Local0 & 0x2) == 0x2) -+ { -+ MethodEvent2() -+ } -+ ... -+ } -+ } -+ -+GED IO interface (4 byte access) -+-------------------------------- -+**read access:** -+ -+:: -+ -+ [0x0-0x3] Event selector bit field (32 bit) set by QEMU. 
-+ -+ bits: -+ 0: Memory hotplug event -+ 1: System power down event -+ 2-31: Reserved -+ -+**write_access:** -+ -+Nothing is expected to be written into GED IO memory -diff --git a/docs/specs/index.rst b/docs/specs/index.rst -index 40adb97c5e..984ba44029 100644 ---- a/docs/specs/index.rst -+++ b/docs/specs/index.rst -@@ -12,3 +12,4 @@ Contents: - - ppc-xive - ppc-spapr-xive -+ acpi_hw_reduced_hotplug --- -2.19.1 diff --git a/docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch b/docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch deleted file mode 100644 index f0be64a937fd5b1f78c54f5f74854f388c023786..0000000000000000000000000000000000000000 --- a/docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch +++ /dev/null @@ -1,66 +0,0 @@ -From dd7f6cc3bcd71681920e3530f2c53041c812c5d3 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 5 Mar 2020 17:51:46 +0100 -Subject: [PATCH 16/19] docs/specs/tpm: Document TPM_TIS sysbus device for ARM - -Update the documentation with recent changes related to the -sysbus TPM_TIS device addition and add the command line -to be used with arm VIRT. - -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Message-id: 20200305165149.618-8-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - docs/specs/tpm.rst | 25 ++++++++++++++++++++++++- - 1 file changed, 24 insertions(+), 1 deletion(-) - -diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst -index 2bdf637f..da9eb39c 100644 ---- a/docs/specs/tpm.rst -+++ b/docs/specs/tpm.rst -@@ -18,9 +18,15 @@ The TIS interface makes a memory mapped IO region in the area - 0xfed40000-0xfed44fff available to the guest operating system. - - QEMU files related to TPM TIS interface: -- - ``hw/tpm/tpm_tis.c`` -+ - ``hw/tpm/tpm_tis_common.c`` -+ - ``hw/tpm/tpm_tis_isa.c`` -+ - ``hw/tpm/tpm_tis_sysbus.c`` - - ``hw/tpm/tpm_tis.h`` - -+Both an ISA device and a sysbus device are available. 
The former is -+used with pc/q35 machine while the latter can be instantiated in the -+ARM virt machine. -+ - CRB interface - ------------- - -@@ -325,6 +331,23 @@ In case a pSeries machine is emulated, use the following command line: - -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ - -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 - -+In case an ARM virt machine is emulated, use the following command line: -+ -+.. code-block:: console -+ -+ qemu-system-aarch64 -machine virt,gic-version=3,accel=kvm \ -+ -cpu host -m 4G \ -+ -nographic -no-acpi \ -+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -+ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -+ -device tpm-tis-device,tpmdev=tpm0 \ -+ -device virtio-blk-pci,drive=drv0 \ -+ -drive format=qcow2,file=hda.qcow2,if=none,id=drv0 \ -+ -drive if=pflash,format=raw,file=flash0.img,readonly \ -+ -drive if=pflash,format=raw,file=flash1.img -+ -+ On ARM, ACPI boot with TPM is not yet supported. -+ - In case SeaBIOS is used as firmware, it should show the TPM menu item - after entering the menu with 'ESC'. 
- --- -2.23.0 - diff --git a/docs-specs-tpm-reST-ify-TPM-documentation.patch b/docs-specs-tpm-reST-ify-TPM-documentation.patch deleted file mode 100644 index d4648994bde2fc4b68ce49f28f4a612f53e65551..0000000000000000000000000000000000000000 --- a/docs-specs-tpm-reST-ify-TPM-documentation.patch +++ /dev/null @@ -1,993 +0,0 @@ -From 5d1865496ca39f08142a0c1eb2c9b14ec1ec9140 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Tue, 21 Jan 2020 10:29:35 -0500 -Subject: [PATCH 09/19] docs/specs/tpm: reST-ify TPM documentation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Marc-André Lureau -Reviewed-by: Stefan Berger -Message-Id: <20200121152935.649898-7-stefanb@linux.ibm.com> -Signed-off-by: David Gibson -Signed-off-by: jiangfangjie ---- - docs/specs/index.rst | 1 + - docs/specs/tpm.rst | 503 +++++++++++++++++++++++++++++++++++++++++++ - docs/specs/tpm.txt | 445 -------------------------------------- - 3 files changed, 504 insertions(+), 445 deletions(-) - create mode 100644 docs/specs/tpm.rst - delete mode 100644 docs/specs/tpm.txt - -diff --git a/docs/specs/index.rst b/docs/specs/index.rst -index 984ba440..de46a8b5 100644 ---- a/docs/specs/index.rst -+++ b/docs/specs/index.rst -@@ -13,3 +13,4 @@ Contents: - ppc-xive - ppc-spapr-xive - acpi_hw_reduced_hotplug -+ tpm -diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst -new file mode 100644 -index 00000000..2bdf637f ---- /dev/null -+++ b/docs/specs/tpm.rst -@@ -0,0 +1,503 @@ -+=============== -+QEMU TPM Device -+=============== -+ -+Guest-side hardware interface -+============================= -+ -+TIS interface -+------------- -+ -+The QEMU TPM emulation implements a TPM TIS hardware interface -+following the Trusted Computing Group's specification "TCG PC Client -+Specific TPM Interface Specification (TIS)", Specification Version -+1.3, 21 March 2013. (see the `TIS specification`_, or a later version -+of it). 
-+ -+The TIS interface makes a memory mapped IO region in the area -+0xfed40000-0xfed44fff available to the guest operating system. -+ -+QEMU files related to TPM TIS interface: -+ - ``hw/tpm/tpm_tis.c`` -+ - ``hw/tpm/tpm_tis.h`` -+ -+CRB interface -+------------- -+ -+QEMU also implements a TPM CRB interface following the Trusted -+Computing Group's specification "TCG PC Client Platform TPM Profile -+(PTP) Specification", Family "2.0", Level 00 Revision 01.03 v22, May -+22, 2017. (see the `CRB specification`_, or a later version of it) -+ -+The CRB interface makes a memory mapped IO region in the area -+0xfed40000-0xfed40fff (1 locality) available to the guest -+operating system. -+ -+QEMU files related to TPM CRB interface: -+ - ``hw/tpm/tpm_crb.c`` -+ -+SPAPR interface -+--------------- -+ -+pSeries (ppc64) machines offer a tpm-spapr device model. -+ -+QEMU files related to the SPAPR interface: -+ - ``hw/tpm/tpm_spapr.c`` -+ -+fw_cfg interface -+================ -+ -+The bios/firmware may read the ``"etc/tpm/config"`` fw_cfg entry for -+configuring the guest appropriately. -+ -+The entry of 6 bytes has the following content, in little-endian: -+ -+.. code-block:: c -+ -+ #define TPM_VERSION_UNSPEC 0 -+ #define TPM_VERSION_1_2 1 -+ #define TPM_VERSION_2_0 2 -+ -+ #define TPM_PPI_VERSION_NONE 0 -+ #define TPM_PPI_VERSION_1_30 1 -+ -+ struct FwCfgTPMConfig { -+ uint32_t tpmppi_address; /* PPI memory location */ -+ uint8_t tpm_version; /* TPM version */ -+ uint8_t tpmppi_version; /* PPI version */ -+ }; -+ -+ACPI interface -+============== -+ -+The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT -+and passes it into the guest through the fw_cfg device. The device -+description contains the base address of the TIS interface 0xfed40000 -+and the size of the MMIO area (0x5000). In case a TPM2 is used by -+QEMU, a TPM2 ACPI table is also provided. 
The device is described to -+be used in polling mode rather than interrupt mode primarily because -+no unused IRQ could be found. -+ -+To support measurement logs to be written by the firmware, -+e.g. SeaBIOS, a TCPA table is implemented. This table provides a 64kb -+buffer where the firmware can write its log into. For TPM 2 only a -+more recent version of the TPM2 table provides support for -+measurements logs and a TCPA table does not need to be created. -+ -+The TCPA and TPM2 ACPI tables follow the Trusted Computing Group -+specification "TCG ACPI Specification" Family "1.2" and "2.0", Level -+00 Revision 00.37. (see the `ACPI specification`_, or a later version -+of it) -+ -+ACPI PPI Interface -+------------------ -+ -+QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and -+TPM 2. This interface requires ACPI and firmware support. (see the -+`PPI specification`_) -+ -+PPI enables a system administrator (root) to request a modification to -+the TPM upon reboot. The PPI specification defines the operation -+requests and the actions the firmware has to take. The system -+administrator passes the operation request number to the firmware -+through an ACPI interface which writes this number to a memory -+location that the firmware knows. Upon reboot, the firmware finds the -+number and sends commands to the TPM. The firmware writes the TPM -+result code and the operation request number to a memory location that -+ACPI can read from and pass the result on to the administrator. -+ -+The PPI specification defines a set of mandatory and optional -+operations for the firmware to implement. The ACPI interface also -+allows an administrator to list the supported operations. In QEMU the -+ACPI code is generated by QEMU, yet the firmware needs to implement -+support on a per-operations basis, and different firmwares may support -+a different subset. 
Therefore, QEMU introduces the virtual memory -+device for PPI where the firmware can indicate which operations it -+supports and ACPI can enable the ones that are supported and disable -+all others. This interface lies in main memory and has the following -+layout: -+ -+ +-------------+--------+--------+-------------------------------------------+ -+ | Field | Length | Offset | Description | -+ +=============+========+========+===========================================+ -+ | ``func`` | 0x100 | 0x000 | Firmware sets values for each supported | -+ | | | | operation. See defined values below. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``ppin`` | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. | -+ | | | | Not supported. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``ppip`` | 0x4 | 0x101 | ACPI function index to pass to SMM code. | -+ | | | | Set by ACPI. Not supported. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``pprp`` | 0x4 | 0x105 | Result of last executed operation. Set by | -+ | | | | firmware. See function index 5 for values.| -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``pprq`` | 0x4 | 0x109 | Operation request number to execute. See | -+ | | | | 'Physical Presence Interface Operation | -+ | | | | Summary' tables in specs. Set by ACPI. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``pprm`` | 0x4 | 0x10d | Operation request optional parameter. | -+ | | | | Values depend on operation. Set by ACPI. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``lppr`` | 0x4 | 0x111 | Last executed operation request number. | -+ | | | | Copied from pprq field by firmware. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``fret`` | 0x4 | 0x115 | Result code from SMM function. 
| -+ | | | | Not supported. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``res1`` | 0x40 | 0x119 | Reserved for future use | -+ +-------------+--------+--------+-------------------------------------------+ -+ |``next_step``| 0x1 | 0x159 | Operation to execute after reboot by | -+ | | | | firmware. Used by firmware. | -+ +-------------+--------+--------+-------------------------------------------+ -+ | ``movv`` | 0x1 | 0x15a | Memory overwrite variable | -+ +-------------+--------+--------+-------------------------------------------+ -+ -+The following values are supported for the ``func`` field. They -+correspond to the values used by ACPI function index 8. -+ -+ +----------+-------------------------------------------------------------+ -+ | Value | Description | -+ +==========+=============================================================+ -+ | 0 | Operation is not implemented. | -+ +----------+-------------------------------------------------------------+ -+ | 1 | Operation is only accessible through firmware. | -+ +----------+-------------------------------------------------------------+ -+ | 2 | Operation is blocked for OS by firmware configuration. | -+ +----------+-------------------------------------------------------------+ -+ | 3 | Operation is allowed and physically present user required. | -+ +----------+-------------------------------------------------------------+ -+ | 4 | Operation is allowed and physically present user is not | -+ | | required. | -+ +----------+-------------------------------------------------------------+ -+ -+The location of the table is given by the fw_cfg ``tpmppi_address`` -+field. The PPI memory region size is 0x400 (``TPM_PPI_ADDR_SIZE``) to -+leave enough room for future updates. 
-+ -+QEMU files related to TPM ACPI tables: -+ - ``hw/i386/acpi-build.c`` -+ - ``include/hw/acpi/tpm.h`` -+ -+TPM backend devices -+=================== -+ -+The TPM implementation is split into two parts, frontend and -+backend. The frontend part is the hardware interface, such as the TPM -+TIS interface described earlier, and the other part is the TPM backend -+interface. The backend interfaces implement the interaction with a TPM -+device, which may be a physical or an emulated device. The split -+between the front- and backend devices allows a frontend to be -+connected with any available backend. This enables the TIS interface -+to be used with the passthrough backend or the swtpm backend. -+ -+QEMU files related to TPM backends: -+ - ``backends/tpm.c`` -+ - ``include/sysemu/tpm_backend.h`` -+ - ``include/sysemu/tpm_backend_int.h`` -+ -+The QEMU TPM passthrough device -+------------------------------- -+ -+In case QEMU is run on Linux as the host operating system it is -+possible to make the hardware TPM device available to a single QEMU -+guest. In this case the user must make sure that no other program is -+using the device, e.g., /dev/tpm0, before trying to start QEMU with -+it. -+ -+The passthrough driver uses the host's TPM device for sending TPM -+commands and receiving responses from. Besides that it accesses the -+TPM device's sysfs entry for support of command cancellation. Since -+none of the state of a hardware TPM can be migrated between hosts, -+virtual machine migration is disabled when the TPM passthrough driver -+is used. -+ -+Since the host's TPM device will already be initialized by the host's -+firmware, certain commands, e.g. ``TPM_Startup()``, sent by the -+virtual firmware for device initialization, will fail. In this case -+the firmware should not use the TPM. -+ -+Sharing the device with the host is generally not a recommended usage -+scenario for a TPM device. 
The primary reason for this is that two -+operating systems can then access the device's single set of -+resources, such as platform configuration registers -+(PCRs). Applications or kernel security subsystems, such as the Linux -+Integrity Measurement Architecture (IMA), are not expecting to share -+PCRs. -+ -+QEMU files related to the TPM passthrough device: -+ - ``hw/tpm/tpm_passthrough.c`` -+ - ``hw/tpm/tpm_util.c`` -+ - ``hw/tpm/tpm_util.h`` -+ -+ -+Command line to start QEMU with the TPM passthrough device using the host's -+hardware TPM ``/dev/tpm0``: -+ -+.. code-block:: console -+ -+ qemu-system-x86_64 -display sdl -accel kvm \ -+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -+ -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ -+ -device tpm-tis,tpmdev=tpm0 test.img -+ -+ -+The following commands should result in similar output inside the VM -+with a Linux kernel that either has the TPM TIS driver built-in or -+available as a module: -+ -+.. code-block:: console -+ -+ # dmesg | grep -i tpm -+ [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) -+ -+ # dmesg | grep TCPA -+ [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ -+ BXPCTCPA 0000001 BXPC 00000001) -+ -+ # ls -l /dev/tpm* -+ crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 -+ -+ # find /sys/devices/ | grep pcrs$ | xargs cat -+ PCR-00: 35 4E 3B CE 23 9F 38 59 ... -+ ... -+ PCR-23: 00 00 00 00 00 00 00 00 ... -+ -+The QEMU TPM emulator device -+---------------------------- -+ -+The TPM emulator device uses an external TPM emulator called 'swtpm' -+for sending TPM commands to and receiving responses from. The swtpm -+program must have been started before trying to access it through the -+TPM emulator with QEMU. -+ -+The TPM emulator implements a command channel for transferring TPM -+commands and responses as well as a control channel over which control -+commands can be sent. 
(see the `SWTPM protocol`_ specification) -+ -+The control channel serves the purpose of resetting, initializing, and -+migrating the TPM state, among other things. -+ -+The swtpm program behaves like a hardware TPM and therefore needs to -+be initialized by the firmware running inside the QEMU virtual -+machine. One necessary step for initializing the device is to send -+the TPM_Startup command to it. SeaBIOS, for example, has been -+instrumented to initialize a TPM 1.2 or TPM 2 device using this -+command. -+ -+QEMU files related to the TPM emulator device: -+ - ``hw/tpm/tpm_emulator.c`` -+ - ``hw/tpm/tpm_util.c`` -+ - ``hw/tpm/tpm_util.h`` -+ -+The following commands start the swtpm with a UnixIO control channel over -+a socket interface. They do not need to be run as root. -+ -+.. code-block:: console -+ -+ mkdir /tmp/mytpm1 -+ swtpm socket --tpmstate dir=/tmp/mytpm1 \ -+ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ -+ --log level=20 -+ -+Command line to start QEMU with the TPM emulator device communicating -+with the swtpm (x86): -+ -+.. code-block:: console -+ -+ qemu-system-x86_64 -display sdl -accel kvm \ -+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -+ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -+ -device tpm-tis,tpmdev=tpm0 test.img -+ -+In case a pSeries machine is emulated, use the following command line: -+ -+.. 
code-block:: console -+ -+ qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \ -+ -m 1024 -bios slof.bin -boot menu=on \ -+ -nodefaults -device VGA -device pci-ohci -device usb-kbd \ -+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -+ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -+ -device tpm-spapr,tpmdev=tpm0 \ -+ -device spapr-vscsi,id=scsi0,reg=0x00002000 \ -+ -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ -+ -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 -+ -+In case SeaBIOS is used as firmware, it should show the TPM menu item -+after entering the menu with 'ESC'. -+ -+.. code-block:: console -+ -+ Select boot device: -+ 1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD] -+ [...] -+ 5. Legacy option rom -+ -+ t. TPM Configuration -+ -+The following commands should result in similar output inside the VM -+with a Linux kernel that either has the TPM TIS driver built-in or -+available as a module: -+ -+.. code-block:: console -+ -+ # dmesg | grep -i tpm -+ [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) -+ -+ # dmesg | grep TCPA -+ [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ -+ BXPCTCPA 0000001 BXPC 00000001) -+ -+ # ls -l /dev/tpm* -+ crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 -+ -+ # find /sys/devices/ | grep pcrs$ | xargs cat -+ PCR-00: 35 4E 3B CE 23 9F 38 59 ... -+ ... -+ PCR-23: 00 00 00 00 00 00 00 00 ... -+ -+Migration with the TPM emulator -+=============================== -+ -+The TPM emulator supports the following types of virtual machine -+migration: -+ -+- VM save / restore (migration into a file) -+- Network migration -+- Snapshotting (migration into storage like QoW2 or QED) -+ -+The following command sequences can be used to test VM save / restore. -+ -+In a 1st terminal start an instance of a swtpm using the following command: -+ -+.. 
code-block:: console -+ -+ mkdir /tmp/mytpm1 -+ swtpm socket --tpmstate dir=/tmp/mytpm1 \ -+ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ -+ --log level=20 --tpm2 -+ -+In a 2nd terminal start the VM: -+ -+.. code-block:: console -+ -+ qemu-system-x86_64 -display sdl -accel kvm \ -+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -+ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -+ -device tpm-tis,tpmdev=tpm0 \ -+ -monitor stdio \ -+ test.img -+ -+Verify that the attached TPM is working as expected using applications -+inside the VM. -+ -+To store the state of the VM use the following command in the QEMU -+monitor in the 2nd terminal: -+ -+.. code-block:: console -+ -+ (qemu) migrate "exec:cat > testvm.bin" -+ (qemu) quit -+ -+At this point a file called ``testvm.bin`` should exists and the swtpm -+and QEMU processes should have ended. -+ -+To test 'VM restore' you have to start the swtpm with the same -+parameters as before. If previously a TPM 2 [--tpm2] was saved, --tpm2 -+must now be passed again on the command line. -+ -+In the 1st terminal restart the swtpm with the same command line as -+before: -+ -+.. code-block:: console -+ -+ swtpm socket --tpmstate dir=/tmp/mytpm1 \ -+ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ -+ --log level=20 --tpm2 -+ -+In the 2nd terminal restore the state of the VM using the additional -+'-incoming' option. -+ -+.. code-block:: console -+ -+ qemu-system-x86_64 -display sdl -accel kvm \ -+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -+ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -+ -device tpm-tis,tpmdev=tpm0 \ -+ -incoming "exec:cat < testvm.bin" \ -+ test.img -+ -+Troubleshooting migration -+------------------------- -+ -+There are several reasons why migration may fail. 
In case of problems, -+please ensure that the command lines adhere to the following rules -+and, if possible, that identical versions of QEMU and swtpm are used -+at all times. -+ -+VM save and restore: -+ -+ - QEMU command line parameters should be identical apart from the -+ '-incoming' option on VM restore -+ -+ - swtpm command line parameters should be identical -+ -+VM migration to 'localhost': -+ -+ - QEMU command line parameters should be identical apart from the -+ '-incoming' option on the destination side -+ -+ - swtpm command line parameters should point to two different -+ directories on the source and destination swtpm (--tpmstate dir=...) -+ (especially if different versions of libtpms were to be used on the -+ same machine). -+ -+VM migration across the network: -+ -+ - QEMU command line parameters should be identical apart from the -+ '-incoming' option on the destination side -+ -+ - swtpm command line parameters should be identical -+ -+VM Snapshotting: -+ - QEMU command line parameters should be identical -+ -+ - swtpm command line parameters should be identical -+ -+ -+Besides that, migration failure reasons on the swtpm level may include -+the following: -+ -+ - the versions of the swtpm on the source and destination sides are -+ incompatible -+ -+ - downgrading of TPM state may not be supported -+ -+ - the source and destination libtpms were compiled with different -+ compile-time options and the destination side refuses to accept the -+ state -+ -+ - different migration keys are used on the source and destination side -+ and the destination side cannot decrypt the migrated state -+ (swtpm ... --migration-key ... ) -+ -+ -+.. _TIS specification: -+ https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/ -+ -+.. _CRB specification: -+ https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ -+ -+ -+.. 
_ACPI specification: -+ https://trustedcomputinggroup.org/tcg-acpi-specification/ -+ -+.. _PPI specification: -+ https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/ -+ -+.. _SWTPM protocol: -+ https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod -diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt -deleted file mode 100644 -index 9c3e67d8..00000000 ---- a/docs/specs/tpm.txt -+++ /dev/null -@@ -1,445 +0,0 @@ --QEMU TPM Device --=============== -- --= Guest-side Hardware Interface = -- --The QEMU TPM emulation implements a TPM TIS hardware interface following the --Trusted Computing Group's specification "TCG PC Client Specific TPM Interface --Specification (TIS)", Specification Version 1.3, 21 March 2013. This --specification, or a later version of it, can be accessed from the following --URL: -- --https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/ -- --The TIS interface makes a memory mapped IO region in the area 0xfed40000 - --0xfed44fff available to the guest operating system. -- -- --QEMU files related to TPM TIS interface: -- - hw/tpm/tpm_tis.c -- - hw/tpm/tpm_tis.h -- -- --QEMU also implements a TPM CRB interface following the Trusted Computing --Group's specification "TCG PC Client Platform TPM Profile (PTP) --Specification", Family "2.0", Level 00 Revision 01.03 v22, May 22, 2017. --This specification, or a later version of it, can be accessed from the --following URL: -- --https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ -- --The CRB interface makes a memory mapped IO region in the area 0xfed40000 - --0xfed40fff (1 locality) available to the guest operating system. -- --QEMU files related to TPM CRB interface: -- - hw/tpm/tpm_crb.c -- -- --pSeries (ppc64) machines offer a tpm-spapr device model. 
-- --QEMU files related to the SPAPR interface: -- - hw/tpm/tpm_spapr.c -- --= fw_cfg interface = -- --The bios/firmware may read the "etc/tpm/config" fw_cfg entry for --configuring the guest appropriately. -- --The entry of 6 bytes has the following content, in little-endian: -- -- #define TPM_VERSION_UNSPEC 0 -- #define TPM_VERSION_1_2 1 -- #define TPM_VERSION_2_0 2 -- -- #define TPM_PPI_VERSION_NONE 0 -- #define TPM_PPI_VERSION_1_30 1 -- -- struct FwCfgTPMConfig { -- uint32_t tpmppi_address; /* PPI memory location */ -- uint8_t tpm_version; /* TPM version */ -- uint8_t tpmppi_version; /* PPI version */ -- }; -- --= ACPI Interface = -- --The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT and passes --it into the guest through the fw_cfg device. The device description contains --the base address of the TIS interface 0xfed40000 and the size of the MMIO area --(0x5000). In case a TPM2 is used by QEMU, a TPM2 ACPI table is also provided. --The device is described to be used in polling mode rather than interrupt mode --primarily because no unused IRQ could be found. -- --To support measurement logs to be written by the firmware, e.g. SeaBIOS, a TCPA --table is implemented. This table provides a 64kb buffer where the firmware can --write its log into. For TPM 2 only a more recent version of the TPM2 table --provides support for measurements logs and a TCPA table does not need to be --created. -- --The TCPA and TPM2 ACPI tables follow the Trusted Computing Group specification --"TCG ACPI Specification" Family "1.2" and "2.0", Level 00 Revision 00.37. This --specification, or a later version of it, can be accessed from the following --URL: -- --https://trustedcomputinggroup.org/tcg-acpi-specification/ -- --== ACPI PPI Interface == -- --QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and TPM 2. This --interface requires ACPI and firmware support. 
The specification can be found at --the following URL: -- --https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/ -- --PPI enables a system administrator (root) to request a modification to the --TPM upon reboot. The PPI specification defines the operation requests and the --actions the firmware has to take. The system administrator passes the operation --request number to the firmware through an ACPI interface which writes this --number to a memory location that the firmware knows. Upon reboot, the firmware --finds the number and sends commands to the TPM. The firmware writes the TPM --result code and the operation request number to a memory location that ACPI can --read from and pass the result on to the administrator. -- --The PPI specification defines a set of mandatory and optional operations for --the firmware to implement. The ACPI interface also allows an administrator to --list the supported operations. In QEMU the ACPI code is generated by QEMU, yet --the firmware needs to implement support on a per-operations basis, and --different firmwares may support a different subset. Therefore, QEMU introduces --the virtual memory device for PPI where the firmware can indicate which --operations it supports and ACPI can enable the ones that are supported and --disable all others. This interface lies in main memory and has the following --layout: -- -- +----------+--------+--------+-------------------------------------------+ -- | Field | Length | Offset | Description | -- +----------+--------+--------+-------------------------------------------+ -- | func | 0x100 | 0x000 | Firmware sets values for each supported | -- | | | | operation. See defined values below. | -- +----------+--------+--------+-------------------------------------------+ -- | ppin | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. | -- | | | | Not supported. 
| -- +----------+--------+--------+-------------------------------------------+ -- | ppip | 0x4 | 0x101 | ACPI function index to pass to SMM code. | -- | | | | Set by ACPI. Not supported. | -- +----------+--------+--------+-------------------------------------------+ -- | pprp | 0x4 | 0x105 | Result of last executed operation. Set by | -- | | | | firmware. See function index 5 for values.| -- +----------+--------+--------+-------------------------------------------+ -- | pprq | 0x4 | 0x109 | Operation request number to execute. See | -- | | | | 'Physical Presence Interface Operation | -- | | | | Summary' tables in specs. Set by ACPI. | -- +----------+--------+--------+-------------------------------------------+ -- | pprm | 0x4 | 0x10d | Operation request optional parameter. | -- | | | | Values depend on operation. Set by ACPI. | -- +----------+--------+--------+-------------------------------------------+ -- | lppr | 0x4 | 0x111 | Last executed operation request number. | -- | | | | Copied from pprq field by firmware. | -- +----------+--------+--------+-------------------------------------------+ -- | fret | 0x4 | 0x115 | Result code from SMM function. | -- | | | | Not supported. | -- +----------+--------+--------+-------------------------------------------+ -- | res1 | 0x40 | 0x119 | Reserved for future use | -- +----------+--------+--------+-------------------------------------------+ -- | next_step| 0x1 | 0x159 | Operation to execute after reboot by | -- | | | | firmware. Used by firmware. | -- +----------+--------+--------+-------------------------------------------+ -- | movv | 0x1 | 0x15a | Memory overwrite variable | -- +----------+--------+--------+-------------------------------------------+ -- -- The following values are supported for the 'func' field. They correspond -- to the values used by ACPI function index 8. 
-- -- +----------+-------------------------------------------------------------+ -- | value | Description | -- +----------+-------------------------------------------------------------+ -- | 0 | Operation is not implemented. | -- +----------+-------------------------------------------------------------+ -- | 1 | Operation is only accessible through firmware. | -- +----------+-------------------------------------------------------------+ -- | 2 | Operation is blocked for OS by firmware configuration. | -- +----------+-------------------------------------------------------------+ -- | 3 | Operation is allowed and physically present user required. | -- +----------+-------------------------------------------------------------+ -- | 4 | Operation is allowed and physically present user is not | -- | | required. | -- +----------+-------------------------------------------------------------+ -- --The location of the table is given by the fw_cfg tpmppi_address field. --The PPI memory region size is 0x400 (TPM_PPI_ADDR_SIZE) to leave --enough room for future updates. -- -- --QEMU files related to TPM ACPI tables: -- - hw/i386/acpi-build.c -- - include/hw/acpi/tpm.h -- -- --= TPM backend devices = -- --The TPM implementation is split into two parts, frontend and backend. The --frontend part is the hardware interface, such as the TPM TIS interface --described earlier, and the other part is the TPM backend interface. The backend --interfaces implement the interaction with a TPM device, which may be a physical --or an emulated device. The split between the front- and backend devices allows --a frontend to be connected with any available backend. This enables the TIS --interface to be used with the passthrough backend or the (future) swtpm backend. 
-- -- --QEMU files related to TPM backends: -- - backends/tpm.c -- - include/sysemu/tpm_backend.h -- - include/sysemu/tpm_backend_int.h -- -- --== The QEMU TPM passthrough device == -- --In case QEMU is run on Linux as the host operating system it is possible to --make the hardware TPM device available to a single QEMU guest. In this case the --user must make sure that no other program is using the device, e.g., /dev/tpm0, --before trying to start QEMU with it. -- --The passthrough driver uses the host's TPM device for sending TPM commands --and receiving responses from. Besides that it accesses the TPM device's sysfs --entry for support of command cancellation. Since none of the state of a --hardware TPM can be migrated between hosts, virtual machine migration is --disabled when the TPM passthrough driver is used. -- --Since the host's TPM device will already be initialized by the host's firmware, --certain commands, e.g. TPM_Startup(), sent by the virtual firmware for device --initialization, will fail. In this case the firmware should not use the TPM. -- --Sharing the device with the host is generally not a recommended usage scenario --for a TPM device. The primary reason for this is that two operating systems can --then access the device's single set of resources, such as platform configuration --registers (PCRs). Applications or kernel security subsystems, such as the --Linux Integrity Measurement Architecture (IMA), are not expecting to share PCRs. 
-- -- --QEMU files related to the TPM passthrough device: -- - hw/tpm/tpm_passthrough.c -- - hw/tpm/tpm_util.c -- - hw/tpm/tpm_util.h -- -- --Command line to start QEMU with the TPM passthrough device using the host's --hardware TPM /dev/tpm0: -- --qemu-system-x86_64 -display sdl -accel kvm \ -- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -- -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ -- -device tpm-tis,tpmdev=tpm0 test.img -- --The following commands should result in similar output inside the VM with a --Linux kernel that either has the TPM TIS driver built-in or available as a --module: -- --#> dmesg | grep -i tpm --[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) -- --#> dmesg | grep TCPA --[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ -- BXPCTCPA 0000001 BXPC 00000001) -- --#> ls -l /dev/tpm* --crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 -- --#> find /sys/devices/ | grep pcrs$ | xargs cat --PCR-00: 35 4E 3B CE 23 9F 38 59 ... --... --PCR-23: 00 00 00 00 00 00 00 00 ... -- -- --== The QEMU TPM emulator device == -- --The TPM emulator device uses an external TPM emulator called 'swtpm' for --sending TPM commands to and receiving responses from. The swtpm program --must have been started before trying to access it through the TPM emulator --with QEMU. -- --The TPM emulator implements a command channel for transferring TPM commands --and responses as well as a control channel over which control commands can --be sent. The specification for the control channel can be found here: -- --https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod -- -- --The control channel serves the purpose of resetting, initializing, and --migrating the TPM state, among other things. -- --The swtpm program behaves like a hardware TPM and therefore needs to be --initialized by the firmware running inside the QEMU virtual machine. --One necessary step for initializing the device is to send the TPM_Startup --command to it. 
SeaBIOS, for example, has been instrumented to initialize --a TPM 1.2 or TPM 2 device using this command. -- -- --QEMU files related to the TPM emulator device: -- - hw/tpm/tpm_emulator.c -- - hw/tpm/tpm_util.c -- - hw/tpm/tpm_util.h -- -- --The following commands start the swtpm with a UnixIO control channel over --a socket interface. They do not need to be run as root. -- --mkdir /tmp/mytpm1 --swtpm socket --tpmstate dir=/tmp/mytpm1 \ -- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ -- --log level=20 -- --Command line to start QEMU with the TPM emulator device communicating with --the swtpm (x86): -- --qemu-system-x86_64 -display sdl -accel kvm \ -- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -- -tpmdev emulator,id=tpm0,chardev=chrtpm \ -- -device tpm-tis,tpmdev=tpm0 test.img -- --In case a pSeries machine is emulated, use the following command line: -- --qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \ -- -m 1024 -bios slof.bin -boot menu=on \ -- -nodefaults -device VGA -device pci-ohci -device usb-kbd \ -- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -- -tpmdev emulator,id=tpm0,chardev=chrtpm \ -- -device tpm-spapr,tpmdev=tpm0 \ -- -device spapr-vscsi,id=scsi0,reg=0x00002000 \ -- -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ -- -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 -- -- --In case SeaBIOS is used as firmware, it should show the TPM menu item --after entering the menu with 'ESC'. -- --Select boot device: --1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD] --[...] --5. Legacy option rom -- --t. 
TPM Configuration -- -- --The following commands should result in similar output inside the VM with a --Linux kernel that either has the TPM TIS driver built-in or available as a --module: -- --#> dmesg | grep -i tpm --[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) -- --#> dmesg | grep TCPA --[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ -- BXPCTCPA 0000001 BXPC 00000001) -- --#> ls -l /dev/tpm* --crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 -- --#> find /sys/devices/ | grep pcrs$ | xargs cat --PCR-00: 35 4E 3B CE 23 9F 38 59 ... --... --PCR-23: 00 00 00 00 00 00 00 00 ... -- -- --=== Migration with the TPM emulator === -- --The TPM emulator supports the following types of virtual machine migration: -- --- VM save / restore (migration into a file) --- Network migration --- Snapshotting (migration into storage like QoW2 or QED) -- --The following command sequences can be used to test VM save / restore. -- -- --In a 1st terminal start an instance of a swtpm using the following command: -- --mkdir /tmp/mytpm1 --swtpm socket --tpmstate dir=/tmp/mytpm1 \ -- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ -- --log level=20 --tpm2 -- --In a 2nd terminal start the VM: -- --qemu-system-x86_64 -display sdl -accel kvm \ -- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -- -tpmdev emulator,id=tpm0,chardev=chrtpm \ -- -device tpm-tis,tpmdev=tpm0 \ -- -monitor stdio \ -- test.img -- --Verify that the attached TPM is working as expected using applications inside --the VM. -- --To store the state of the VM use the following command in the QEMU monitor in --the 2nd terminal: -- --(qemu) migrate "exec:cat > testvm.bin" --(qemu) quit -- --At this point a file called 'testvm.bin' should exists and the swtpm and QEMU --processes should have ended. -- --To test 'VM restore' you have to start the swtpm with the same parameters --as before. 
If previously a TPM 2 [--tpm2] was saved, --tpm2 must now be --passed again on the command line. -- --In the 1st terminal restart the swtpm with the same command line as before: -- --swtpm socket --tpmstate dir=/tmp/mytpm1 \ -- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ -- --log level=20 --tpm2 -- --In the 2nd terminal restore the state of the VM using the additional --'-incoming' option. -- --qemu-system-x86_64 -display sdl -accel kvm \ -- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -- -tpmdev emulator,id=tpm0,chardev=chrtpm \ -- -device tpm-tis,tpmdev=tpm0 \ -- -incoming "exec:cat < testvm.bin" \ -- test.img -- -- --Troubleshooting migration: -- --There are several reasons why migration may fail. In case of problems, --please ensure that the command lines adhere to the following rules and, --if possible, that identical versions of QEMU and swtpm are used at all --times. -- --VM save and restore: -- - QEMU command line parameters should be identical apart from the -- '-incoming' option on VM restore -- - swtpm command line parameters should be identical -- --VM migration to 'localhost': -- - QEMU command line parameters should be identical apart from the -- '-incoming' option on the destination side -- - swtpm command line parameters should point to two different -- directories on the source and destination swtpm (--tpmstate dir=...) -- (especially if different versions of libtpms were to be used on the -- same machine). 
-- --VM migration across the network: -- - QEMU command line parameters should be identical apart from the -- '-incoming' option on the destination side -- - swtpm command line parameters should be identical -- --VM Snapshotting: -- - QEMU command line parameters should be identical -- - swtpm command line parameters should be identical -- -- --Besides that, migration failure reasons on the swtpm level may include --the following: -- -- - the versions of the swtpm on the source and destination sides are -- incompatible -- - downgrading of TPM state may not be supported -- - the source and destination libtpms were compiled with different -- compile-time options and the destination side refuses to accept the -- state -- - different migration keys are used on the source and destination side -- and the destination side cannot decrypt the migrated state -- (swtpm ... --migration-key ... ) --- -2.23.0 - diff --git a/docs-sphinx-depfile.py-Handle-env.doc2path-returning.patch b/docs-sphinx-depfile.py-Handle-env.doc2path-returning.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb84a7040e2c143ea91173d1a15cfbb804fecd38 --- /dev/null +++ b/docs-sphinx-depfile.py-Handle-env.doc2path-returning.patch @@ -0,0 +1,52 @@ +From e16c3aa63a203e376a40404314252a11e85a5bda Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Mon, 29 Jul 2024 13:05:33 +0100 +Subject: [PATCH] docs/sphinx/depfile.py: Handle env.doc2path() returning a + Path not a str +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In newer versions of Sphinx the env.doc2path() API is going to change +to return a Path object rather than a str. This was originally visible +in Sphinx 8.0.0rc1, but has been rolled back for the final 8.0.0 +release. 
However it will probably emit a deprecation warning and is +likely to change for good in 9.0: + https://github.com/sphinx-doc/sphinx/issues/12686 + +Our use in depfile.py assumes a str, and if it is passed a Path +it will fall over: + Handler for event 'build-finished' threw an exception (exception: unsupported operand type(s) for +: 'PosixPath' and 'str') + +Wrapping the env.doc2path() call in str() will coerce a Path object +to the str we expect, and have no effect in older Sphinx versions +that do return a str. + +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2458 +Signed-off-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240729120533.2486427-1-peter.maydell@linaro.org> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 48e5b5f994bccf161dd88a67fdd819d4bfb400f1) +Signed-off-by: zhujun2 +--- + docs/sphinx/depfile.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/sphinx/depfile.py b/docs/sphinx/depfile.py +index afdcbcec6e..e74be6af98 100644 +--- a/docs/sphinx/depfile.py ++++ b/docs/sphinx/depfile.py +@@ -19,7 +19,7 @@ + + def get_infiles(env): + for x in env.found_docs: +- yield env.doc2path(x) ++ yield str(env.doc2path(x)) + yield from ((os.path.join(env.srcdir, dep) + for dep in env.dependencies[x])) + for mod in sys.modules.values(): +-- +2.41.0.windows.1 + diff --git a/docs-tools-qemu-img.rst-fix-typo-sumarizes.patch b/docs-tools-qemu-img.rst-fix-typo-sumarizes.patch new file mode 100644 index 0000000000000000000000000000000000000000..780af09140b5285f0af19c228ab21cb1c5231f51 --- /dev/null +++ b/docs-tools-qemu-img.rst-fix-typo-sumarizes.patch @@ -0,0 +1,31 @@ +From ac7182ca1b9ed7dbb524da734a9f426b2ca07503 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Mon, 21 Oct 2024 09:48:30 +0800 +Subject: [PATCH] docs/tools/qemu-img.rst: fix typo (sumarizes) + +cherry-pick from 8a8be21dde814e7cef43acac8140a7ccd0c4f6fb + +Signed-off-by: Samuel Tardieu +Reviewed-by: 
Zhao Liu +Signed-off-by: Michael Tokarev +Signed-off-by: Zhang Jiao +--- + docs/tools/qemu-img.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst +index 4459c065f1..3653adb963 100644 +--- a/docs/tools/qemu-img.rst ++++ b/docs/tools/qemu-img.rst +@@ -406,7 +406,7 @@ Command description: + Compare exits with ``0`` in case the images are equal and with ``1`` + in case the images differ. Other exit codes mean an error occurred during + execution and standard error output should contain an error message. +- The following table sumarizes all exit codes of the compare subcommand: ++ The following table summarizes all exit codes of the compare subcommand: + + 0 + Images are identical (or requested help was printed) +-- +2.41.0.windows.1 + diff --git a/drive-backup-create-do_backup_common.patch b/drive-backup-create-do_backup_common.patch deleted file mode 100644 index cccbc2e967c1529f5072ac64cbad1f6de3c3aee1..0000000000000000000000000000000000000000 --- a/drive-backup-create-do_backup_common.patch +++ /dev/null @@ -1,163 +0,0 @@ -From 98dcfbd5ee53f3be705df7acf37e8706533f494f Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:52 -0400 -Subject: [PATCH] drive-backup: create do_backup_common - -Create a common core that comprises the actual meat of what the backup API -boundary needs to do, and then switch drive-backup to use it. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190709232550.10724-3-jsnow@redhat.com -Signed-off-by: John Snow ---- - blockdev.c | 102 ++++++++++++++++++++++++++++++----------------------- - 1 file changed, 57 insertions(+), 45 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 99c92b96d2..a29838a1c8 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3469,20 +3469,16 @@ out: - aio_context_release(aio_context); - } - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp) -+/* Common QMP interface for drive-backup and blockdev-backup */ -+static BlockJob *do_backup_common(BackupCommon *backup, -+ BlockDriverState *bs, -+ BlockDriverState *target_bs, -+ AioContext *aio_context, -+ JobTxn *txn, Error **errp) - { -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- BlockDriverState *source = NULL; - BlockJob *job = NULL; - BdrvDirtyBitmap *bmap = NULL; -- AioContext *aio_context; -- QDict *options = NULL; -- Error *local_err = NULL; -- int flags, job_flags = JOB_DEFAULT; -- int64_t size; -- bool set_backing_hd = false; -+ int job_flags = JOB_DEFAULT; - int ret; - - if (!backup->has_speed) { -@@ -3494,9 +3490,6 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - if (!backup->has_on_target_error) { - backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT; - } -- if (!backup->has_mode) { -- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -- } - if (!backup->has_job_id) { - backup->job_id = NULL; - } -@@ -3510,6 +3503,54 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - backup->compress = false; - } - -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ return NULL; -+ } -+ -+ if (backup->has_bitmap) { -+ bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); -+ if (!bmap) { -+ error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); -+ return NULL; -+ } -+ if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { -+ return NULL; 
-+ } -+ } -+ -+ if (!backup->auto_finalize) { -+ job_flags |= JOB_MANUAL_FINALIZE; -+ } -+ if (!backup->auto_dismiss) { -+ job_flags |= JOB_MANUAL_DISMISS; -+ } -+ -+ job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, -+ backup->sync, bmap, backup->compress, -+ backup->on_source_error, backup->on_target_error, -+ job_flags, NULL, NULL, txn, errp); -+ return job; -+} -+ -+static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -+ Error **errp) -+{ -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; -+ BlockDriverState *source = NULL; -+ BlockJob *job = NULL; -+ AioContext *aio_context; -+ QDict *options = NULL; -+ Error *local_err = NULL; -+ int flags; -+ int64_t size; -+ bool set_backing_hd = false; -+ -+ if (!backup->has_mode) { -+ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -+ } -+ - bs = bdrv_lookup_bs(backup->device, backup->device, errp); - if (!bs) { - return NULL; -@@ -3585,12 +3626,6 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - goto out; - } - -- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -- if (ret < 0) { -- bdrv_unref(target_bs); -- goto out; -- } -- - if (set_backing_hd) { - bdrv_set_backing_hd(target_bs, source, &local_err); - if (local_err) { -@@ -3598,31 +3633,8 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - } - } - -- if (backup->has_bitmap) { -- bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); -- if (!bmap) { -- error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); -- goto unref; -- } -- if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { -- goto unref; -- } -- } -- if (!backup->auto_finalize) { -- job_flags |= JOB_MANUAL_FINALIZE; -- } -- if (!backup->auto_dismiss) { -- job_flags |= JOB_MANUAL_DISMISS; -- } -- -- job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, -- backup->sync, bmap, backup->compress, -- backup->on_source_error, backup->on_target_error, -- job_flags, NULL, NULL, txn, 
&local_err); -- if (local_err != NULL) { -- error_propagate(errp, local_err); -- goto unref; -- } -+ job = do_backup_common(qapi_DriveBackup_base(backup), -+ bs, target_bs, aio_context, txn, errp); - - unref: - bdrv_unref(target_bs); --- -2.27.0 - diff --git a/edu-fix-DMA-range-upper-bound-check.patch b/edu-fix-DMA-range-upper-bound-check.patch new file mode 100644 index 0000000000000000000000000000000000000000..60ab815ed78b40addde2dce51e0d39f94732b593 --- /dev/null +++ b/edu-fix-DMA-range-upper-bound-check.patch @@ -0,0 +1,47 @@ +From edf3b2b0a9b9aa992592951a979d1b4642026fe5 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Fri, 18 Oct 2024 09:12:50 +0800 +Subject: [PATCH] edu: fix DMA range upper bound check + +cherry-pick from 2c5107e1b455d4a157124f021826ead4e04b4aea + +The edu_check_range function checks that start <= end1 < end2, where +end1 is the upper bound (exclusive) of the guest-supplied DMA range and +end2 is the upper bound (exclusive) of the device's allowed DMA range. +When the guest tries to transfer exactly DMA_SIZE (4096) bytes, end1 +will be equal to end2, so the check fails and QEMU aborts with this +puzzling error message (newlines added for formatting): + + qemu: hardware error: EDU: DMA range + 0x0000000000040000-0x0000000000040fff out of bounds + (0x0000000000040000-0x0000000000040fff)! + +By checking end1 <= end2 instead, guests will be allowed to transfer +exactly 4096 bytes. It is not necessary to explicitly check for +start <= end1 because the previous two checks (within(addr, start, end2) +and end1 > addr) imply start < end1. 
+ +Fixes: b30934cb52a7 ("hw: misc, add educational driver", 2015-01-21) +Signed-off-by: Max Erenberg +Signed-off-by: Michael Tokarev +Signed-off-by: Zhang Jiao +--- + hw/misc/edu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/misc/edu.c b/hw/misc/edu.c +index a1f8bc77e7..e64a246d3f 100644 +--- a/hw/misc/edu.c ++++ b/hw/misc/edu.c +@@ -115,7 +115,7 @@ static void edu_check_range(uint64_t addr, uint64_t size1, uint64_t start, + uint64_t end2 = start + size2; + + if (within(addr, start, end2) && +- end1 > addr && within(end1, start, end2)) { ++ end1 > addr && end1 <= end2) { + return; + } + +-- +2.41.0.windows.1 + diff --git a/ehci-fix-queue-dev-null-ptr-dereference.patch b/ehci-fix-queue-dev-null-ptr-dereference.patch deleted file mode 100644 index 18114e984199f44d7689e939152ed160d6dc0292..0000000000000000000000000000000000000000 --- a/ehci-fix-queue-dev-null-ptr-dereference.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 901ac0dee4b17890db815d143a8efeeac5d105f7 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Wed, 21 Aug 2019 10:53:19 +0200 -Subject: [PATCH 1/5] ehci: fix queue->dev null ptr dereference - -In case we don't have a device for an active queue, just skip -processing the queue (same we do for inactive queues) and log -a guest bug. 
- -Reported-by: Guenter Roeck -Signed-off-by: Gerd Hoffmann -Tested-by: Guenter Roeck -Message-id: 20190821085319.13711-1-kraxel@redhat.com -(cherry-picked from commit 1be344b7ad25d572dadeee46d80f0103354352b2) ---- - hw/usb/hcd-ehci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c -index 62dab05..5f089f3 100644 ---- a/hw/usb/hcd-ehci.c -+++ b/hw/usb/hcd-ehci.c -@@ -1834,6 +1834,9 @@ static int ehci_state_fetchqtd(EHCIQueue *q) - ehci_set_state(q->ehci, q->async, EST_EXECUTING); - break; - } -+ } else if (q->dev == NULL) { -+ ehci_trace_guest_bug(q->ehci, "no device attached to queue"); -+ ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH); - } else { - p = ehci_alloc_packet(q); - p->qtdaddr = q->qtdaddr; --- -1.8.3.1 - diff --git a/elf2dmp-Fix-memory-leak-on-main-error-paths.patch b/elf2dmp-Fix-memory-leak-on-main-error-paths.patch deleted file mode 100644 index 219cec31e799c1f32912a717982740f08c70a3c0..0000000000000000000000000000000000000000 --- a/elf2dmp-Fix-memory-leak-on-main-error-paths.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 1f63f8c20a4cb7b752981ef07b2614bbea828b30 Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Wed, 26 Aug 2020 18:15:53 +0800 -Subject: [PATCH] elf2dmp: Fix memory leak on main() error paths - -The 'kdgb' is allocating memory in get_kdbg(), but it is not freed -in both fill_header() and fill_context() failed branches, fix it. 
- -Signed-off-by: AlexChen -Reviewed-by: Li Qiang -Reviewed-by: Viktor Prutyanov -Reviewed-by: Thomas Huth -Message-Id: <5F463659.8080101@huawei.com> -Signed-off-by: Laurent Vivier -(cherry-picked from commit 885538fdc9) ---- - contrib/elf2dmp/main.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c -index 9a2dbc2902..ac746e49e0 100644 ---- a/contrib/elf2dmp/main.c -+++ b/contrib/elf2dmp/main.c -@@ -568,12 +568,12 @@ int main(int argc, char *argv[]) - if (fill_header(&header, &ps, &vs, KdDebuggerDataBlock, kdbg, - KdVersionBlock, qemu_elf.state_nr)) { - err = 1; -- goto out_pdb; -+ goto out_kdbg; - } - - if (fill_context(kdbg, &vs, &qemu_elf)) { - err = 1; -- goto out_pdb; -+ goto out_kdbg; - } - - if (write_dump(&ps, &header, argv[2])) { --- -2.27.0 - diff --git a/enable-virtio-device-mmio-access-and-wait-util-virti.patch b/enable-virtio-device-mmio-access-and-wait-util-virti.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c3057fe320c927281c98d01910bc7693b2906e3 --- /dev/null +++ b/enable-virtio-device-mmio-access-and-wait-util-virti.patch @@ -0,0 +1,87 @@ +From 0a179313fde1f71a8a7520c3d3149aa2dc4e66b2 Mon Sep 17 00:00:00 2001 +From: fangyi +Date: Sat, 26 Oct 2024 15:32:02 +0800 +Subject: [PATCH] enable virtio device mmio access and wait until virtio device + reset done + +--- + roms/seabios/src/hw/blockcmd.h | 2 +- + roms/seabios/src/hw/virtio-pci.c | 32 +++++++++++++++++++++++++++----- + 2 files changed, 28 insertions(+), 6 deletions(-) + +diff --git a/roms/seabios/src/hw/blockcmd.h b/roms/seabios/src/hw/blockcmd.h +index f18543ed..1063e6ab 100644 +--- a/roms/seabios/src/hw/blockcmd.h ++++ b/roms/seabios/src/hw/blockcmd.h +@@ -30,7 +30,7 @@ struct cdb_read_capacity { + struct cdbres_read_capacity { + u32 sectors; + u32 blksize; +-} PACKED; ++} __attribute__((aligned(4))); + + #define CDB_CMD_TEST_UNIT_READY 0x00 + #define CDB_CMD_INQUIRY 0x12 +diff --git 
a/roms/seabios/src/hw/virtio-pci.c b/roms/seabios/src/hw/virtio-pci.c +index 89a4f505..5d7b8428 100644 +--- a/roms/seabios/src/hw/virtio-pci.c ++++ b/roms/seabios/src/hw/virtio-pci.c +@@ -15,6 +15,7 @@ + * See the COPYING file in the top-level directory. + */ + ++#include "util.h" // msleep + #include "config.h" // CONFIG_DEBUG_LEVEL + #include "malloc.h" // free + #include "output.h" // dprintf +@@ -271,6 +272,10 @@ void vp_reset(struct vp_device *vp) + vp_read(&vp->common, virtio_mmio_cfg, irq_status); + } else if (vp->use_modern) { + vp_write(&vp->common, virtio_pci_common_cfg, device_status, 0); ++ dprintf(1, "vp start reset\n"); ++ while (vp_get_status(vp) != 0) ++ msleep(2); ++ dprintf(1, "vp reset finished\n"); + vp_read(&vp->isr, virtio_pci_isr, isr); + } else { + vp_write(&vp->legacy, virtio_pci_legacy, status, 0); +@@ -535,14 +540,31 @@ void vp_init_simple(struct vp_device *vp, struct pci_device *pci) + } else { + dprintf(1, "pci dev %pP using legacy (0.9.5) virtio mode\n", pci); + vp->legacy.bar = 0; +- vp->legacy.ioaddr = pci_enable_iobar(pci, PCI_BASE_ADDRESS_0); +- if (!vp->legacy.ioaddr) +- return; +- vp->legacy.mode = VP_ACCESS_IO; ++ ++ /* ++ * Extend the legacy virtio interface to support MMIO bar which ++ * is required by SR-IOV. 
++ */ ++ addr = pci_config_readl(pci->bdf, PCI_BASE_ADDRESS_0); ++ if (addr & PCI_BASE_ADDRESS_SPACE_IO) { ++ dprintf(1, "legacy virtio: I/O BAR used\n"); ++ vp->legacy.ioaddr = pci_enable_iobar(pci, PCI_BASE_ADDRESS_0); ++ if (!vp->legacy.ioaddr) ++ return; ++ vp->legacy.mode = VP_ACCESS_IO; ++ } else { ++ dprintf(1, "legacy virtio: MMIO BAR used\n"); ++ vp->legacy.memaddr = pci_enable_membar(pci, PCI_BASE_ADDRESS_0); ++ if (!vp->legacy.memaddr) ++ return; ++ vp->legacy.mode = VP_ACCESS_MMIO; ++ } + } + +- vp_reset(vp); + pci_enable_busmaster(pci); ++ dprintf(1, "pci dev %pP start reset\n", pci); ++ vp_reset(vp); ++ dprintf(1, "pci dev %pP finish reset\n", pci); + vp_set_status(vp, VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER ); + } +-- +2.43.0 + diff --git a/es1370-check-total-frame-count-against-current-frame.patch b/es1370-check-total-frame-count-against-current-frame.patch deleted file mode 100644 index fb1e7a7cdfa6f8046b6aa2ebb270b557dcae14a5..0000000000000000000000000000000000000000 --- a/es1370-check-total-frame-count-against-current-frame.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 22bbf1a90ac11fe30e1665c09f9ad904683b6ddc Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Fri, 15 May 2020 01:36:08 +0530 -Subject: [PATCH 1/9] es1370: check total frame count against current frame - -A guest user may set channel frame count via es1370_write() -such that, in es1370_transfer_audio(), total frame count -'size' is lesser than the number of frames that are processed -'cnt'. - - int cnt = d->frame_cnt >> 16; - int size = d->frame_cnt & 0xffff; - -if (size < cnt), it results in incorrect calculations leading -to OOB access issue(s). Add check to avoid it. 
- -Reported-by: Ren Ding -Reported-by: Hanqing Zhao -Signed-off-by: Prasad J Pandit -Message-id: 20200514200608.1744203-1-ppandit@redhat.com -Signed-off-by: Gerd Hoffmann ---- - hw/audio/es1370.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c -index 260c142b70..eff7d03ae1 100644 ---- a/hw/audio/es1370.c -+++ b/hw/audio/es1370.c -@@ -643,6 +643,9 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, - int csc_bytes = (csc + 1) << d->shift; - int cnt = d->frame_cnt >> 16; - int size = d->frame_cnt & 0xffff; -+ if (size < cnt) { -+ return; -+ } - int left = ((size - cnt + 1) << 2) + d->leftover; - int transferred = 0; - int temp = audio_MIN (max, audio_MIN (left, csc_bytes)); -@@ -651,7 +654,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, - addr += (cnt << 2) + d->leftover; - - if (index == ADC_CHANNEL) { -- while (temp) { -+ while (temp > 0) { - int acquired, to_copy; - - to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf)); -@@ -669,7 +672,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, - else { - SWVoiceOut *voice = s->dac_voice[index]; - -- while (temp) { -+ while (temp > 0) { - int copied, to_copy; - - to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf)); --- -2.25.1 - diff --git a/exec-memop-Remove-unused-memop_big_endian-helper.patch b/exec-memop-Remove-unused-memop_big_endian-helper.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e4176f089fb555f256d69f2db44542974df04fe --- /dev/null +++ b/exec-memop-Remove-unused-memop_big_endian-helper.patch @@ -0,0 +1,39 @@ +From 9a12c439cb9d1e59175be4b96adf0732dca39db3 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Tue, 12 Nov 2024 13:30:29 +0800 +Subject: [PATCH] exec/memop: Remove unused memop_big_endian() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick 
from 5caa0e1b1bf8597ea7277391b0e17e8584fad18f + +Last use of memop_big_endian() was removed in commit 592134617c9 +("accel/tcg: Reorg system mode store helpers"). + +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Thomas Huth +Message-Id: <20241003234211.53644-3-philmd@linaro.org> +Signed-off-by: Zhang Jiao +--- + include/exec/memop.h | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/include/exec/memop.h b/include/exec/memop.h +index a86dc6743a..5b9064819c 100644 +--- a/include/exec/memop.h ++++ b/include/exec/memop.h +@@ -164,10 +164,4 @@ static inline MemOp size_memop(unsigned size) + return ctz32(size); + } + +-/* Big endianness from MemOp. */ +-static inline bool memop_big_endian(MemOp op) +-{ +- return (op & MO_BSWAP) == MO_BE; +-} +- + #endif +-- +2.41.0.windows.1 + diff --git a/exec-set-map-length-to-zero-when-returning-NULL.patch b/exec-set-map-length-to-zero-when-returning-NULL.patch deleted file mode 100644 index 64c918e8d9de6eb3dd357c955d75488ff5f11c48..0000000000000000000000000000000000000000 --- a/exec-set-map-length-to-zero-when-returning-NULL.patch +++ /dev/null @@ -1,54 +0,0 @@ -From a1a9d6f908b21878daa7868313243c30b7a90fcf Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 26 May 2020 16:47:43 +0530 -Subject: [PATCH 2/9] exec: set map length to zero when returning NULL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When mapping physical memory into host's virtual address space, -'address_space_map' may return NULL if BounceBuffer is in_use. -Set and return '*plen = 0' to avoid later NULL pointer dereference. 
- -Reported-by: Alexander Bulekov -Fixes: https://bugs.launchpad.net/qemu/+bug/1878259 -Suggested-by: Paolo Bonzini -Suggested-by: Peter Maydell -Signed-off-by: Prasad J Pandit -Message-Id: <20200526111743.428367-1-ppandit@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Paolo Bonzini ---- - exec.c | 1 + - include/exec/memory.h | 3 ++- - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/exec.c b/exec.c -index 3e78de3b8f..85c6d80353 100644 ---- a/exec.c -+++ b/exec.c -@@ -3739,6 +3739,7 @@ void *address_space_map(AddressSpace *as, - if (!memory_access_is_direct(mr, is_write)) { - if (atomic_xchg(&bounce.in_use, true)) { - rcu_read_unlock(); -+ *plen = 0; - return NULL; - } - /* Avoid unbounded allocations */ -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 611a89122d..dca8184277 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -2064,7 +2064,8 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, hwaddr len, - /* address_space_map: map a physical memory region into a host virtual address - * - * May map a subset of the requested range, given by and returned in @plen. -- * May return %NULL if resources needed to perform the mapping are exhausted. -+ * May return %NULL and set *@plen to zero(0), if resources needed to perform -+ * the mapping are exhausted. - * Use only for reads OR writes - not for read-modify-write operations. - * Use cpu_register_map_client() to know when retrying the map operation is - * likely to succeed. 
--- -2.25.1 - diff --git a/feature-Add-log-for-each-modules.patch b/feature-Add-log-for-each-modules.patch new file mode 100644 index 0000000000000000000000000000000000000000..477a2eaa6facf8366775e2c0c7a0f5c30e01b3c5 --- /dev/null +++ b/feature-Add-log-for-each-modules.patch @@ -0,0 +1,250 @@ +From 30cc47b6dd3e9ff4842eb1c2a918bbabfd8c593b Mon Sep 17 00:00:00 2001 +From: "wangxinxin.wang@huawei.com" +Date: Sun, 17 Mar 2024 15:44:28 +0800 +Subject: [PATCH] feature: Add log for each modules + +add log for each modules. + +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + accel/kvm/kvm-all.c | 5 ++++- + hw/char/virtio-serial-bus.c | 5 +++++ + hw/pci/pci.c | 1 + + hw/usb/bus.c | 6 ++++++ + hw/usb/host-libusb.c | 5 +++++ + hw/virtio/virtio-scsi-pci.c | 3 +++ + monitor/qmp-cmds.c | 3 +++ + os-posix.c | 1 + + qapi/qmp-dispatch.c | 15 +++++++++++++++ + system/qdev-monitor.c | 5 +++++ + 10 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 33f4c6d547..d900df93a4 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1834,7 +1834,10 @@ void kvm_irqchip_commit_routes(KVMState *s) + s->irq_routes->flags = 0; + trace_kvm_irqchip_commit_routes(); + ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); +- assert(ret == 0); ++ if (ret < 0) { ++ error_report("Set GSI routing failed: %m"); ++ abort(); ++ } + } + + static void kvm_add_routing_entry(KVMState *s, +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index dd619f0731..44906057be 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -257,6 +257,8 @@ static size_t send_control_event(VirtIOSerial *vser, uint32_t port_id, + virtio_stw_p(vdev, &cpkt.value, value); + + trace_virtio_serial_send_control_event(port_id, event, value); ++ qemu_log("virtio serial port %d send control message" ++ " event = %d, value = %d\n", port_id, event, value); + return 
send_control_msg(vser, &cpkt, sizeof(cpkt)); + } + +@@ -364,6 +366,9 @@ static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len) + cpkt.value = virtio_lduw_p(vdev, &gcpkt->value); + + trace_virtio_serial_handle_control_message(cpkt.event, cpkt.value); ++ qemu_log("virtio serial port '%u' handle control message" ++ " event = %d, value = %d\n", ++ virtio_ldl_p(vdev, &gcpkt->id), cpkt.event, cpkt.value); + + if (cpkt.event == VIRTIO_CONSOLE_DEVICE_READY) { + if (!cpkt.value) { +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index c49417abb2..9da41088df 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2411,6 +2411,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + snprintf(name, sizeof(name), "%s.rom", + vmsd ? vmsd->name : object_get_typename(OBJECT(pdev))); + ++ qemu_log("add rom file: %s\n", name); + pdev->has_rom = true; + memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, + &error_fatal); +diff --git a/hw/usb/bus.c b/hw/usb/bus.c +index 92d6ed5626..20cd9b6e6f 100644 +--- a/hw/usb/bus.c ++++ b/hw/usb/bus.c +@@ -536,6 +536,10 @@ void usb_check_attach(USBDevice *dev, Error **errp) + bus->qbus.name, port->path, portspeed); + return; + } ++ ++ qemu_log("attach usb device \"%s\" (%s speed) to VM bus \"%s\", " ++ "port \"%s\" (%s speed)\n", dev->product_desc, devspeed, ++ bus->qbus.name, port->path, portspeed); + } + + void usb_device_attach(USBDevice *dev, Error **errp) +@@ -564,6 +568,8 @@ int usb_device_detach(USBDevice *dev) + + usb_detach(port); + dev->attached = false; ++ qemu_log("detach usb device \"%s\" from VM bus \"%s\", port \"%s\"\n", ++ dev->product_desc, bus->qbus.name, port->path); + return 0; + } + +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index dba469c1ef..11a246ac72 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -992,6 +992,8 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + + rc = libusb_open(dev, &s->dh); + if (rc != 0) { 
++ qemu_log("libusb open usb device bus %d, device %d failed\n", ++ bus_num, addr); + goto fail; + } + } else { +@@ -1019,6 +1021,7 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + + libusb_get_device_descriptor(dev, &s->ddesc); + usb_host_get_port(s->dev, s->port, sizeof(s->port)); ++ qemu_log("open a host usb device on bus %d, device %d\n", bus_num, addr); + + usb_ep_init(udev); + usb_host_ep_update(s); +@@ -1146,6 +1149,8 @@ static int usb_host_close(USBHostDevice *s) + usb_device_detach(udev); + } + ++ qemu_log("begin to reset the usb device, bus : %d, device : %d\n", ++ s->bus_num, s->addr); + usb_host_release_interfaces(s); + libusb_reset_device(s->dh); + usb_host_attach_kernel(s); +diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c +index e8e3442f38..e542d47162 100644 +--- a/hw/virtio/virtio-scsi-pci.c ++++ b/hw/virtio/virtio-scsi-pci.c +@@ -20,6 +20,7 @@ + #include "qemu/module.h" + #include "hw/virtio/virtio-pci.h" + #include "qom/object.h" ++#include "qemu/log.h" + + typedef struct VirtIOSCSIPCI VirtIOSCSIPCI; + +@@ -51,6 +52,8 @@ static void virtio_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + VirtIOSCSIConf *conf = &dev->vdev.parent_obj.conf; + char *bus_name; + ++ qemu_log("virtio scsi HBA %s begin to initialize.\n", ++ !proxy->id ? 
"NULL" : proxy->id); + if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { + conf->num_queues = + virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED); +diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c +index b0f948d337..e78462b857 100644 +--- a/monitor/qmp-cmds.c ++++ b/monitor/qmp-cmds.c +@@ -32,6 +32,7 @@ + #include "hw/mem/memory-device.h" + #include "hw/intc/intc.h" + #include "hw/rdma/rdma.h" ++#include "qemu/log.h" + + NameInfo *qmp_query_name(Error **errp) + { +@@ -110,8 +111,10 @@ void qmp_cont(Error **errp) + } + + if (runstate_check(RUN_STATE_INMIGRATE)) { ++ qemu_log("qmp cont is received in migration\n"); + autostart = 1; + } else { ++ qemu_log("qmp cont is received and vm is started\n"); + vm_start(); + } + } +diff --git a/os-posix.c b/os-posix.c +index 52ef6990ff..8f70ee0534 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -306,6 +306,7 @@ int os_mlock(void) + #ifdef HAVE_MLOCKALL + int ret = 0; + ++ qemu_log("do mlockall\n"); + ret = mlockall(MCL_CURRENT | MCL_FUTURE); + if (ret < 0) { + error_report("mlockall: %s", strerror(errno)); +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 7a215cbfd7..e33efd3740 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -25,6 +25,7 @@ + #include "qemu/coroutine.h" + #include "qemu/main-loop.h" + #include "qemu/log.h" ++#include "qapi/qmp/qstring.h" + + Visitor *qobject_input_visitor_new_qmp(QObject *obj) + { +@@ -220,6 +221,20 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + + assert(!(oob && qemu_in_coroutine())); + assert(monitor_cur() == NULL); ++ ++ json = qobject_to_json(QOBJECT(args)); ++ if (json) { ++ if ((strcmp(command, "query-block-jobs") != 0) ++ && (strcmp(command, "query-migrate") != 0) ++ && (strcmp(command, "query-blockstats") != 0) ++ && (strcmp(command, "query-balloon") != 0) ++ && (strcmp(command, "set_password") != 0)) { ++ qemu_log("qmp_cmd_name: %s, arguments: %s\n", ++ command, json->str); ++ } ++ g_string_free(json, 
true); ++ } ++ + if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { + monitor_set_cur(qemu_coroutine_self(), cur_mon); + cmd->fn(args, &ret, &err); +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index b10e483a9a..5b35704b5e 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -644,6 +644,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { ++ qemu_log("can not find bus for %s\n", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +@@ -714,6 +715,8 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json, + errp); + if (*errp) { ++ qemu_log("the bus %s -driver %s set property failed\n", ++ bus ? bus->name : "None", driver); + goto err_del_dev; + } + qemu_log("add qdev %s:%s success\n", driver, dev->id ? dev->id : "none"); +@@ -738,6 +741,8 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + + ret = qdev_device_add_from_qdict(qdict, false, errp); + if (ret) { ++ qemu_log("add qdev %s:%s success\n", qemu_opt_get(opts, "driver"), ++ qemu_opts_id(opts) ? 
qemu_opts_id(opts) : "none"); + qemu_opts_del(opts); + } + qobject_unref(qdict); +-- +2.27.0 + diff --git a/feature-Add-logs-for-vm-start-and-destroy.patch b/feature-Add-logs-for-vm-start-and-destroy.patch new file mode 100644 index 0000000000000000000000000000000000000000..b3964bdf4efdb14f7e62d9f914938c5da7429296 --- /dev/null +++ b/feature-Add-logs-for-vm-start-and-destroy.patch @@ -0,0 +1,158 @@ +From 9a47271fb6c855ec92e087d59d65f3cc0c684725 Mon Sep 17 00:00:00 2001 +From: "wangxinxin.wang@huawei.com" +Date: Sun, 17 Mar 2024 15:04:09 +0800 +Subject: [PATCH] feature: Add logs for vm start and destroy + +Add QEMU_LOG for vm start and destroy + +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + hw/acpi/core.c | 4 ++++ + hw/core/reset.c | 2 ++ + system/main.c | 2 ++ + system/runstate.c | 2 ++ + system/vl.c | 6 ++++++ + 5 files changed, 16 insertions(+) + +diff --git a/hw/acpi/core.c b/hw/acpi/core.c +index ec5e127d17..b6241f70e9 100644 +--- a/hw/acpi/core.c ++++ b/hw/acpi/core.c +@@ -24,6 +24,7 @@ + #include "hw/acpi/acpi.h" + #include "hw/nvram/fw_cfg.h" + #include "qemu/config-file.h" ++#include "qemu/log.h" + #include "qapi/error.h" + #include "qapi/opts-visitor.h" + #include "qapi/qapi-events-run-state.h" +@@ -588,13 +589,16 @@ static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val, + uint16_t sus_typ = (val >> 10) & 7; + switch (sus_typ) { + case 0: /* soft power off */ ++ qemu_log("VM will be soft power off\n"); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + break; + case 1: ++ qemu_log("VM will be suspend state\n"); + qemu_system_suspend_request(); + break; + default: + if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ ++ qemu_log("VM will be S4 state\n"); + qapi_event_send_suspend_disk(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } +diff --git a/hw/core/reset.c b/hw/core/reset.c +index d3263b613e..fa63bfedb7 100644 +--- a/hw/core/reset.c ++++ b/hw/core/reset.c +@@ 
-25,6 +25,7 @@ + + #include "qemu/osdep.h" + #include "qemu/queue.h" ++#include "qemu/log.h" + #include "sysemu/reset.h" + + /* reset/shutdown handler */ +@@ -75,6 +76,7 @@ void qemu_devices_reset(ShutdownCause reason) + { + QEMUResetEntry *re, *nre; + ++ qemu_log("reset all devices\n"); + /* reset all devices */ + QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) { + if (reason == SHUTDOWN_CAUSE_SNAPSHOT_LOAD && +diff --git a/system/main.c b/system/main.c +index 9b91d21ea8..28bb283ebf 100644 +--- a/system/main.c ++++ b/system/main.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "qemu-main.h" + #include "sysemu/sysemu.h" + +@@ -34,6 +35,7 @@ int qemu_default_main(void) + { + int status; + ++ qemu_log("qemu enter main_loop\n"); + status = qemu_main_loop(); + qemu_cleanup(status); + +diff --git a/system/runstate.c b/system/runstate.c +index 62e6db8d42..538c645326 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -769,9 +769,11 @@ static bool main_loop_should_exit(int *status) + } + if (qemu_powerdown_requested()) { + qemu_system_powerdown(); ++ qemu_log("domain is power down by outside operation\n"); + } + if (qemu_vmstop_requested(&r)) { + vm_stop(r); ++ qemu_log("domain is stopped by outside operation\n"); + } + return false; + } +diff --git a/system/vl.c b/system/vl.c +index 2bcd9efb9a..165c3cae8a 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -26,6 +26,7 @@ + #include "qemu/help-texts.h" + #include "qemu/datadir.h" + #include "qemu/units.h" ++#include "qemu/log.h" + #include "exec/cpu-common.h" + #include "exec/page-vary.h" + #include "hw/qdev-properties.h" +@@ -2633,6 +2634,7 @@ static void qemu_create_cli_devices(void) + } + + /* init generic devices */ ++ qemu_log("device init start\n"); + rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); +@@ -2778,6 +2780,7 @@ void qemu_init(int argc, char **argv) + + 
qemu_init_subsystems(); + ++ qemu_log("qemu pid is %d, options parsing start\n", getpid()); + /* first pass of option parsing */ + optind = 1; + while (optind < argc) { +@@ -2997,6 +3000,7 @@ void qemu_init(int argc, char **argv) + exit(0); + break; + case QEMU_OPTION_m: ++ qemu_log("memory options parse start\n"); + opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), optarg, true); + if (opts == NULL) { + exit(1); +@@ -3714,6 +3718,7 @@ void qemu_init(int argc, char **argv) + */ + + machine_class = MACHINE_GET_CLASS(current_machine); ++ qemu_log("configure accelerator %s start\n", machine_class->name); + if (!qtest_enabled() && machine_class->deprecation_reason) { + warn_report("Machine type '%s' is deprecated: %s", + machine_class->name, machine_class->deprecation_reason); +@@ -3732,6 +3737,7 @@ void qemu_init(int argc, char **argv) + */ + migration_object_init(); + ++ qemu_log("machine init start\n"); + /* parse features once if machine provides default cpu_type */ + current_machine->cpu_type = machine_class->default_cpu_type; + if (cpu_option) { +-- +2.27.0 + diff --git a/file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch b/file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch deleted file mode 100644 index 28c1e3bc6837063888bb8c862fb1e629f70de8be..0000000000000000000000000000000000000000 --- a/file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 94be73a20d42482cdf30115e672c36af2fe9068d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 17 Jul 2020 12:54:26 +0200 -Subject: [PATCH 5/5] file-posix: Fix leaked fd in raw_open_common() error path - -Signed-off-by: Kevin Wolf -Message-Id: <20200717105426.51134-4-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -Signed-off-by: Zhenyu Ye ---- - block/file-posix.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 2184aa98..1259bf58 100644 ---- a/block/file-posix.c -+++ 
b/block/file-posix.c -@@ -671,6 +671,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; - ret = 0; - fail: -+ if (ret < 0 && s->fd != -1) { -+ qemu_close(s->fd); -+ } - if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { - unlink(filename); - } --- -2.22.0.windows.1 - diff --git a/file-posix-Handle-undetectable-alignment.patch b/file-posix-Handle-undetectable-alignment.patch deleted file mode 100644 index 87a97dc4fdd67b160b89726f2ec9425d868df325..0000000000000000000000000000000000000000 --- a/file-posix-Handle-undetectable-alignment.patch +++ /dev/null @@ -1,158 +0,0 @@ -From b4bbef6714a45ffd7b4a57e5a0522c7006f504a6 Mon Sep 17 00:00:00 2001 -From: Nir Soffer -Date: Tue, 13 Aug 2019 21:21:03 +0300 -Subject: [PATCH] file-posix: Handle undetectable alignment - -In some cases buf_align or request_alignment cannot be detected: - -1. With Gluster, buf_align cannot be detected since the actual I/O is - done on Gluster server, and qemu buffer alignment does not matter. - Since we don't have alignment requirement, buf_align=1 is the best - value. - -2. With local XFS filesystem, buf_align cannot be detected if reading - from unallocated area. In this we must align the buffer, but we don't - know what is the correct size. Using the wrong alignment results in - I/O error. - -3. With Gluster backed by XFS, request_alignment cannot be detected if - reading from unallocated area. In this case we need to use the - correct alignment, and failing to do so results in I/O errors. - -4. With NFS, the server does not use direct I/O, so both buf_align cannot - be detected. In this case we don't need any alignment so we can use - buf_align=1 and request_alignment=1. - -These cases seems to work when storage sector size is 512 bytes, because -the current code starts checking align=512. If the check succeeds -because alignment cannot be detected we use 512. 
But this does not work -for storage with 4k sector size. - -To determine if we can detect the alignment, we probe first with -align=1. If probing succeeds, maybe there are no alignment requirement -(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we -don't have any way to tell, we treat this as undetectable alignment. If -probing with align=1 fails with EINVAL, but probing with one of the -expected alignments succeeds, we know that we found a working alignment. - -Practically the alignment requirements are the same for buffer -alignment, buffer length, and offset in file. So in case we cannot -detect buf_align, we can use request alignment. If we cannot detect -request alignment, we can fallback to a safe value. To use this logic, -we probe first request alignment instead of buf_align. - -Here is a table showing the behaviour with current code (the value in -parenthesis is the optimal value). - -Case Sector buf_align (opt) request_alignment (opt) result -====================================================================== -1 512 512 (1) 512 (512) OK -1 4096 512 (1) 4096 (4096) FAIL ----------------------------------------------------------------------- -2 512 512 (512) 512 (512) OK -2 4096 512 (4096) 4096 (4096) FAIL ----------------------------------------------------------------------- -3 512 512 (1) 512 (512) OK -3 4096 512 (1) 512 (4096) FAIL ----------------------------------------------------------------------- -4 512 512 (1) 512 (1) OK -4 4096 512 (1) 512 (1) OK - -Same cases with this change: - -Case Sector buf_align (opt) request_alignment (opt) result -====================================================================== -1 512 512 (1) 512 (512) OK -1 4096 4096 (1) 4096 (4096) OK ----------------------------------------------------------------------- -2 512 512 (512) 512 (512) OK -2 4096 4096 (4096) 4096 (4096) OK ----------------------------------------------------------------------- -3 512 4096 (1) 4096 (512) OK -3 4096 4096 (1) 
4096 (4096) OK ----------------------------------------------------------------------- -4 512 4096 (1) 4096 (1) OK -4 4096 4096 (1) 4096 (1) OK - -I tested that provisioning VMs and copying disks on local XFS and -Gluster with 4k bytes sector size work now, resolving bugs [1],[2]. -I tested also on XFS, NFS, Gluster with 512 bytes sector size. - -[1] https://bugzilla.redhat.com/1737256 -[2] https://bugzilla.redhat.com/1738657 - -Signed-off-by: Nir Soffer -Signed-off-by: Kevin Wolf - -(cherry picked from commit a6b257a08e3d72219f03e461a52152672fec0612) - -Signed-off-by: Michael Roth ---- - block/file-posix.c | 36 +++++++++++++++++++++++++----------- - 1 file changed, 25 insertions(+), 11 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index c185f34..d5065c6 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -321,6 +321,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) - BDRVRawState *s = bs->opaque; - char *buf; - size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); -+ size_t alignments[] = {1, 512, 1024, 2048, 4096}; - - /* For SCSI generic devices the alignment is not really used. - With buffered I/O, we don't have any restrictions. */ -@@ -347,25 +348,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) - } - #endif - -- /* If we could not get the sizes so far, we can only guess them */ -- if (!s->buf_align) { -+ /* -+ * If we could not get the sizes so far, we can only guess them. First try -+ * to detect request alignment, since it is more likely to succeed. Then -+ * try to detect buf_align, which cannot be detected in some cases (e.g. -+ * Gluster). If buf_align cannot be detected, we fallback to the value of -+ * request_alignment. 
-+ */ -+ -+ if (!bs->bl.request_alignment) { -+ int i; - size_t align; -- buf = qemu_memalign(max_align, 2 * max_align); -- for (align = 512; align <= max_align; align <<= 1) { -- if (raw_is_io_aligned(fd, buf + align, max_align)) { -- s->buf_align = align; -+ buf = qemu_memalign(max_align, max_align); -+ for (i = 0; i < ARRAY_SIZE(alignments); i++) { -+ align = alignments[i]; -+ if (raw_is_io_aligned(fd, buf, align)) { -+ /* Fallback to safe value. */ -+ bs->bl.request_alignment = (align != 1) ? align : max_align; - break; - } - } - qemu_vfree(buf); - } - -- if (!bs->bl.request_alignment) { -+ if (!s->buf_align) { -+ int i; - size_t align; -- buf = qemu_memalign(s->buf_align, max_align); -- for (align = 512; align <= max_align; align <<= 1) { -- if (raw_is_io_aligned(fd, buf, align)) { -- bs->bl.request_alignment = align; -+ buf = qemu_memalign(max_align, 2 * max_align); -+ for (i = 0; i < ARRAY_SIZE(alignments); i++) { -+ align = alignments[i]; -+ if (raw_is_io_aligned(fd, buf + align, max_align)) { -+ /* Fallback to request_aligment. */ -+ s->buf_align = (align != 1) ? 
align : bs->bl.request_alignment; - break; - } - } --- -1.8.3.1 - diff --git a/fix-compile-error-on-loongarch.patch b/fix-compile-error-on-loongarch.patch new file mode 100644 index 0000000000000000000000000000000000000000..62a75f8bdfc19697b4126611b17af573b6f81936 --- /dev/null +++ b/fix-compile-error-on-loongarch.patch @@ -0,0 +1,27 @@ +From 0826efefea34a6fb6e17502f3a293572f109a261 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Thu, 5 Dec 2024 14:18:01 +0800 +Subject: [PATCH] fix compile error on loongarch + +add cpu.h in loongarch_ipi.c + +Signed-off-by: Xianglai Li +--- + hw/intc/loongarch_ipi.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index e228669aa5..630bcb14ea 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -15,6 +15,7 @@ + #include "exec/address-spaces.h" + #include "hw/loongarch/virt.h" + #include "migration/vmstate.h" ++#include "target/loongarch/cpu.h" + #include "target/loongarch/internals.h" + #include "trace.h" + +-- +2.39.1 + diff --git a/fix-qemu-core-when-vhost-user-net-config-with-server.patch b/fix-qemu-core-when-vhost-user-net-config-with-server.patch new file mode 100644 index 0000000000000000000000000000000000000000..68ba7f2bc15cd6da54f26c3bf5886786bb609324 --- /dev/null +++ b/fix-qemu-core-when-vhost-user-net-config-with-server.patch @@ -0,0 +1,46 @@ +From 97335ac382e36db18a61d3891f1fafd15475822e Mon Sep 17 00:00:00 2001 +From: caojinhuahw +Date: Mon, 19 Dec 2022 12:35:50 +0000 +Subject: [PATCH] fix qemu-core when vhost-user-net config with server mode + +commit 3a223111d7 set default reconnect for vhost-user-net +device, if vhost-user-net config with server mode will +casuse the core when ovs client stop. 
+tcp_chr_disconnect ---> set tcp_char state disconnect +tcp_chr start reconnect ---> set tcp_char state connecting +tcp_char is listen ---> call tcp_chr_accept() +fun tcp_char_accept() set tcp_char state to connecting, but +current tcp_char state already is connecting, assert failed +in tcp_char_change_state() raise qemu core + assert(s->state == TCP_CHARDEV_STATE_DISCONNECTED) + +this commit check tcp_char mode, if tcp_char config with server +mode, dont set reconnect time for tcp_chr. + +fix: 3a223111d7 vhost-user: Add support reconnect vhost-user socket + +Signed-off-by: caojinhuahw +--- + chardev/char-socket.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 9c60e15c8e..0c9ab069ae 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -347,6 +347,12 @@ static void tcp_chr_set_reconnect_time(Chardev *chr, + void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time) + { + ChardevClass *cc = CHARDEV_GET_CLASS(chr); ++ SocketChardev *s = SOCKET_CHARDEV(chr); ++ ++ /* if sock dev is listen, dont set reconnect time */ ++ if (s->is_listen) { ++ return; ++ } + + if (cc->chr_set_reconnect_time) { + cc->chr_set_reconnect_time(chr, reconnect_time); +-- +2.27.0 + diff --git a/fix-vhost_user_blk_watch-crash.patch b/fix-vhost_user_blk_watch-crash.patch deleted file mode 100644 index 905cbe3c2542b7d59f8d69da720bf0639a4be9bb..0000000000000000000000000000000000000000 --- a/fix-vhost_user_blk_watch-crash.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 0b77995819a596f96c621697643e83624126e668 Mon Sep 17 00:00:00 2001 -From: Li Feng -Date: Mon, 23 Mar 2020 13:29:24 +0800 -Subject: [PATCH 13/14] fix vhost_user_blk_watch crash - -the G_IO_HUP is watched in tcp_chr_connect, and the callback -vhost_user_blk_watch is not needed, because tcp_chr_hup is registered as -callback. And it will close the tcp link. 
- -Signed-off-by: Li Feng -Message-Id: <20200323052924.29286-1-fengli@smartx.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: Peng Liang ---- - hw/block/vhost-user-blk.c | 19 ------------------- - include/hw/virtio/vhost-user-blk.h | 1 - - 2 files changed, 20 deletions(-) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index 85bc4017e7e9..dc66f8a5febd 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -346,18 +346,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) - vhost_dev_cleanup(&s->dev); - } - --static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, -- void *opaque) --{ -- DeviceState *dev = opaque; -- VirtIODevice *vdev = VIRTIO_DEVICE(dev); -- VHostUserBlk *s = VHOST_USER_BLK(vdev); -- -- qemu_chr_fe_disconnect(&s->chardev); -- -- return true; --} -- - static void vhost_user_blk_event(void *opaque, int event) - { - DeviceState *dev = opaque; -@@ -370,15 +358,9 @@ static void vhost_user_blk_event(void *opaque, int event) - qemu_chr_fe_disconnect(&s->chardev); - return; - } -- s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, -- vhost_user_blk_watch, dev); - break; - case CHR_EVENT_CLOSED: - vhost_user_blk_disconnect(dev); -- if (s->watch) { -- g_source_remove(s->watch); -- s->watch = 0; -- } - break; - } - } -@@ -419,7 +401,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - - s->inflight = g_new0(struct vhost_inflight, 1); - s->vqs = g_new(struct vhost_virtqueue, s->num_queues); -- s->watch = 0; - s->connected = false; - - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, -diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h -index 8dbf11c6f071..ad9b742a644c 100644 ---- a/include/hw/virtio/vhost-user-blk.h -+++ b/include/hw/virtio/vhost-user-blk.h -@@ -38,7 +38,6 @@ typedef struct VHostUserBlk { - struct vhost_inflight *inflight; - VhostUserState 
vhost_user; - struct vhost_virtqueue *vqs; -- guint watch; - bool connected; - } VHostUserBlk; - --- -2.26.2 - diff --git a/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch b/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffbb8a8643d72a257a6914007a971d6d63695704 --- /dev/null +++ b/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch @@ -0,0 +1,129 @@ +From 0a6baf4799dd6e70d7959002ea6ddb998eddbc6d Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Mon, 18 Mar 2024 15:53:43 +0800 +Subject: [PATCH] freeclock: add qmp command to get time offset of vm in + seconds + +When setting the system time in VM, a RTC_CHANGE event will be reported. +However, if libvirt is restarted while the event is be reporting, the +event will be lost and we will get the old time (not the time we set in +VM) after rebooting the VM. + +We save the delta time in QEMU and add a rtc-date-diff qmp to get the +delta time so that libvirt can get the latest time in VM according to +the qmp after libvirt is restarted. 
+ +Signed-off-by: Peng Liang +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/core/machine-qmp-cmds.c | 6 ++++++ + include/sysemu/rtc.h | 4 +++- + qapi/misc.json | 9 +++++++++ + qapi/pragma.json | 3 ++- + system/rtc.c | 11 +++++++++++ + 5 files changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c +index 3860a50c3b..f1389ef644 100644 +--- a/hw/core/machine-qmp-cmds.c ++++ b/hw/core/machine-qmp-cmds.c +@@ -8,6 +8,7 @@ + */ + + #include "qemu/osdep.h" ++#include "sysemu/rtc.h" + #include "hw/acpi/vmgenid.h" + #include "hw/boards.h" + #include "hw/intc/intc.h" +@@ -373,6 +374,11 @@ HumanReadableText *qmp_x_query_irq(Error **errp) + return human_readable_text_from_str(buf); + } + ++int64_t qmp_query_rtc_date_diff(Error **errp) ++{ ++ return get_rtc_date_diff(); ++} ++ + GuidInfo *qmp_query_vm_generation_id(Error **errp) + { + GuidInfo *info; +diff --git a/include/sysemu/rtc.h b/include/sysemu/rtc.h +index 0fc8ad6fdf..3edae762d4 100644 +--- a/include/sysemu/rtc.h ++++ b/include/sysemu/rtc.h +@@ -54,5 +54,7 @@ void qemu_get_timedate(struct tm *tm, time_t offset); + * then this function will return 3600. 
+ */ + time_t qemu_timedate_diff(struct tm *tm); +- ++time_t get_rtc_date_diff(void); ++void set_rtc_date_diff(time_t diff); ++int64_t qmp_query_rtc_date_diff(Error **errp); + #endif +diff --git a/qapi/misc.json b/qapi/misc.json +index cda2effa81..1832d5f460 100644 +--- a/qapi/misc.json ++++ b/qapi/misc.json +@@ -550,6 +550,15 @@ + 'returns': ['CommandLineOptionInfo'], + 'allow-preconfig': true} + ++## ++# @query-rtc-date-diff: ++# ++# get vm's time offset ++# ++# Since: 2.8 ++## ++{ 'command': 'query-rtc-date-diff', 'returns': 'int64' } ++ + ## + # @RTC_CHANGE: + # +diff --git a/qapi/pragma.json b/qapi/pragma.json +index 0aa4eeddd3..7a07b44bb1 100644 +--- a/qapi/pragma.json ++++ b/qapi/pragma.json +@@ -30,7 +30,8 @@ + 'qom-get', + 'query-tpm-models', + 'query-tpm-types', +- 'ringbuf-read' ], ++ 'ringbuf-read', ++ 'query-rtc-date-diff'], + # Externally visible types whose member names may use uppercase + 'member-name-exceptions': [ # visible in: + 'ACPISlotType', # query-acpi-ospm-status +diff --git a/system/rtc.c b/system/rtc.c +index 4904581abe..e16b5fffc5 100644 +--- a/system/rtc.c ++++ b/system/rtc.c +@@ -44,6 +44,7 @@ static time_t rtc_ref_start_datetime; + static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */ + static int rtc_host_datetime_offset = -1; /* valid & used only with + RTC_BASE_DATETIME */ ++static time_t rtc_date_diff = 0; + QEMUClockType rtc_clock; + /***********************************************************/ + /* RTC reference time/date access */ +@@ -108,6 +109,16 @@ time_t qemu_timedate_diff(struct tm *tm) + return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST); + } + ++time_t get_rtc_date_diff(void) ++{ ++ return rtc_date_diff; ++} ++ ++void set_rtc_date_diff(time_t diff) ++{ ++ rtc_date_diff = diff; ++} ++ + static void configure_rtc_base_datetime(const char *startdate) + { + time_t rtc_start_datetime; +-- +2.27.0 + diff --git a/freeclock-set-rtc_date_diff-for-X86.patch b/freeclock-set-rtc_date_diff-for-X86.patch new 
file mode 100644 index 0000000000000000000000000000000000000000..4711551f99a870a82f292cc3e9ba39e6f695c163 --- /dev/null +++ b/freeclock-set-rtc_date_diff-for-X86.patch @@ -0,0 +1,31 @@ +From 0a0010fe0656a63e82aea495ab0a59145d3b5750 Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Thu, 21 Mar 2024 12:26:38 +0800 +Subject: [PATCH] freeclock: set rtc_date_diff for X86 + +Set rtc_date_diff in mc146818rtc. + +Signed-off-by: l00500761 +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/rtc/mc146818rtc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 2d391a8396..e61c76d060 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -606,7 +606,8 @@ static void rtc_set_time(MC146818RtcState *s) + s->base_rtc = mktimegm(&tm); + s->last_update = qemu_clock_get_ns(rtc_clock); + +- qapi_event_send_rtc_change(qemu_timedate_diff(&tm), qom_path); ++ set_rtc_date_diff(qemu_timedate_diff(&tm)); ++ qapi_event_send_rtc_change(get_rtc_date_diff(), qom_path); + } + + static void rtc_set_cmos(MC146818RtcState *s, const struct tm *tm) +-- +2.27.0 + diff --git a/freeclock-set-rtc_date_diff-for-arm.patch b/freeclock-set-rtc_date_diff-for-arm.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c6b15ab26be27990b2d8028fc647dd48fca9312 --- /dev/null +++ b/freeclock-set-rtc_date_diff-for-arm.patch @@ -0,0 +1,31 @@ +From 156be254a48d1d9b7aadcbfa4423485c592bc75d Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Thu, 21 Mar 2024 11:21:14 +0800 +Subject: [PATCH] freeclock: set rtc_date_diff for arm + +Set rtc_date_diff in pl031. 
+ +Signed-off-by: Peng Liang +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/rtc/pl031.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c +index b01d0e75d1..f2e6baebba 100644 +--- a/hw/rtc/pl031.c ++++ b/hw/rtc/pl031.c +@@ -144,7 +144,8 @@ static void pl031_write(void * opaque, hwaddr offset, + s->tick_offset += value - pl031_get_count(s); + + qemu_get_timedate(&tm, s->tick_offset); +- qapi_event_send_rtc_change(qemu_timedate_diff(&tm), qom_path); ++ set_rtc_date_diff(qemu_timedate_diff(&tm)); ++ qapi_event_send_rtc_change(get_rtc_date_diff(), qom_path); + + pl031_set_alarm(s); + break; +-- +2.27.0 + diff --git a/fw_cfg-Don-t-set-callback_opaque-NULL-in-fw_cfg_modi.patch b/fw_cfg-Don-t-set-callback_opaque-NULL-in-fw_cfg_modi.patch new file mode 100644 index 0000000000000000000000000000000000000000..dfab1f98a6752f0193bc372d763301a16383e346 --- /dev/null +++ b/fw_cfg-Don-t-set-callback_opaque-NULL-in-fw_cfg_modi.patch @@ -0,0 +1,64 @@ +From b93ac4e4fd07e36b95ce211faefd0c7912b6f62a Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Tue, 3 Dec 2024 13:18:06 +0000 +Subject: [PATCH] fw_cfg: Don't set callback_opaque NULL in + fw_cfg_modify_bytes_read() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +On arm/virt platform, Chen Xiang reported a Guest crash while +attempting the below steps, + +1. Launch the Guest with nvdimm=on +2. Hot-add a NVDIMM dev +3. Reboot +4. Guest boots fine. +5. Reboot again. +6. Guest boot fails. 
+ +QEMU_EFI reports the below error: +ProcessCmdAddPointer: invalid pointer value in "etc/acpi/tables" +OnRootBridgesConnected: InstallAcpiTables: Protocol Error + +Debugging shows that on first reboot(after hot adding NVDIMM), +Qemu updates the etc/table-loader len, + +qemu_ram_resize() +  fw_cfg_modify_file() +     fw_cfg_modify_bytes_read() + +And in fw_cfg_modify_bytes_read() we set the "callback_opaque" for +the key entry to NULL. Because of this, on the second reboot, +virt_acpi_build_update() is called with a NULL "build_state" and +returns without updating the ACPI tables. This seems to be +upsetting the firmware. + +To fix this, don't change the callback_opaque in fw_cfg_modify_bytes_read(). + +Fixes: bdbb5b1706d165 ("fw_cfg: add fw_cfg_machine_reset function") +Reported-by: chenxiang +Acked-by: Igor Mammedov +Acked-by: Gerd Hoffmann +Signed-off-by: Shameer Kolothum +Message-ID: <20241203131806.37548-1-shameerali.kolothum.thodi@huawei.com> +Signed-off-by: Philippe Mathieu-Daudé +--- + hw/nvram/fw_cfg.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c +index 4e4524673a..d32079ebdf 100644 +--- a/hw/nvram/fw_cfg.c ++++ b/hw/nvram/fw_cfg.c +@@ -729,7 +729,6 @@ static void *fw_cfg_modify_bytes_read(FWCfgState *s, uint16_t key, + ptr = s->entries[arch][key].data; + s->entries[arch][key].data = data; + s->entries[arch][key].len = len; +- s->entries[arch][key].callback_opaque = NULL; + s->entries[arch][key].allow_write = false; + + return ptr; +-- +2.41.0.windows.1 + diff --git a/gdbstub-Add-helper-function-to-unregister-GDB-regist.patch b/gdbstub-Add-helper-function-to-unregister-GDB-regist.patch new file mode 100644 index 0000000000000000000000000000000000000000..395816ac61816a72f887e1cbcc45e39092053613 --- /dev/null +++ b/gdbstub-Add-helper-function-to-unregister-GDB-regist.patch @@ -0,0 +1,87 @@ +From 7754cf384417295dc74add4e774c506d751671a9 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 16 Jul 2024 12:15:02 
+0100 +Subject: [PATCH 67/78] gdbstub: Add helper function to unregister GDB register + space + +Add common function to help unregister the GDB register space. This shall be +done in context to the CPU unrealization. + +Note: These are common functions exported to arch specific code. For example, +for ARM this code is being referred in associated arch specific patch-set: + +Link: https://lore.kernel.org/qemu-devel/20230926103654.34424-1-salil.mehta@huawei.com/ + +Signed-off-by: Salil Mehta +Tested-by: Vishnu Pajjuri +Reviewed-by: Gavin Shan +Tested-by: Xianglai Li +Tested-by: Miguel Luis +Reviewed-by: Shaoqin Huang +Reviewed-by: Vishnu Pajjuri +Tested-by: Zhao Liu +Acked-by: Igor Mammedov +Message-Id: <20240716111502.202344-8-salil.mehta@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Xianglai Li +--- + gdbstub/gdbstub.c | 7 +++++++ + hw/core/cpu-common.c | 4 ++++ + include/exec/gdbstub.h | 5 +++++ + 3 files changed, 16 insertions(+) + +diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c +index f16006d2a8..31c3dae525 100644 +--- a/gdbstub/gdbstub.c ++++ b/gdbstub/gdbstub.c +@@ -584,8 +584,15 @@ void gdb_register_coprocessor(CPUState *cpu, + + void gdb_unregister_coprocessor_all(CPUState *cpu) + { ++ /* ++ * Safe to nuke everything. 
GDBRegisterState::xml is static const char so ++ * it won't be freed ++ */ + g_array_free(cpu->gdb_regs, true); ++ + cpu->gdb_regs = NULL; ++ cpu->gdb_num_regs = 0; ++ cpu->gdb_num_g_regs = 0; + } + + static void gdb_process_breakpoint_remove_all(GDBProcess *p) +diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c +index 82dae51a55..e36ca2c207 100644 +--- a/hw/core/cpu-common.c ++++ b/hw/core/cpu-common.c +@@ -262,6 +262,10 @@ static void cpu_common_finalize(Object *obj) + { + CPUState *cpu = CPU(obj); + ++ /* If cleanup didn't happen in context to gdb_unregister_coprocessor_all */ ++ if (cpu->gdb_regs) { ++ g_array_free(cpu->gdb_regs, TRUE); ++ } + qemu_lockcnt_destroy(&cpu->in_ioctl_lock); + qemu_mutex_destroy(&cpu->work_mutex); + } +diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h +index d123b838c2..e2e8dff051 100644 +--- a/include/exec/gdbstub.h ++++ b/include/exec/gdbstub.h +@@ -39,6 +39,11 @@ typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t *buf, int reg); + void gdb_register_coprocessor(CPUState *cpu, + gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg, + int num_regs, const char *xml, int g_pos); ++ ++/** ++ * gdb_unregister_coprocessor_all() - unregisters supplemental set of registers ++ * @cpu - the CPU associated with registers ++ */ + void gdb_unregister_coprocessor_all(CPUState *cpu); + + /** +-- +2.39.1 + diff --git a/gpex-acpi-Remove-duplicate-DSM-5.patch b/gpex-acpi-Remove-duplicate-DSM-5.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f54bec1e390fb898578588696a2e4a1bf810965 --- /dev/null +++ b/gpex-acpi-Remove-duplicate-DSM-5.patch @@ -0,0 +1,57 @@ +From b1087bb8a4edbacc7240c0fcab63bc1cf2624627 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Tue, 21 Jan 2025 14:42:45 +0000 +Subject: [PATCH] gpex-acpi: Remove duplicate DSM #5 + +It looks like acpi_dsdt_add_pci_osc() already builds the _DSM +for virt/gpex case, and we don't need to add duplicate DSM methods +for _DSM #5 case. 
+ +And the acpi_dsdt_add_pci_osc() already adds _DSM #5 when +preserve_config is true. + +This is to get rid of the ACPI related error messages during boot: + +ACPI BIOS Error (bug): Failure creating named object [\_SB.PC08._DSM], AE_ALREADY_EXISTS +ACPI BIOS Error (bug): \_SB.PC08.PCI0._DSM: Excess arguments - ASL declared 5, ACPI requires 4 + +ToDo: Only sanity tested. + +Signed-off-by: Shameer Kolothum +--- + hw/pci-host/gpex-acpi.c | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c +index ce424fc9da..162f6221ab 100644 +--- a/hw/pci-host/gpex-acpi.c ++++ b/hw/pci-host/gpex-acpi.c +@@ -189,12 +189,6 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) + aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); + } + +- if (cfg->preserve_config) { +- method = aml_method("_DSM", 5, AML_SERIALIZED); +- aml_append(method, aml_return(aml_int(0))); +- aml_append(dev, method); +- } +- + acpi_dsdt_add_pci_route_table(dev, cfg->irq); + + /* +@@ -226,12 +220,6 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) + aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device"))); + aml_append(dev, aml_name_decl("_CCA", aml_int(1))); + +- if (cfg->preserve_config) { +- method = aml_method("_DSM", 5, AML_SERIALIZED); +- aml_append(method, aml_return(aml_int(0))); +- aml_append(dev, method); +- } +- + acpi_dsdt_add_pci_route_table(dev, cfg->irq); + + method = aml_method("_CBA", 0, AML_NOTSERIALIZED); +-- +2.41.0.windows.1 + diff --git a/hbitmap-handle-set-reset-with-zero-length.patch b/hbitmap-handle-set-reset-with-zero-length.patch deleted file mode 100644 index b346a970d8594e2fae6a730c8c66370dc66af0da..0000000000000000000000000000000000000000 --- a/hbitmap-handle-set-reset-with-zero-length.patch +++ /dev/null @@ -1,50 +0,0 @@ -From c0b35d87de345bd3b59a44c604b247a0497f2fc0 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Fri, 11 Oct 2019 12:07:07 +0300 -Subject: 
[PATCH] hbitmap: handle set/reset with zero length - -Passing zero length to these functions leads to unpredicted results. -Zero-length set/reset may occur in active-mirror, on zero-length write -(which is unlikely, but not guaranteed to never happen). - -Let's just do nothing on zero-length request. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-id: 20191011090711.19940-2-vsementsov@virtuozzo.com -Reviewed-by: Max Reitz -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -(cherry picked from commit fed33bd175f663cc8c13f8a490a4f35a19756cfe) -Signed-off-by: Michael Roth ---- - util/hbitmap.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/util/hbitmap.c b/util/hbitmap.c -index 71c6ba2c52..c059313b9e 100644 ---- a/util/hbitmap.c -+++ b/util/hbitmap.c -@@ -387,6 +387,10 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) - uint64_t first, n; - uint64_t last = start + count - 1; - -+ if (count == 0) { -+ return; -+ } -+ - trace_hbitmap_set(hb, start, count, - start >> hb->granularity, last >> hb->granularity); - -@@ -478,6 +482,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) - uint64_t last = start + count - 1; - uint64_t gran = 1ULL << hb->granularity; - -+ if (count == 0) { -+ return; -+ } -+ - assert(QEMU_IS_ALIGNED(start, gran)); - assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size)); - --- -2.23.0 diff --git a/hmp-vnc-Fix-info-vnc-list-leak.patch b/hmp-vnc-Fix-info-vnc-list-leak.patch deleted file mode 100644 index ccc4e1db511a18c5da864a1d8b2732e9a4cd8a1f..0000000000000000000000000000000000000000 --- a/hmp-vnc-Fix-info-vnc-list-leak.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 6cb599f75b7844aefd7823ad97fc3bae70eff11f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 23 Mar 2020 12:08:22 +0000 -Subject: [PATCH 06/14] hmp/vnc: Fix info vnc list leak - -We're iterating the list, and then freeing the iteration pointer rather -than the list head. 
- -Fixes: 0a9667ecdb6d ("hmp: Update info vnc") -Reported-by: Coverity (CID 1421932) -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200323120822.51266-1-dgilbert@redhat.com> -Reviewed-by: Peter Maydell -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Peng Liang ---- - monitor/hmp-cmds.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 5ca3ebe94272..fc5d6b92c4b6 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -745,10 +745,11 @@ static void hmp_info_vnc_servers(Monitor *mon, VncServerInfo2List *server) - - void hmp_info_vnc(Monitor *mon, const QDict *qdict) - { -- VncInfo2List *info2l; -+ VncInfo2List *info2l, *info2l_head; - Error *err = NULL; - - info2l = qmp_query_vnc_servers(&err); -+ info2l_head = info2l; - if (err) { - hmp_handle_error(mon, &err); - return; -@@ -777,7 +778,7 @@ void hmp_info_vnc(Monitor *mon, const QDict *qdict) - info2l = info2l->next; - } - -- qapi_free_VncInfo2List(info2l); -+ qapi_free_VncInfo2List(info2l_head); - - } - #endif --- -2.26.2 - diff --git a/hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch b/hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch deleted file mode 100644 index 0cbda2bf538fdf6eccab11faa319bc9a31be12f9..0000000000000000000000000000000000000000 --- a/hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch +++ /dev/null @@ -1,61 +0,0 @@ -From f14505f7f91edbce738202a6f658806d1074116c Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 11 Dec 2020 17:28:39 +0800 -Subject: [PATCH] hostmem: Fix up free host_nodes list right after visited - -In host_memory_backend_get_host_nodes, we build host_nodes -list and output it to v (a StringOutputVisitor) but forget -to free the list. This fixes the memory leak. 
- -The memory leak stack: - -Direct leak of 32 byte(s) in 2 object(s) allocated from: - #0 0xfffda30b3393 in __interceptor_calloc (/usr/lib64/libasan.so.4+0xd3393) - #1 0xfffda1d28b9b in g_malloc0 (/usr/lib64/libglib-2.0.so.0+0x58b9b) - #2 0xaaab05ca6e43 in host_memory_backend_get_host_nodes backends/hostmem.c:94 - #3 0xaaab061ddf83 in object_property_get_uint16List qom/object.c:1478 - #4 0xaaab05866513 in query_memdev hw/core/machine-qmp-cmds.c:312 - #5 0xaaab061d980b in do_object_child_foreach qom/object.c:1001 - #6 0xaaab0586779b in qmp_query_memdev hw/core/machine-qmp-cmds.c:328 - #7 0xaaab0615ed3f in qmp_marshal_query_memdev qapi/qapi-commands-machine.c:327 - #8 0xaaab0632d647 in do_qmp_dispatch qapi/qmp-dispatch.c:147 - #9 0xaaab0632d647 in qmp_dispatch qapi/qmp-dispatch.c:190 - #10 0xaaab0610f74b in monitor_qmp_dispatch monitor/qmp.c:120 - #11 0xaaab0611074b in monitor_qmp_bh_dispatcher monitor/qmp.c:209 - #12 0xaaab063caefb in aio_bh_poll util/async.c:117 - #13 0xaaab063d30fb in aio_dispatch util/aio-posix.c:459 - #14 0xaaab063cac8f in aio_ctx_dispatch util/async.c:268 - #15 0xfffda1d22a6b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a6b) - #16 0xaaab063d0e97 in glib_pollfds_poll util/main-loop.c:218 - #17 0xaaab063d0e97 in os_host_main_loop_wait util/main-loop.c:241 - #18 0xaaab063d0e97 in main_loop_wait util/main-loop.c:517 - #19 0xaaab05c8bfa7 in main_loop /root/rpmbuild/BUILD/qemu-4.1.0/vl.c:1791 - #20 0xaaab05713bc3 in main /root/rpmbuild/BUILD/qemu-4.1.0/vl.c:4473 - #21 0xfffda0a83ebf in __libc_start_main (/usr/lib64/libc.so.6+0x23ebf) - #22 0xaaab0571ed5f (aarch64-softmmu/qemu-system-aarch64+0x88ed5f) -SUMMARY: AddressSanitizer: 32 byte(s) leaked in 2 allocation(s). 
- -Fixes: 4cf1b76bf1e2 (hostmem: add properties for NUMA memory policy) -Reported-by: Euler Robot -Tested-by: Chen Qun -Reviewed-by: Igor Mammedov -Signed-off-by: Keqian Zhu ---- - backends/hostmem.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/backends/hostmem.c b/backends/hostmem.c -index 463102aa15..9e1b3a0afc 100644 ---- a/backends/hostmem.c -+++ b/backends/hostmem.c -@@ -108,6 +108,7 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, - - ret: - visit_type_uint16List(v, name, &host_nodes, errp); -+ qapi_free_uint16List(host_nodes); - } - - static void --- -2.27.0 - diff --git a/hppa-fix-leak-from-g_strdup_printf.patch b/hppa-fix-leak-from-g_strdup_printf.patch deleted file mode 100644 index b04193e380fe58ef14e91cb56d162abc264dce9b..0000000000000000000000000000000000000000 --- a/hppa-fix-leak-from-g_strdup_printf.patch +++ /dev/null @@ -1,54 +0,0 @@ -From b7ef7e6fb5a2b08268f4b19c07c07abd4fbb2064 Mon Sep 17 00:00:00 2001 -From: lizhengui -Date: Wed, 9 Sep 2020 14:48:49 +0800 -Subject: [PATCH] hppa: fix leak from g_strdup_printf -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -memory_region_init_* takes care of copying the name into memory it owns. -Free it in the caller. - -Signed-off-by: Paolo Bonzini -Reviewed-by: Philippe Mathieu-Daudé ---- - hw/hppa/dino.c | 1 + - hw/hppa/machine.c | 4 +++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c -index e94614ab..ef923b49 100644 ---- a/hw/hppa/dino.c -+++ b/hw/hppa/dino.c -@@ -485,6 +485,7 @@ PCIBus *dino_init(MemoryRegion *addr_space, - memory_region_init_alias(&s->pci_mem_alias[i], OBJECT(s), - name, &s->pci_mem, addr, - DINO_MEM_CHUNK_SIZE); -+ g_free(name); - } - - /* Set up PCI view of memory: Bus master address space. 
*/ -diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c -index 662838d8..9e25660e 100644 ---- a/hw/hppa/machine.c -+++ b/hw/hppa/machine.c -@@ -78,13 +78,15 @@ static void machine_hppa_init(MachineState *machine) - - /* Create CPUs. */ - for (i = 0; i < smp_cpus; i++) { -+ char *name = g_strdup_printf("cpu%ld-io-eir", i); - cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type)); - - cpu_region = g_new(MemoryRegion, 1); - memory_region_init_io(cpu_region, OBJECT(cpu[i]), &hppa_io_eir_ops, -- cpu[i], g_strdup_printf("cpu%ld-io-eir", i), 4); -+ cpu[i], name, 4); - memory_region_add_subregion(addr_space, CPU_HPA + i * 0x1000, - cpu_region); -+ g_free(name); - } - - /* Limit main memory. */ --- -2.19.1 - diff --git a/hvf-arm-Do-not-advance-PC-when-raising-an-exception.patch b/hvf-arm-Do-not-advance-PC-when-raising-an-exception.patch new file mode 100644 index 0000000000000000000000000000000000000000..a0970ce694995ff111cf70225e2ac985f243e78a --- /dev/null +++ b/hvf-arm-Do-not-advance-PC-when-raising-an-exception.patch @@ -0,0 +1,39 @@ +From 550d304465b366a116e02d2cb006475ea453a98a Mon Sep 17 00:00:00 2001 +From: guping +Date: Mon, 22 Jul 2024 00:37:30 +0000 +Subject: [PATCH] hvf: arm: Do not advance PC when raising an exception + cherry-pick from 30a1690f2402e6c1582d5b3ebcf7940bfe2fad4b + +hvf did not advance PC when raising an exception for most unhandled +system registers, but it mistakenly advanced PC when raising an +exception for GICv3 registers. 
+ +Cc: qemu-stable@nongnu.org +Fixes: a2260983 + + ("hvf: arm: Add support for GICv3") +Signed-off-by: Akihiko Odaki +Message-id: 20240716-pmu-v3-4-8c7c1858a227@daynix.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell + +Signed-off-by: guping +--- + target/arm/hvf/hvf.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c +index 757e13b0f9..b4e98a99e2 100644 +--- a/target/arm/hvf/hvf.c ++++ b/target/arm/hvf/hvf.c +@@ -1272,6 +1272,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) + /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */ + if (!hvf_sysreg_read_cp(cpu, reg, &val)) { + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); ++ return 1; + } + break; + case SYSREG_DBGBVR0_EL1: +-- +2.41.0.windows.1 + diff --git a/hvf-arm-Fix-encodings-for-ID_AA64PFR1_EL1-and-debug-.patch b/hvf-arm-Fix-encodings-for-ID_AA64PFR1_EL1-and-debug-.patch new file mode 100644 index 0000000000000000000000000000000000000000..a7f2fde7e7a2e4eb3fcc03c75e14b678fae74d2c --- /dev/null +++ b/hvf-arm-Fix-encodings-for-ID_AA64PFR1_EL1-and-debug-.patch @@ -0,0 +1,211 @@ +From ab7c657e05f896600c310c74e7584fc345ff235c Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Thu, 23 May 2024 16:06:19 +0100 +Subject: [PATCH] hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System + registers + +We wrongly encoded ID_AA64PFR1_EL1 using {3,0,0,4,2} in hvf_sreg_match[] so +we fail to get the expected ARMCPRegInfo from cp_regs hash table with the +wrong key. + +Fix it with the correct encoding {3,0,0,4,1}. With that fixed, the Linux +guest can properly detect FEAT_SSBS2 on my M1 HW. + +All DBG{B,W}{V,C}R_EL1 registers are also wrongly encoded with op0 == 14. +It happens to work because HVF_SYSREG(CRn, CRm, 14, op1, op2) equals to +HVF_SYSREG(CRn, CRm, 2, op1, op2), by definition. But we shouldn't rely on +it. 
+ +Cc: qemu-stable@nongnu.org +Fixes: a1477da3ddeb ("hvf: Add Apple Silicon support") +Signed-off-by: Zenghui Yu +Reviewed-by: Alexander Graf +Message-id: 20240503153453.54389-1-zenghui.yu@linux.dev +Signed-off-by: Peter Maydell +(cherry picked from commit 19ed42e8adc87a3c739f61608b66a046bb9237e2) +Signed-off-by: zhujun2 +--- + target/arm/hvf/hvf.c | 160 +++++++++++++++++++++---------------------- + 1 file changed, 80 insertions(+), 80 deletions(-) + +diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c +index b4e98a99e2..d7cc00a084 100644 +--- a/target/arm/hvf/hvf.c ++++ b/target/arm/hvf/hvf.c +@@ -392,85 +392,85 @@ struct hvf_sreg_match { + }; + + static struct hvf_sreg_match hvf_sreg_match[] = { +- { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR5_EL1, 
HVF_SYSREG(0, 5, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR13_EL1, 
HVF_SYSREG(0, 13, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 7) }, +- +- { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 4) }, +- { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 5) }, +- { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 6) }, +- { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 7) }, ++ { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) }, ++ ++ 
{ HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR14_EL1, 
HVF_SYSREG(0, 14, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) }, ++ ++ { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) }, ++ { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) }, ++ { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) }, ++ { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) }, + + #ifdef SYNC_NO_RAW_REGS + /* +@@ -482,7 +482,7 @@ static struct hvf_sreg_match hvf_sreg_match[] = { + { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) }, + { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) }, + #endif +- { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 2) }, ++ { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) }, + { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) }, + { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) }, + { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) }, +-- +2.41.0.windows.1 + diff --git a/hvf-remove-unused-but-set-variable.patch b/hvf-remove-unused-but-set-variable.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d420243217ce943079ff43ee1801ac475493a84 --- /dev/null +++ b/hvf-remove-unused-but-set-variable.patch @@ -0,0 +1,57 @@ +From 885c1bf512582757f9d7e2e360701f72a9d6e95f Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 12 Dec 2024 11:27:23 +0800 +Subject: [PATCH] hvf: remove unused but set variable + +cherry-pick from 19d542cc0bce0b3641e80444374f9ffd8294a15b + +fixes associated warning when building on MacOS. 
+ +Signed-off-by: Pierrick Bouvier +Link: https://lore.kernel.org/r/20241023182922.1040964-1-pierrick.bouvier@linaro.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Zhang Jiao +--- + target/i386/hvf/x86_task.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c +index f09bfbdda5..cdea2ea69d 100644 +--- a/target/i386/hvf/x86_task.c ++++ b/target/i386/hvf/x86_task.c +@@ -122,7 +122,6 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea + load_regs(cpu); + + struct x86_segment_descriptor curr_tss_desc, next_tss_desc; +- int ret; + x68_segment_selector old_tss_sel = vmx_read_segment_selector(cpu, R_TR); + uint64_t old_tss_base = vmx_read_segment_base(cpu, R_TR); + uint32_t desc_limit; +@@ -138,7 +137,7 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea + if (reason == TSR_IDT_GATE && gate_valid) { + int dpl; + +- ret = x86_read_call_gate(cpu, &task_gate_desc, gate); ++ x86_read_call_gate(cpu, &task_gate_desc, gate); + + dpl = task_gate_desc.dpl; + x68_segment_selector cs = vmx_read_segment_selector(cpu, R_CS); +@@ -167,11 +166,12 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea + x86_write_segment_descriptor(cpu, &next_tss_desc, tss_sel); + } + +- if (next_tss_desc.type & 8) +- ret = task_switch_32(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc); +- else ++ if (next_tss_desc.type & 8) { ++ task_switch_32(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc); ++ } else { + //ret = task_switch_16(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc); + VM_PANIC("task_switch_16"); ++ } + + macvm_set_cr0(cpu->accel->fd, rvmcs(cpu->accel->fd, VMCS_GUEST_CR0) | + CR0_TS_MASK); +-- +2.41.0.windows.1 + diff --git a/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch b/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch new file mode 100644 index 
0000000000000000000000000000000000000000..34fa1c91b9d0e375f7f85a4477b3d897b63d936f --- /dev/null +++ b/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch @@ -0,0 +1,187 @@ +From 19a8fbccbc997110f472df308813ad2d7738065c Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 14 Nov 2022 02:25:28 +0000 +Subject: [PATCH] hw/acpi: ACPI/AML Changes to reflect the correct + _STA.{PRES,ENA} Bits to Guest + +ACPI AML changes to properly reflect the _STA.PRES and _STA.ENA Bits to the +guest during initialization, when CPUs are hotplugged and after CPUs are +hot-unplugged. + +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 49 +++++++++++++++++++++++++++++++--- + hw/acpi/generic_event_device.c | 11 ++++++++ + include/hw/acpi/cpu.h | 2 ++ + 3 files changed, 58 insertions(+), 4 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index c8c11e51c6..991f1d4181 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -64,10 +64,11 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) + cdev = &cpu_st->devs[cpu_st->selector]; + switch (addr) { + case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ +- val |= cdev->cpu ? 1 : 0; ++ val |= cdev->is_enabled ? 1 : 0; + val |= cdev->is_inserting ? 2 : 0; + val |= cdev->is_removing ? 4 : 0; + val |= cdev->fw_remove ? 16 : 0; ++ val |= cdev->is_present ? 
32 : 0; + trace_cpuhp_acpi_read_flags(cpu_st->selector, val); + break; + case ACPI_CPU_CMD_DATA_OFFSET_RW: +@@ -229,7 +230,21 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + struct CPUState *cpu = CPU(id_list->cpus[i].cpu); + if (qemu_present_cpu(cpu)) { + state->devs[i].cpu = cpu; ++ state->devs[i].is_present = true; ++ } else { ++ if (qemu_persistent_cpu(cpu)) { ++ state->devs[i].is_present = true; ++ } else { ++ state->devs[i].is_present = false; ++ } + } ++ ++ if (qemu_enabled_cpu(cpu)) { ++ state->devs[i].is_enabled = true; ++ } else { ++ state->devs[i].is_enabled = false; ++ } ++ + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +@@ -262,6 +277,8 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + } + + cdev->cpu = CPU(dev); ++ cdev->is_present = true; ++ cdev->is_enabled = true; + if (dev->hotplugged) { + cdev->is_inserting = true; + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); +@@ -293,6 +310,11 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + return; + } + ++ cdev->is_enabled = false; ++ if (!qemu_persistent_cpu(CPU(dev))) { ++ cdev->is_present = false; ++ } ++ + cdev->cpu = NULL; + } + +@@ -303,6 +325,8 @@ static const VMStateDescription vmstate_cpuhp_sts = { + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_inserting, AcpiCpuStatus), + VMSTATE_BOOL(is_removing, AcpiCpuStatus), ++ VMSTATE_BOOL(is_present, AcpiCpuStatus), ++ VMSTATE_BOOL(is_enabled, AcpiCpuStatus), + VMSTATE_UINT32(ost_event, AcpiCpuStatus), + VMSTATE_UINT32(ost_status, AcpiCpuStatus), + VMSTATE_END_OF_LIST() +@@ -340,6 +364,7 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_REMOVE_EVENT "CRMV" + #define CPU_EJECT_EVENT "CEJ0" + #define CPU_FW_EJECT_EVENT "CEJF" ++#define CPU_PRESENT "CPRS" + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + build_madt_cpu_fn build_madt_cpu, +@@ -400,7 +425,9 @@ void build_cpus_aml(Aml 
*table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1)); + /* tell firmware to do device eject, write only */ + aml_append(field, aml_named_field(CPU_FW_EJECT_EVENT, 1)); +- aml_append(field, aml_reserved_field(3)); ++ /* 1 if present, read only */ ++ aml_append(field, aml_named_field(CPU_PRESENT, 1)); ++ aml_append(field, aml_reserved_field(2)); + aml_append(field, aml_named_field(CPU_COMMAND, 8)); + aml_append(cpu_ctrl_dev, field); + +@@ -430,6 +457,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK); + Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR); + Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED); ++ Aml *is_present = aml_name("%s.%s", cphp_res_path, CPU_PRESENT); + Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND); + Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA); + Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT); +@@ -458,13 +486,26 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + { + Aml *idx = aml_arg(0); + Aml *sta = aml_local(0); ++ Aml *ifctx2; ++ Aml *else_ctx; + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(idx, cpu_selector)); + aml_append(method, aml_store(zero, sta)); +- ifctx = aml_if(aml_equal(is_enabled, one)); ++ ifctx = aml_if(aml_equal(is_present, one)); + { +- aml_append(ifctx, aml_store(aml_int(0xF), sta)); ++ ifctx2 = aml_if(aml_equal(is_enabled, one)); ++ { ++ /* cpu is present and enabled */ ++ aml_append(ifctx2, aml_store(aml_int(0xF), sta)); ++ } ++ aml_append(ifctx, ifctx2); ++ else_ctx = aml_else(); ++ { ++ /* cpu is present but disabled */ ++ aml_append(else_ctx, aml_store(aml_int(0xD), sta)); ++ } ++ aml_append(ifctx, else_ctx); + } + aml_append(method, ifctx); + aml_append(method, aml_release(ctrl_lock)); +diff --git 
a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index d2fa1d0e4a..b84602b238 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -319,6 +319,16 @@ static const VMStateDescription vmstate_memhp_state = { + } + }; + ++static const VMStateDescription vmstate_cpuhp_state = { ++ .name = "acpi-ged/cpuhp", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static const VMStateDescription vmstate_ged_state = { + .name = "acpi-ged-state", + .version_id = 1, +@@ -367,6 +377,7 @@ static const VMStateDescription vmstate_acpi_ged = { + }, + .subsections = (const VMStateDescription * []) { + &vmstate_memhp_state, ++ &vmstate_cpuhp_state, + &vmstate_ghes_state, + NULL + } +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index b31a2e50d9..fced952152 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -23,6 +23,8 @@ typedef struct AcpiCpuStatus { + uint64_t arch_id; + bool is_inserting; + bool is_removing; ++ bool is_present; ++ bool is_enabled; + bool fw_remove; + uint32_t ost_event; + uint32_t ost_status; +-- +2.27.0 + diff --git a/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch b/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch new file mode 100644 index 0000000000000000000000000000000000000000..072d4eb9bac4ea0a553e14b6e2ce85a6961cc19b --- /dev/null +++ b/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch @@ -0,0 +1,34 @@ +From e442d0f8670dc4218ab4beebe645e369f925410d Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 19 Aug 2023 00:26:20 +0000 +Subject: [PATCH] hw/acpi: Add ACPI CPU hotplug init stub + +ACPI CPU hotplug related initialization should only happen if ACPI_CPU_HOTPLUG +support has been enabled for particular architecture. Add cpu_hotplug_hw_init() +stub to avoid compilation break. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/acpi-cpu-hotplug-stub.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c +index 3fc4b14c26..c6c61bb9cd 100644 +--- a/hw/acpi/acpi-cpu-hotplug-stub.c ++++ b/hw/acpi/acpi-cpu-hotplug-stub.c +@@ -19,6 +19,12 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, + return; + } + ++void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, ++ CPUHotplugState *state, hwaddr base_addr) ++{ ++ return; ++} ++ + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) + { + return; +-- +2.27.0 + diff --git a/hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch b/hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch deleted file mode 100644 index dc57aa64dd12427afc1e59af0476206317065d1b..0000000000000000000000000000000000000000 --- a/hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch +++ /dev/null @@ -1,478 +0,0 @@ -From b12d9edd0079d4ee136c25e95333918b0c6d3cd9 Mon Sep 17 00:00:00 2001 -From: Samuel Ortiz -Date: Wed, 18 Sep 2019 14:06:25 +0100 -Subject: [PATCH] hw/acpi: Add ACPI Generic Event Device Support - -The ACPI Generic Event Device (GED) is a hardware-reduced specific -device[ACPI v6.1 Section 5.6.9] that handles all platform events, -including the hotplug ones. This patch generates the AML code that -defines GEDs. - -Platforms need to specify their own GED Event bitmap to describe -what kind of events they want to support through GED. Also this -uses a a single interrupt for the GED device, relying on IO -memory region to communicate the type of device affected by the -interrupt. This way, we can support up to 32 events with a unique -interrupt. - -This supports only memory hotplug for now. 
- -Signed-off-by: Samuel Ortiz -Signed-off-by: Sebastien Boeuf -Signed-off-by: Shameer Kolothum -Reviewed-by: Eric Auger -Message-Id: <20190918130633.4872-4-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Igor Mammedov ---- - hw/acpi/Kconfig | 4 + - hw/acpi/Makefile.objs | 1 + - hw/acpi/generic_event_device.c | 303 +++++++++++++++++++++++++ - include/hw/acpi/generic_event_device.h | 100 ++++++++ - 4 files changed, 408 insertions(+) - create mode 100644 hw/acpi/generic_event_device.c - create mode 100644 include/hw/acpi/generic_event_device.h - -diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig -index 7c59cf900b..12e3f1e86e 100644 ---- a/hw/acpi/Kconfig -+++ b/hw/acpi/Kconfig -@@ -31,3 +31,7 @@ config ACPI_VMGENID - bool - default y - depends on PC -+ -+config ACPI_HW_REDUCED -+ bool -+ depends on ACPI -diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs -index 1a720c381e..e4b5d101a4 100644 ---- a/hw/acpi/Makefile.objs -+++ b/hw/acpi/Makefile.objs -@@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o - common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o - common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o - common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o -+common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o - common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o - - common-obj-y += acpi_interface.o -diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c -new file mode 100644 -index 0000000000..b94500b08d ---- /dev/null -+++ b/hw/acpi/generic_event_device.c -@@ -0,0 +1,303 @@ -+/* -+ * -+ * Copyright (c) 2018 Intel Corporation -+ * Copyright (c) 2019 Huawei Technologies R & D (UK) Ltd -+ * Written by Samuel Ortiz, Shameer Kolothum -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms and conditions of the GNU General Public License, -+ * version 2 or later, as published 
by the Free Software Foundation. -+ */ -+ -+#include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "exec/address-spaces.h" -+#include "hw/acpi/acpi.h" -+#include "hw/acpi/generic_event_device.h" -+#include "hw/irq.h" -+#include "hw/mem/pc-dimm.h" -+#include "hw/qdev-properties.h" -+#include "migration/vmstate.h" -+#include "qemu/error-report.h" -+ -+static const uint32_t ged_supported_events[] = { -+ ACPI_GED_MEM_HOTPLUG_EVT, -+}; -+ -+/* -+ * The ACPI Generic Event Device (GED) is a hardware-reduced specific -+ * device[ACPI v6.1 Section 5.6.9] that handles all platform events, -+ * including the hotplug ones. Platforms need to specify their own -+ * GED Event bitmap to describe what kind of events they want to support -+ * through GED. This routine uses a single interrupt for the GED device, -+ * relying on IO memory region to communicate the type of device -+ * affected by the interrupt. This way, we can support up to 32 events -+ * with a unique interrupt. -+ */ -+void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, -+ uint32_t ged_irq, AmlRegionSpace rs, hwaddr ged_base) -+{ -+ AcpiGedState *s = ACPI_GED(hotplug_dev); -+ Aml *crs = aml_resource_template(); -+ Aml *evt, *field; -+ Aml *dev = aml_device("%s", name); -+ Aml *evt_sel = aml_local(0); -+ Aml *esel = aml_name(AML_GED_EVT_SEL); -+ -+ /* _CRS interrupt */ -+ aml_append(crs, aml_interrupt(AML_CONSUMER, AML_EDGE, AML_ACTIVE_HIGH, -+ AML_EXCLUSIVE, &ged_irq, 1)); -+ -+ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0013"))); -+ aml_append(dev, aml_name_decl("_UID", aml_string(GED_DEVICE))); -+ aml_append(dev, aml_name_decl("_CRS", crs)); -+ -+ /* Append IO region */ -+ aml_append(dev, aml_operation_region(AML_GED_EVT_REG, rs, -+ aml_int(ged_base + ACPI_GED_EVT_SEL_OFFSET), -+ ACPI_GED_EVT_SEL_LEN)); -+ field = aml_field(AML_GED_EVT_REG, AML_DWORD_ACC, AML_NOLOCK, -+ AML_WRITE_AS_ZEROS); -+ aml_append(field, aml_named_field(AML_GED_EVT_SEL, -+ ACPI_GED_EVT_SEL_LEN * 
BITS_PER_BYTE)); -+ aml_append(dev, field); -+ -+ /* -+ * For each GED event we: -+ * - Add a conditional block for each event, inside a loop. -+ * - Call a method for each supported GED event type. -+ * -+ * The resulting ASL code looks like: -+ * -+ * Local0 = ESEL -+ * If ((Local0 & One) == One) -+ * { -+ * MethodEvent0() -+ * } -+ * -+ * If ((Local0 & 0x2) == 0x2) -+ * { -+ * MethodEvent1() -+ * } -+ * ... -+ */ -+ evt = aml_method("_EVT", 1, AML_SERIALIZED); -+ { -+ Aml *if_ctx; -+ uint32_t i; -+ uint32_t ged_events = ctpop32(s->ged_event_bitmap); -+ -+ /* Local0 = ESEL */ -+ aml_append(evt, aml_store(esel, evt_sel)); -+ -+ for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) { -+ uint32_t event = s->ged_event_bitmap & ged_supported_events[i]; -+ -+ if (!event) { -+ continue; -+ } -+ -+ if_ctx = aml_if(aml_equal(aml_and(evt_sel, aml_int(event), NULL), -+ aml_int(event))); -+ switch (event) { -+ case ACPI_GED_MEM_HOTPLUG_EVT: -+ aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." -+ MEMORY_SLOT_SCAN_METHOD)); -+ break; -+ default: -+ /* -+ * Please make sure all the events in ged_supported_events[] -+ * are handled above. 
-+ */ -+ g_assert_not_reached(); -+ } -+ -+ aml_append(evt, if_ctx); -+ ged_events--; -+ } -+ -+ if (ged_events) { -+ error_report("Unsupported events specified"); -+ abort(); -+ } -+ } -+ -+ /* Append _EVT method */ -+ aml_append(dev, evt); -+ -+ aml_append(table, dev); -+} -+ -+/* Memory read by the GED _EVT AML dynamic method */ -+static uint64_t ged_read(void *opaque, hwaddr addr, unsigned size) -+{ -+ uint64_t val = 0; -+ GEDState *ged_st = opaque; -+ -+ switch (addr) { -+ case ACPI_GED_EVT_SEL_OFFSET: -+ /* Read the selector value and reset it */ -+ val = ged_st->sel; -+ ged_st->sel = 0; -+ break; -+ default: -+ break; -+ } -+ -+ return val; -+} -+ -+/* Nothing is expected to be written to the GED memory region */ -+static void ged_write(void *opaque, hwaddr addr, uint64_t data, -+ unsigned int size) -+{ -+} -+ -+static const MemoryRegionOps ged_ops = { -+ .read = ged_read, -+ .write = ged_write, -+ .endianness = DEVICE_LITTLE_ENDIAN, -+ .valid = { -+ .min_access_size = 4, -+ .max_access_size = 4, -+ }, -+}; -+ -+static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, -+ DeviceState *dev, Error **errp) -+{ -+ AcpiGedState *s = ACPI_GED(hotplug_dev); -+ -+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -+ acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); -+ } else { -+ error_setg(errp, "virt: device plug request for unsupported device" -+ " type: %s", object_get_typename(OBJECT(dev))); -+ } -+} -+ -+static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) -+{ -+ AcpiGedState *s = ACPI_GED(adev); -+ GEDState *ged_st = &s->ged_state; -+ uint32_t sel; -+ -+ if (ev & ACPI_MEMORY_HOTPLUG_STATUS) { -+ sel = ACPI_GED_MEM_HOTPLUG_EVT; -+ } else { -+ /* Unknown event. Return without generating interrupt. */ -+ warn_report("GED: Unsupported event %d. No irq injected", ev); -+ return; -+ } -+ -+ /* -+ * Set the GED selector field to communicate the event type. 
-+ * This will be read by GED aml code to select the appropriate -+ * event method. -+ */ -+ ged_st->sel |= sel; -+ -+ /* Trigger the event by sending an interrupt to the guest. */ -+ qemu_irq_pulse(s->irq); -+} -+ -+static Property acpi_ged_properties[] = { -+ DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0), -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ -+static const VMStateDescription vmstate_memhp_state = { -+ .name = "acpi-ged/memhp", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .fields = (VMStateField[]) { -+ VMSTATE_MEMORY_HOTPLUG(memhp_state, AcpiGedState), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ -+static const VMStateDescription vmstate_ged_state = { -+ .name = "acpi-ged-state", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .fields = (VMStateField[]) { -+ VMSTATE_UINT32(sel, GEDState), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ -+static const VMStateDescription vmstate_acpi_ged = { -+ .name = "acpi-ged", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .fields = (VMStateField[]) { -+ VMSTATE_STRUCT(ged_state, AcpiGedState, 1, vmstate_ged_state, GEDState), -+ VMSTATE_END_OF_LIST(), -+ }, -+ .subsections = (const VMStateDescription * []) { -+ &vmstate_memhp_state, -+ NULL -+ } -+}; -+ -+static void acpi_ged_initfn(Object *obj) -+{ -+ DeviceState *dev = DEVICE(obj); -+ AcpiGedState *s = ACPI_GED(dev); -+ SysBusDevice *sbd = SYS_BUS_DEVICE(obj); -+ GEDState *ged_st = &s->ged_state; -+ -+ memory_region_init_io(&ged_st->io, obj, &ged_ops, ged_st, -+ TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); -+ sysbus_init_mmio(sbd, &ged_st->io); -+ -+ sysbus_init_irq(sbd, &s->irq); -+ -+ s->memhp_state.is_enabled = true; -+ /* -+ * GED handles memory hotplug event and acpi-mem-hotplug -+ * memory region gets initialized here. Create an exclusive -+ * container for memory hotplug IO and expose it as GED sysbus -+ * MMIO so that boards can map it separately. 
-+ */ -+ memory_region_init(&s->container_memhp, OBJECT(dev), "memhp container", -+ MEMORY_HOTPLUG_IO_LEN); -+ sysbus_init_mmio(sbd, &s->container_memhp); -+ acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), -+ &s->memhp_state, 0); -+} -+ -+static void acpi_ged_class_init(ObjectClass *class, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(class); -+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(class); -+ AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class); -+ -+ dc->desc = "ACPI Generic Event Device"; -+ dc->props = acpi_ged_properties; -+ dc->vmsd = &vmstate_acpi_ged; -+ -+ hc->plug = acpi_ged_device_plug_cb; -+ -+ adevc->send_event = acpi_ged_send_event; -+} -+ -+static const TypeInfo acpi_ged_info = { -+ .name = TYPE_ACPI_GED, -+ .parent = TYPE_SYS_BUS_DEVICE, -+ .instance_size = sizeof(AcpiGedState), -+ .instance_init = acpi_ged_initfn, -+ .class_init = acpi_ged_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_HOTPLUG_HANDLER }, -+ { TYPE_ACPI_DEVICE_IF }, -+ { } -+ } -+}; -+ -+static void acpi_ged_register_types(void) -+{ -+ type_register_static(&acpi_ged_info); -+} -+ -+type_init(acpi_ged_register_types) -diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h -new file mode 100644 -index 0000000000..2049e8d873 ---- /dev/null -+++ b/include/hw/acpi/generic_event_device.h -@@ -0,0 +1,100 @@ -+/* -+ * -+ * Copyright (c) 2018 Intel Corporation -+ * Copyright (c) 2019 Huawei Technologies R & D (UK) Ltd -+ * Written by Samuel Ortiz, Shameer Kolothum -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms and conditions of the GNU General Public License, -+ * version 2 or later, as published by the Free Software Foundation. -+ * -+ * The ACPI Generic Event Device (GED) is a hardware-reduced specific -+ * device[ACPI v6.1 Section 5.6.9] that handles all platform events, -+ * including the hotplug ones. 
Generic Event Device allows platforms -+ * to handle interrupts in ACPI ASL statements. It follows a very -+ * similar approach like the _EVT method from GPIO events. All -+ * interrupts are listed in _CRS and the handler is written in _EVT -+ * method. Here, we use a single interrupt for the GED device, relying -+ * on IO memory region to communicate the type of device affected by -+ * the interrupt. This way, we can support up to 32 events with a -+ * unique interrupt. -+ * -+ * Here is an example. -+ * -+ * Device (\_SB.GED) -+ * { -+ * Name (_HID, "ACPI0013") -+ * Name (_UID, Zero) -+ * Name (_CRS, ResourceTemplate () -+ * { -+ * Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, ) -+ * { -+ * 0x00000029, -+ * } -+ * }) -+ * OperationRegion (EREG, SystemMemory, 0x09080000, 0x04) -+ * Field (EREG, DWordAcc, NoLock, WriteAsZeros) -+ * { -+ * ESEL, 32 -+ * } -+ * -+ * Method (_EVT, 1, Serialized) // _EVT: Event -+ * { -+ * Local0 = ESEL // ESEL = IO memory region which specifies the -+ * // device type. -+ * If (((Local0 & One) == One)) -+ * { -+ * MethodEvent1() -+ * } -+ * If ((Local0 & 0x2) == 0x2) -+ * { -+ * MethodEvent2() -+ * } -+ * ... -+ * } -+ * } -+ * -+ */ -+ -+#ifndef HW_ACPI_GED_H -+#define HW_ACPI_GED_H -+ -+#include "hw/sysbus.h" -+#include "hw/acpi/memory_hotplug.h" -+ -+#define TYPE_ACPI_GED "acpi-ged" -+#define ACPI_GED(obj) \ -+ OBJECT_CHECK(AcpiGedState, (obj), TYPE_ACPI_GED) -+ -+#define ACPI_GED_EVT_SEL_OFFSET 0x0 -+#define ACPI_GED_EVT_SEL_LEN 0x4 -+ -+#define GED_DEVICE "GED" -+#define AML_GED_EVT_REG "EREG" -+#define AML_GED_EVT_SEL "ESEL" -+ -+/* -+ * Platforms need to specify the GED event bitmap -+ * to describe what kind of events they want to support -+ * through GED. 
-+ */ -+#define ACPI_GED_MEM_HOTPLUG_EVT 0x1 -+ -+typedef struct GEDState { -+ MemoryRegion io; -+ uint32_t sel; -+} GEDState; -+ -+typedef struct AcpiGedState { -+ SysBusDevice parent_obj; -+ MemHotplugState memhp_state; -+ MemoryRegion container_memhp; -+ GEDState ged_state; -+ uint32_t ged_event_bitmap; -+ qemu_irq irq; -+} AcpiGedState; -+ -+void build_ged_aml(Aml *table, const char* name, HotplugHandler *hotplug_dev, -+ uint32_t ged_irq, AmlRegionSpace rs, hwaddr ged_base); -+ -+#endif --- -2.19.1 diff --git a/hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch b/hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch deleted file mode 100644 index 292687199fbe797b86b25ac2fb56063f7080a0f0..0000000000000000000000000000000000000000 --- a/hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 7a08983315bf4d624966a89112259e2b4949de91 Mon Sep 17 00:00:00 2001 -From: Samuel Ortiz -Date: Wed, 18 Sep 2019 14:06:24 +0100 -Subject: [PATCH] hw/acpi: Do not create memory hotplug method when handler is - not defined - -With Hardware-reduced ACPI, the GED device will manage ACPI -hotplug entirely. As a consequence, make the memory specific -events AML generation optional. The code will only be added -when the method name is not NULL. - -Signed-off-by: Samuel Ortiz -Signed-off-by: Shameer Kolothum -Reviewed-by: Eric Auger -Reviewed-by: Igor Mammedov -Message-Id: <20190918130633.4872-3-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin ---- - hw/acpi/memory_hotplug.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c -index 9a515c0484..8b30356c1a 100644 ---- a/hw/acpi/memory_hotplug.c -+++ b/hw/acpi/memory_hotplug.c -@@ -711,10 +711,12 @@ void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, - } - aml_append(table, dev_container); - -- method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); -- aml_append(method, -- aml_call0(MEMORY_DEVICES_CONTAINER "." MEMORY_SLOT_SCAN_METHOD)); -- aml_append(table, method); -+ if (event_handler_method) { -+ method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); -+ aml_append(method, aml_call0(MEMORY_DEVICES_CONTAINER "." -+ MEMORY_SLOT_SCAN_METHOD)); -+ aml_append(table, method); -+ } - - g_free(mhp_res_path); - } --- -2.19.1 diff --git a/hw-acpi-Fix-the-memory-leak-issue.patch b/hw-acpi-Fix-the-memory-leak-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..614f03008ae8c38da717cec1786cc0e5545290fe --- /dev/null +++ b/hw-acpi-Fix-the-memory-leak-issue.patch @@ -0,0 +1,50 @@ +From 569786d7c883154effcb215bd74f30f680f9e540 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Wed, 30 Jul 2025 16:03:57 +0800 +Subject: [PATCH] hw/acpi: Fix the memory leak issue + +During the creation process of the acpi ged device, +the function cpu_hotplug_hw_init was wrongly called multiple times, +resulting in a memory leak. + +Now, delete the redundant calls of the function cpu_hotplug_hw_init +to solve the memory leak problem. 
+ +Fixes: ac96f2161550 ("hw/acpi: Update ACPI GED framework to support vCPU Hotplug") +Fixes: 6e17d32d6df2 ("acpi/ged: Init cpu hotplug only when machine support it") +Signed-off-by: Xianglai Li +--- + hw/acpi/generic_event_device.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 755653dc26..61a4c9e643 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -439,7 +439,6 @@ static void acpi_ged_initfn(Object *obj) + AcpiGedState *s = ACPI_GED(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + GEDState *ged_st = &s->ged_state; +- MachineClass *mc; + + memory_region_init_io(&ged_st->evt, obj, &ged_evt_ops, ged_st, + TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); +@@ -463,15 +462,6 @@ static void acpi_ged_initfn(Object *obj) + memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st, + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); +- +- mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (mc->possible_cpu_arch_ids) { +- memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", +- ACPI_CPU_HOTPLUG_REG_LEN); +- sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); +- cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), +- &s->cpuhp_state, 0); +- } + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +-- +2.33.0 + diff --git a/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch b/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch new file mode 100644 index 0000000000000000000000000000000000000000..191328fd094c386d672ca9c32341105fa1cc0b1a --- /dev/null +++ b/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch @@ -0,0 +1,81 @@ +From de1c8d6be3de67ff9854e9b008a000e1898aaacb Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 8 Jun 2020 21:50:08 +0100 +Subject: [PATCH] hw/acpi: Init GED framework with cpu hotplug events + +ACPI GED(as described in the ACPI 6.2 
spec) can be used to generate ACPI events +when OSPM/guest receives an interrupt listed in the _CRS object of GED. OSPM +then maps or demultiplexes the event by evaluating _EVT method. + +This change adds the support of cpu hotplug event initialization in the +existing GED framework. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 8 ++++++++ + include/hw/acpi/generic_event_device.h | 5 +++++ + 2 files changed, 13 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index a3d31631fe..d2fa1d0e4a 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -25,6 +25,7 @@ static const uint32_t ged_supported_events[] = { + ACPI_GED_MEM_HOTPLUG_EVT, + ACPI_GED_PWR_DOWN_EVT, + ACPI_GED_NVDIMM_HOTPLUG_EVT, ++ ACPI_GED_CPU_HOTPLUG_EVT, + }; + + /* +@@ -400,6 +401,13 @@ static void acpi_ged_initfn(Object *obj) + memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st, + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); ++ ++ s->cpuhp.device = OBJECT(s); ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index ba84ce0214..a803ea818e 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -60,6 +60,7 @@ + #define HW_ACPI_GENERIC_EVENT_DEVICE_H + + #include "hw/sysbus.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/acpi/memory_hotplug.h" + #include "hw/acpi/ghes.h" + #include "qom/object.h" +@@ -95,6 +96,7 @@ 
OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) + #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 + #define ACPI_GED_PWR_DOWN_EVT 0x2 + #define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4 ++#define ACPI_GED_CPU_HOTPLUG_EVT 0x8 + + typedef struct GEDState { + MemoryRegion evt; +@@ -106,6 +108,9 @@ struct AcpiGedState { + SysBusDevice parent_obj; + MemHotplugState memhp_state; + MemoryRegion container_memhp; ++ CPUHotplugState cpuhp_state; ++ MemoryRegion container_cpuhp; ++ AcpiCpuHotplug cpuhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; +-- +2.27.0 + diff --git a/hw-acpi-Make-ACPI-IO-address-space-configurable.patch b/hw-acpi-Make-ACPI-IO-address-space-configurable.patch deleted file mode 100644 index cdf597b51566400b17ddac3e27dc93cb65a61bd6..0000000000000000000000000000000000000000 --- a/hw-acpi-Make-ACPI-IO-address-space-configurable.patch +++ /dev/null @@ -1,196 +0,0 @@ -From 6cd7281c73ca462b2f27969f1e28f1afd3ebe82d Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:23 +0100 -Subject: [PATCH] hw/acpi: Make ACPI IO address space configurable - -This is in preparation for adding support for ARM64 platforms -where it doesn't use port mapped IO for ACPI IO space. We are -making changes so that MMIO region can be accommodated -and board can pass the base address into the aml build function. - -Also move few MEMORY_* definitions to header so that other memory -hotplug event signalling mechanisms (eg. Generic Event Device on -HW-reduced acpi platforms) can use the same from their respective -event handler code. - -Signed-off-by: Shameer Kolothum -Reviewed-by: Eric Auger -Reviewed-by: Igor Mammedov -Message-Id: <20190918130633.4872-2-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin ---- - hw/acpi/memory_hotplug.c | 33 ++++++++++++++------------------ - hw/i386/acpi-build.c | 7 ++++++- - hw/i386/pc.c | 3 +++ - include/hw/acpi/memory_hotplug.h | 9 +++++++-- - include/hw/i386/pc.h | 3 +++ - 5 files changed, 33 insertions(+), 22 deletions(-) - -diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c -index 297812d5f7..9a515c0484 100644 ---- a/hw/acpi/memory_hotplug.c -+++ b/hw/acpi/memory_hotplug.c -@@ -29,12 +29,7 @@ - #define MEMORY_SLOT_PROXIMITY_METHOD "MPXM" - #define MEMORY_SLOT_EJECT_METHOD "MEJ0" - #define MEMORY_SLOT_NOTIFY_METHOD "MTFY" --#define MEMORY_SLOT_SCAN_METHOD "MSCN" - #define MEMORY_HOTPLUG_DEVICE "MHPD" --#define MEMORY_HOTPLUG_IO_LEN 24 --#define MEMORY_DEVICES_CONTAINER "\\_SB.MHPC" -- --static uint16_t memhp_io_base; - - static ACPIOSTInfo *acpi_memory_device_status(int slot, MemStatus *mdev) - { -@@ -209,7 +204,7 @@ static const MemoryRegionOps acpi_memory_hotplug_ops = { - }; - - void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, -- MemHotplugState *state, uint16_t io_base) -+ MemHotplugState *state, hwaddr io_base) - { - MachineState *machine = MACHINE(qdev_get_machine()); - -@@ -218,12 +213,10 @@ void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, - return; - } - -- assert(!memhp_io_base); -- memhp_io_base = io_base; - state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); - memory_region_init_io(&state->io, owner, &acpi_memory_hotplug_ops, state, - "acpi-mem-hotplug", MEMORY_HOTPLUG_IO_LEN); -- memory_region_add_subregion(as, memhp_io_base, &state->io); -+ memory_region_add_subregion(as, io_base, &state->io); - } - - /** -@@ -342,7 +335,8 @@ const VMStateDescription vmstate_memory_hotplug = { - - void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, - const char *res_root, -- const char *event_handler_method) -+ const char *event_handler_method, -+ AmlRegionSpace rs, hwaddr memhp_io_base) - { - int i; - Aml *ifctx; -@@ -351,10 +345,6 @@ void 
build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, - Aml *mem_ctrl_dev; - char *mhp_res_path; - -- if (!memhp_io_base) { -- return; -- } -- - mhp_res_path = g_strdup_printf("%s." MEMORY_HOTPLUG_DEVICE, res_root); - mem_ctrl_dev = aml_device("%s", mhp_res_path); - { -@@ -365,14 +355,19 @@ void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, - aml_name_decl("_UID", aml_string("Memory hotplug resources"))); - - crs = aml_resource_template(); -- aml_append(crs, -- aml_io(AML_DECODE16, memhp_io_base, memhp_io_base, 0, -- MEMORY_HOTPLUG_IO_LEN) -- ); -+ if (rs == AML_SYSTEM_IO) { -+ aml_append(crs, -+ aml_io(AML_DECODE16, memhp_io_base, memhp_io_base, 0, -+ MEMORY_HOTPLUG_IO_LEN) -+ ); -+ } else { -+ aml_append(crs, aml_memory32_fixed(memhp_io_base, -+ MEMORY_HOTPLUG_IO_LEN, AML_READ_WRITE)); -+ } - aml_append(mem_ctrl_dev, aml_name_decl("_CRS", crs)); - - aml_append(mem_ctrl_dev, aml_operation_region( -- MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO, -+ MEMORY_HOTPLUG_IO_REGION, rs, - aml_int(memhp_io_base), MEMORY_HOTPLUG_IO_LEN) - ); - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index f3fdfefcd5..749218561a 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -1871,7 +1871,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, - build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base, - "\\_SB.PCI0", "\\_GPE._E02"); - } -- build_memory_hotplug_aml(dsdt, nr_mem, "\\_SB.PCI0", "\\_GPE._E03"); -+ -+ if (pcms->memhp_io_base && nr_mem) { -+ build_memory_hotplug_aml(dsdt, nr_mem, "\\_SB.PCI0", -+ "\\_GPE._E03", AML_SYSTEM_IO, -+ pcms->memhp_io_base); -+ } - - scope = aml_scope("_GPE"); - { -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index d011733ff7..8a914130b0 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1936,6 +1936,9 @@ void pc_memory_init(PCMachineState *pcms, - - /* Init default IOAPIC address space */ - pcms->ioapic_as = &address_space_memory; -+ -+ /* Init ACPI memory hotplug IO base address */ -+ pcms->memhp_io_base = 
ACPI_MEMORY_HOTPLUG_BASE; - } - - /* -diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h -index 77c65765d6..dfe9cf3fde 100644 ---- a/include/hw/acpi/memory_hotplug.h -+++ b/include/hw/acpi/memory_hotplug.h -@@ -5,6 +5,10 @@ - #include "hw/acpi/acpi.h" - #include "hw/acpi/aml-build.h" - -+#define MEMORY_SLOT_SCAN_METHOD "MSCN" -+#define MEMORY_DEVICES_CONTAINER "\\_SB.MHPC" -+#define MEMORY_HOTPLUG_IO_LEN 24 -+ - /** - * MemStatus: - * @is_removing: the memory device in slot has been requested to be ejected. -@@ -29,7 +33,7 @@ typedef struct MemHotplugState { - } MemHotplugState; - - void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, -- MemHotplugState *state, uint16_t io_base); -+ MemHotplugState *state, hwaddr io_base); - - void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st, - DeviceState *dev, Error **errp); -@@ -48,5 +52,6 @@ void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list); - - void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, - const char *res_root, -- const char *event_handler_method); -+ const char *event_handler_method, -+ AmlRegionSpace rs, hwaddr memhp_io_base); - #endif -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 859b64c51d..49b47535cf 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -69,6 +69,9 @@ struct PCMachineState { - /* Address space used by IOAPIC device. All IOAPIC interrupts - * will be translated to MSI messages in the address space. 
*/ - AddressSpace *ioapic_as; -+ -+ /* ACPI Memory hotplug IO base address */ -+ hwaddr memhp_io_base; - }; - - #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" --- -2.19.1 diff --git a/hw-acpi-Make-_MAT-method-optional.patch b/hw-acpi-Make-_MAT-method-optional.patch new file mode 100644 index 0000000000000000000000000000000000000000..5695a1981c6efbe5ed71981f97dcf81d8eee5e8e --- /dev/null +++ b/hw-acpi-Make-_MAT-method-optional.patch @@ -0,0 +1,41 @@ +From e9b0d476172e872bf695780a9ffa8072faeb3cd0 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 25 Apr 2022 17:40:57 +0100 +Subject: [PATCH] hw/acpi: Make _MAT method optional + +The GICC interface on arm64 vCPUs is statically defined in the MADT, and +doesn't require a _MAT entry. Although the GICC is indicated as present +by the MADT entry, it can only be used from vCPU sysregs, which aren't +accessible until hot-add. + +Co-developed-by: Jean-Philippe Brucker +Signed-off-by: Jean-Philippe Brucker +Co-developed-by: Jonathan Cameron +Signed-off-by: Jonathan Cameron +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 991f1d4181..c922c380aa 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -720,9 +720,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, method); + + /* build _MAT object */ +- build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ +- aml_append(dev, aml_name_decl("_MAT", +- aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ if (build_madt_cpu) { ++ build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ ++ aml_append(dev, aml_name_decl("_MAT", ++ aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ } + g_array_free(madt_buf, true); + + if (CPU(arch_ids->cpus[i].cpu) != first_cpu) { +-- +2.27.0 + diff --git a/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c-sync-upstream.patch 
b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c-sync-upstream.patch new file mode 100644 index 0000000000000000000000000000000000000000..a81b5f1806e515000954cc8b82632ad2ef52ffeb --- /dev/null +++ b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c-sync-upstream.patch @@ -0,0 +1,49 @@ +From a8416845f721aa5ba03446b3ccf83b096b7a0d77 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 16 Jul 2024 12:14:57 +0100 +Subject: [PATCH 63/78] hw/acpi: Move CPU ctrl-dev MMIO region len macro to + common header file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +CPU ctrl-dev MMIO region length could be used in ACPI GED and various other +architecture specific places. Move ACPI_CPU_HOTPLUG_REG_LEN macro to more +appropriate common header file. + +Signed-off-by: Salil Mehta +Reviewed-by: Alex Bennée +Reviewed-by: Jonathan Cameron +Reviewed-by: Gavin Shan +Reviewed-by: David Hildenbrand +Reviewed-by: Shaoqin Huang +Tested-by: Vishnu Pajjuri +Tested-by: Xianglai Li +Tested-by: Miguel Luis +Tested-by: Zhao Liu +Reviewed-by: Zhao Liu +Reviewed-by: Igor Mammedov +Message-Id: <20240716111502.202344-3-salil.mehta@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Xianglai Li +--- + include/hw/acpi/cpu.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index fced952152..fa5b5e5f01 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -18,6 +18,8 @@ + #include "hw/boards.h" + #include "hw/hotplug.h" + ++#define ACPI_CPU_HOTPLUG_REG_LEN 12 ++ + typedef struct AcpiCpuStatus { + CPUState *cpu; + uint64_t arch_id; +-- +2.39.1 + diff --git a/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..9bfb91e15162c90365c621927205f857269bc4c3 --- /dev/null +++ b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch @@ -0,0 +1,52 @@ +From fd6e7e7278e1c0fb08e0a09d9e22157e11b36ece Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 20 Aug 2023 17:11:04 +0000 +Subject: [PATCH] hw/acpi: Move CPU ctrl-dev MMIO region len macro to common + header file + +CPU ctrl-dev MMIO region length could be used in ACPI GED (common ACPI code +across architectures) and various other architecture specific places. To make +these code places independent of compilation order, ACPI_CPU_HOTPLUG_REG_LEN +macro should be moved to a header file. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 2 +- + include/hw/acpi/cpu_hotplug.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 011d2c6c2d..4b24a25003 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -1,13 +1,13 @@ + #include "qemu/osdep.h" + #include "migration/vmstate.h" + #include "hw/acpi/cpu.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/core/cpu.h" + #include "qapi/error.h" + #include "qapi/qapi-events-acpi.h" + #include "trace.h" + #include "sysemu/numa.h" + +-#define ACPI_CPU_HOTPLUG_REG_LEN 12 + #define ACPI_CPU_SELECTOR_OFFSET_WR 0 + #define ACPI_CPU_FLAGS_OFFSET_RW 4 + #define ACPI_CPU_CMD_OFFSET_WR 5 +diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h +index 3b932abbbb..48b291e45e 100644 +--- a/include/hw/acpi/cpu_hotplug.h ++++ b/include/hw/acpi/cpu_hotplug.h +@@ -19,6 +19,8 @@ + #include "hw/hotplug.h" + #include "hw/acpi/cpu.h" + ++#define ACPI_CPU_HOTPLUG_REG_LEN 12 ++ + typedef struct AcpiCpuHotplug { + Object *device; + MemoryRegion io; +-- +2.27.0 + diff --git a/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho-sync-upstream.patch b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho-sync-upstream.patch new file mode 100644 index 0000000000000000000000000000000000000000..41bb4e97dabd2fb0c66f448cb2362301a271f883 --- /dev/null +++ b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho-sync-upstream.patch @@ -0,0 +1,128 @@ +From ac96f216155002d0c874ff88e301e83495093085 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 16 Jul 2024 12:14:58 +0100 +Subject: [PATCH 64/78] hw/acpi: Update ACPI GED framework to support vCPU + Hotplug + +ACPI GED (as described in the ACPI 6.4 spec) uses an interrupt listed in the +_CRS object of GED to intimate OSPM about an event. Later then demultiplexes the +notified event by evaluating ACPI _EVT method to know the type of event. 
Use +ACPI GED to also notify the guest kernel about any CPU hot(un)plug events. + +Note, GED interface is used by many hotplug events like memory hotplug, NVDIMM +hotplug and non-hotplug events like system power down event. Each of these can +be selected using a bit in the 32 bit GED IO interface. A bit has been reserved +for the CPU hotplug event. + +ACPI CPU hotplug related initialization should only happen if ACPI_CPU_HOTPLUG +support has been enabled for particular architecture. Add cpu_hotplug_hw_init() +stub to avoid compilation break. + +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +Reviewed-by: Jonathan Cameron +Reviewed-by: Gavin Shan +Reviewed-by: David Hildenbrand +Reviewed-by: Shaoqin Huang +Tested-by: Vishnu Pajjuri +Tested-by: Xianglai Li +Tested-by: Miguel Luis +Reviewed-by: Vishnu Pajjuri +Tested-by: Zhao Liu +Reviewed-by: Zhao Liu +Message-Id: <20240716111502.202344-4-salil.mehta@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Igor Mammedov +Signed-off-by: Xianglai Li +--- + docs/specs/acpi_hw_reduced_hotplug.rst | 3 ++- + hw/acpi/generic_event_device.c | 37 ++++++++++++++++++++++++++ + include/hw/acpi/generic_event_device.h | 1 + + 3 files changed, 40 insertions(+), 1 deletion(-) + +diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst +index 0bd3f9399f..3acd6fcd8b 100644 +--- a/docs/specs/acpi_hw_reduced_hotplug.rst ++++ b/docs/specs/acpi_hw_reduced_hotplug.rst +@@ -64,7 +64,8 @@ GED IO interface (4 byte access) + 0: Memory hotplug event + 1: System power down event + 2: NVDIMM hotplug event +- 3-31: Reserved ++ 3: CPU hotplug event ++ 4-31: Reserved + + **write_access:** + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 2ce7031f1a..755653dc26 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -397,6 +397,42 @@ static const VMStateDescription vmstate_acpi_ged = { + } + }; + ++static void acpi_ged_realize(DeviceState *dev, Error **errp) ++{ ++ SysBusDevice *sbd = SYS_BUS_DEVICE(dev); ++ AcpiGedState *s = ACPI_GED(dev); ++ uint32_t ged_events; ++ int i; ++ ++ ged_events = ctpop32(s->ged_event_bitmap); ++ ++ for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) { ++ uint32_t event = s->ged_event_bitmap & ged_supported_events[i]; ++ ++ if (!event) { ++ continue; ++ } ++ ++ switch (event) { ++ case ACPI_GED_CPU_HOTPLUG_EVT: ++ /* initialize CPU Hotplug related regions */ ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), ++ "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(sbd, &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); ++ break; ++ } ++ ged_events--; ++ } ++ ++ if (ged_events) { ++ error_report("Unsupported events specified"); ++ abort(); ++ } ++} ++ + static void acpi_ged_initfn(Object *obj) + { + DeviceState *dev = DEVICE(obj); +@@ -447,6 +483,7 @@ static void 
acpi_ged_class_init(ObjectClass *class, void *data) + dc->desc = "ACPI Generic Event Device"; + device_class_set_props(dc, acpi_ged_properties); + dc->vmsd = &vmstate_acpi_ged; ++ dc->realize = acpi_ged_realize; + + hc->plug = acpi_ged_device_plug_cb; + hc->unplug_request = acpi_ged_unplug_request_cb; +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index 8ed9534c57..d1df3c12e5 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -63,6 +63,7 @@ + #include "hw/acpi/cpu_hotplug.h" + #include "hw/acpi/memory_hotplug.h" + #include "hw/acpi/ghes.h" ++#include "hw/acpi/cpu.h" + #include "qom/object.h" + + #define ACPI_POWER_BUTTON_DEVICE "PWRB" +-- +2.39.1 + diff --git a/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b2a0a023e0ed032379dd39051804d267edad0f1 --- /dev/null +++ b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch @@ -0,0 +1,77 @@ +From 0bdb1861985704af9b82e35053b5ab99f7880eb6 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Thu, 7 May 2020 21:30:09 +0100 +Subject: [PATCH] hw/acpi: Update ACPI GED framework to support vCPU Hotplug + +ACPI GED shall be used to convey to the guest kernel about any CPU hot-(un)plug +events. Therefore, existing ACPI GED framework inside QEMU needs to be enhanced +to support CPU hotplug state and events. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index ad252e6a91..0266733a54 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -12,6 +12,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "hw/acpi/acpi.h" ++#include "hw/acpi/cpu.h" + #include "hw/acpi/generic_event_device.h" + #include "hw/irq.h" + #include "hw/mem/pc-dimm.h" +@@ -239,6 +240,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, + } else { + acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); + } ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "virt: device plug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -253,6 +256,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev, + if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && + !(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)))) { + acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "acpi: device unplug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -266,6 +271,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_unplug_cb(&s->memhp_state, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "acpi: device unplug for unsupported device" + " type: %s", 
object_get_typename(OBJECT(dev))); +@@ -277,6 +284,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) + AcpiGedState *s = ACPI_GED(adev); + + acpi_memory_ospm_status(&s->memhp_state, list); ++ acpi_cpu_ospm_status(&s->cpuhp_state, list); + } + + static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -291,6 +299,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + sel = ACPI_GED_PWR_DOWN_EVT; + } else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) { + sel = ACPI_GED_NVDIMM_HOTPLUG_EVT; ++ } else if (ev & ACPI_CPU_HOTPLUG_STATUS) { ++ sel = ACPI_GED_CPU_HOTPLUG_EVT; + } else { + /* Unknown event. Return without generating interrupt. */ + warn_report("GED: Unsupported event %d. No irq injected", ev); +-- +2.27.0 + diff --git a/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change-sync-upstream.patch b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change-sync-upstream.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec7cc8f38408f8e9f3a7808ce1cd700f0822ed64 --- /dev/null +++ b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change-sync-upstream.patch @@ -0,0 +1,54 @@ +From 16d44ddb63becd559cc2185549c4b18d26feab60 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 16 Jul 2024 12:15:00 +0100 +Subject: [PATCH 65/78] hw/acpi: Update CPUs AML with cpu-(ctrl)dev change + +CPUs Control device(\\_SB.PCI0) register interface for the x86 arch is IO port +based and existing CPUs AML code assumes _CRS objects would evaluate to a system +resource which describes IO Port address. But on ARM arch CPUs control +device(\\_SB.PRES) register interface is memory-mapped hence _CRS object should +evaluate to system resource which describes memory-mapped base address. Update +build CPUs AML function to accept both IO/MEMORY region spaces and accordingly +update the _CRS object. 
+ +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +Reviewed-by: Gavin Shan +Tested-by: Vishnu Pajjuri +Reviewed-by: Jonathan Cameron +Tested-by: Xianglai Li +Tested-by: Miguel Luis +Reviewed-by: Shaoqin Huang +Tested-by: Zhao Liu +Reviewed-by: Igor Mammedov +Message-Id: <20240716111502.202344-6-salil.mehta@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Xianglai Li +--- + hw/acpi/cpu.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 292e1daca2..5e9093991e 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -392,11 +392,13 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_name_decl("_UID", aml_string("CPU Hotplug resources"))); + aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); + ++ assert((rs == AML_SYSTEM_IO) || (rs == AML_SYSTEM_MEMORY)); ++ + crs = aml_resource_template(); + if (rs == AML_SYSTEM_IO) { + aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1, + ACPI_CPU_HOTPLUG_REG_LEN)); +- } else { ++ } else if (rs == AML_SYSTEM_MEMORY) { + aml_append(crs, aml_memory32_fixed(base_addr, + ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); + } +-- +2.39.1 + diff --git a/acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch similarity index 44% rename from acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch rename to hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch index 0506d1b73c1586699dd0fe92254baa5d0af26a7b..91d4c4d78e6922d07c7981c2a886c70d07c45d2d 100644 --- a/acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch +++ b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch @@ -1,34 +1,43 @@ -From 107c267ebe5b8c461268a4ff8384ad2f2b9e8ce0 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Wed, 22 Apr 2020 16:11:13 +0800 -Subject: [PATCH] acpi/cpu: Prepare build_cpus_aml for arm virt +From 
06059c960d863c21c7d9cf4829ad2078692ed9e1 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 13:27:57 +0100 +Subject: [PATCH] hw/acpi: Update CPUs AML with cpu-(ctrl)dev change -We will reuse build_cpus_aml to build DSDT cpus aml in arm/virt -ACPI to realize cpu hotplug. Three points are added. +CPUs Control device(\\_SB.PCI0) register interface for the x86 arch is based on +PCI and is IO port based and hence existing cpus AML code assumes _CRS objects +would evaluate to a system resource which describes IO Port address. But on ARM +arch CPUs control device(\\_SB.PRES) register interface is memory-mapped hence +_CRS object should evaluate to system resource which describes memory-mapped +base address. -1. Make ACPI IO address space configurable, because ARM64 platforms - don't use port IO for ACPI IO space. -2. Add GICC struct building support in _MAT of cpu aml. -3. Let the hotplug method parameter can be NULL, because ACPI GED - will realize it. +This cpus AML code change updates the existing inerface of the build cpus AML +function to accept both IO/MEMORY type regions and update the _CRS object +correspondingly. -Besides, CPU CPPC building is injected. +NOTE: Beside above CPU scan shall be triggered when OSPM evaluates _EVT method + part of the GED framework which is covered in subsequent patch. 
+Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu Signed-off-by: Keqian Zhu Signed-off-by: Salil Mehta --- - hw/acpi/cpu.c | 32 +++++++++++++++++++++++++------- - hw/i386/acpi-build.c | 2 +- - include/hw/acpi/cpu.h | 3 ++- - 3 files changed, 28 insertions(+), 9 deletions(-) + hw/acpi/cpu.c | 23 ++++++++++++++++------- + hw/i386/acpi-build.c | 3 ++- + include/hw/acpi/cpu.h | 5 +++-- + 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c -index 0c0bfe479a..72ad1fcff2 100644 +index cabeb4e86b..cf0c7e8538 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c -@@ -314,7 +314,8 @@ const VMStateDescription vmstate_cpu_hotplug = { +@@ -342,9 +342,10 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_FW_EJECT_EVENT "CEJF" + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - hwaddr io_base, +- build_madt_cpu_fn build_madt_cpu, hwaddr io_base, ++ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, const char *res_root, - const char *event_handler_method) + const char *event_handler_method, @@ -36,56 +45,32 @@ index 0c0bfe479a..72ad1fcff2 100644 { Aml *ifctx; Aml *field; -@@ -342,13 +343,18 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +@@ -369,13 +370,19 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); - + crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, -- ACPI_CPU_HOTPLUG_REG_LEN)); + if (rs == AML_SYSTEM_IO) { -+ aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, -+ ACPI_CPU_HOTPLUG_REG_LEN)); ++ aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1, + ACPI_CPU_HOTPLUG_REG_LEN)); + } else { -+ aml_append(crs, aml_memory32_fixed(io_base, -+ ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); ++ aml_append(crs, aml_memory32_fixed(base_addr, ++ ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); + } ++ 
aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs)); - + /* declare CPU hotplug MMIO region with related access fields */ aml_append(cpu_ctrl_dev, - aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base), -+ aml_operation_region("PRST", rs, aml_int(io_base), ++ aml_operation_region("PRST", rs, aml_int(base_addr), ACPI_CPU_HOTPLUG_REG_LEN)); - + field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, -@@ -517,6 +523,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - aml_append(dev, aml_name_decl("_UID", uid)); - } - -+ assert(adevc); -+ if (adevc->cpu_cppc) { -+ adevc->cpu_cppc(adev, i, arch_ids->len, dev); -+ } -+ - method = aml_method("_STA", 0, AML_SERIALIZED); - aml_append(method, aml_return(aml_call1(CPU_STS_METHOD, uid))); - aml_append(dev, method); -@@ -535,6 +546,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - apic->flags = cpu_to_le32(1); - break; - } -+ case ACPI_APIC_GENERIC_CPU_INTERFACE: { -+ AcpiMadtGenericCpuInterface *gicc = (void *)madt_buf->data; -+ gicc->flags = cpu_to_le32(1); -+ break; -+ } - default: - assert(0); - } -@@ -570,9 +586,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +@@ -699,9 +706,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(sb_scope, cpus_dev); aml_append(table, sb_scope); - + - method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); - aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); - aml_append(table, method); @@ -94,35 +79,40 @@ index 0c0bfe479a..72ad1fcff2 100644 + aml_append(method, aml_call0("\\_SB.CPUS." 
CPU_SCAN_METHOD)); + aml_append(table, method); + } - + g_free(cphp_res_path); } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 749218561a..c97731ecb3 100644 +index 80db183b78..db4ca8a66a 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -1869,7 +1869,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, - .acpi_1_compatible = true, .has_legacy_cphp = true +@@ -1546,7 +1546,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + .fw_unplugs_cpu = pm->smi_on_cpu_unplug, }; - build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base, -- "\\_SB.PCI0", "\\_GPE._E02"); -+ "\\_SB.PCI0", "\\_GPE._E02", AML_SYSTEM_IO); + build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, +- pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02"); ++ pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", ++ AML_SYSTEM_IO); } - + if (pcms->memhp_io_base && nr_mem) { diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h -index 62f0278ba2..a30ec84a4f 100644 +index 209e1773f8..76bc7eb251 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h -@@ -55,7 +55,8 @@ typedef struct CPUHotplugFeatures { +@@ -60,9 +60,10 @@ typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - hwaddr io_base, +- build_madt_cpu_fn build_madt_cpu, hwaddr io_base, ++ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, const char *res_root, - const char *event_handler_method); + const char *event_handler_method, + AmlRegionSpace rs); - + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list); + +-- +2.27.0 --- -2.19.1 diff --git a/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch b/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2dceaf31fbf7503d09b1410428b17ffe03f6338 --- /dev/null +++ b/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch @@ 
-0,0 +1,53 @@ +From cfdb0f24431ae0f5115f905a1411509c01a50e88 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 9 Jun 2020 00:50:36 +0100 +Subject: [PATCH] hw/acpi: Update GED _EVT method AML with cpu scan + +OSPM evaluates _EVT method to map the event. The cpu hotplug event eventually +results in start of the cpu scan. Scan figures out the cpu and the kind of +event(plug/unplug) and notifies it back to the guest. + +The change in this patch updates the GED AML _EVT method with the call to +\\_SB.CPUS.CSCN which will do above. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 4 ++++ + include/hw/acpi/cpu_hotplug.h | 2 ++ + 2 files changed, 6 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index b84602b238..ad252e6a91 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -108,6 +108,10 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, + aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." + MEMORY_SLOT_SCAN_METHOD)); + break; ++ case ACPI_GED_CPU_HOTPLUG_EVT: ++ aml_append(if_ctx, aml_call0(ACPI_CPU_CONTAINER "." 
++ ACPI_CPU_SCAN_METHOD)); ++ break; + case ACPI_GED_PWR_DOWN_EVT: + aml_append(if_ctx, + aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), +diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h +index 48b291e45e..ef631750b4 100644 +--- a/include/hw/acpi/cpu_hotplug.h ++++ b/include/hw/acpi/cpu_hotplug.h +@@ -20,6 +20,8 @@ + #include "hw/acpi/cpu.h" + + #define ACPI_CPU_HOTPLUG_REG_LEN 12 ++#define ACPI_CPU_SCAN_METHOD "CSCN" ++#define ACPI_CPU_CONTAINER "\\_SB.CPUS" + + typedef struct AcpiCpuHotplug { + Object *device; +-- +2.27.0 + diff --git a/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch b/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..93772f029c4a74fcc623b2cd8138ac2063e63049 --- /dev/null +++ b/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch @@ -0,0 +1,37 @@ +From 576a2a88625978f1befde11f0823f32bbc54cad1 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 28 Aug 2023 20:00:08 +0000 +Subject: [PATCH] hw/acpi: Use qemu_present_cpu() API in ACPI CPU hotplug init + +ACPI CPU Hotplug code assumes a virtual CPU is unplugged if the CPUState object +is absent in the list of the possible CPUs(CPUArchIdList *possible_cpus) +maintained on per-machine basis. Use the earlier introduced qemu_present_cpu() +API to check this state. + +This change should have no bearing on the functionality of any architecture and +is merely a representational change. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 4b24a25003..cabeb4e86b 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -226,7 +226,10 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + state->dev_count = id_list->len; + state->devs = g_new0(typeof(*state->devs), state->dev_count); + for (i = 0; i < id_list->len; i++) { +- state->devs[i].cpu = CPU(id_list->cpus[i].cpu); ++ struct CPUState *cpu = CPU(id_list->cpus[i].cpu); ++ if (qemu_present_cpu(cpu)) { ++ state->devs[i].cpu = cpu; ++ } + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +-- +2.27.0 + diff --git a/hw-acpi-cpu-Use-CPUState-typedef.patch b/hw-acpi-cpu-Use-CPUState-typedef.patch new file mode 100644 index 0000000000000000000000000000000000000000..06f9df398f89a68317504335a9e84d38859724ff --- /dev/null +++ b/hw-acpi-cpu-Use-CPUState-typedef.patch @@ -0,0 +1,34 @@ +From 105ea4d8301791bbb5a76df1f527fb5df439c565 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 16:01:50 +0800 +Subject: [PATCH] hw/acpi/cpu: Use CPUState typedef +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from b8492bd430ecc1ceb80cac19b46870d423f1e854 +QEMU coding style recommends using structure typedefs: +https://www.qemu.org/docs/master/devel/style.html#typedefs + +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: dinglimin +--- + include/hw/acpi/cpu.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index bc901660fb..209e1773f8 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -19,7 +19,7 @@ + #include "hw/hotplug.h" + + typedef struct AcpiCpuStatus { +- struct CPUState *cpu; ++ CPUState *cpu; + uint64_t arch_id; + bool is_inserting; + bool is_removing; +-- +2.27.0 
+ diff --git a/hw-arm-Activate-IOMMUFD-for-virt-machines.patch b/hw-arm-Activate-IOMMUFD-for-virt-machines.patch new file mode 100644 index 0000000000000000000000000000000000000000..b8c27675786fa59ffafde7505b45cfc9f62ad8bc --- /dev/null +++ b/hw-arm-Activate-IOMMUFD-for-virt-machines.patch @@ -0,0 +1,34 @@ +From bcb031b40fe40d5b6347b2134fb039945b87e8a3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Sat, 11 Jan 2025 10:52:55 +0800 +Subject: [PATCH] hw/arm: Activate IOMMUFD for virt machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/arm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index c0a7d0bd58..4a0ea0628f 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -8,6 +8,7 @@ config ARM_VIRT + imply TPM_TIS_SYSBUS + imply TPM_TIS_I2C + imply NVDIMM ++ imply IOMMUFD + select ARM_GIC + select ACPI + select ARM_SMMUV3 +-- +2.41.0.windows.1 + diff --git a/hw-arm-Changes-required-for-reset-and-to-support-nex.patch b/hw-arm-Changes-required-for-reset-and-to-support-nex.patch new file mode 100644 index 0000000000000000000000000000000000000000..66816ac14e0fd5e975d63b1786c03028908f8569 --- /dev/null +++ b/hw-arm-Changes-required-for-reset-and-to-support-nex.patch @@ -0,0 +1,111 @@ +From 3e5f043c493fa4765c5637bec66be2bd620bc53f Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 18:10:24 +0100 +Subject: [PATCH] hw/arm: Changes required for reset and to support next boot + +Updates the firmware config with the next boot cpus information and also +registers the reset callback to be called when guest reboots to reset the cpu. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/boot.c | 2 +- + hw/arm/virt.c | 18 +++++++++++++++--- + include/hw/arm/boot.h | 2 ++ + include/hw/arm/virt.h | 1 + + 4 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index d1671e1d42..345c7cfa19 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -683,7 +683,7 @@ fail: + return -1; + } + +-static void do_cpu_reset(void *opaque) ++void do_cpu_reset(void *opaque) + { + ARMCPU *cpu = opaque; + CPUState *cs = CPU(cpu); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 60cd560ab9..eedff8e525 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -46,6 +46,8 @@ + #include "sysemu/device_tree.h" + #include "sysemu/numa.h" + #include "sysemu/runstate.h" ++#include "sysemu/reset.h" ++#include "sysemu/sysemu.h" + #include "sysemu/tpm.h" + #include "sysemu/tcg.h" + #include "sysemu/kvm.h" +@@ -1453,7 +1455,7 @@ static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as) + char *nodename; + + fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as); +- fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus); ++ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); + + nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -3276,7 +3278,13 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (local_err) { + goto fail; + } +- /* TODO: register cpu for reset & update F/W info for the next boot */ ++ /* register this cpu for reset & update F/W info for the next boot */ ++ qemu_register_reset(do_cpu_reset, ARM_CPU(cs)); ++ } ++ ++ vms->boot_cpus++; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); + } + + cs->disabled = false; +@@ -3351,7 +3359,11 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + 
unwire_gic_cpu_irqs(vms, cs); + virt_update_gic(vms, cs); + +- /* TODO: unregister cpu for reset & update F/W info for the next boot */ ++ qemu_unregister_reset(do_cpu_reset, ARM_CPU(cs)); ++ vms->boot_cpus--; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); ++ } + + qobject_unref(dev->opts); + dev->opts = NULL; +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 80c492d742..f81326a1dc 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -178,6 +178,8 @@ AddressSpace *arm_boot_address_space(ARMCPU *cpu, + int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, + hwaddr addr_limit, AddressSpace *as, MachineState *ms); + ++void do_cpu_reset(void *opaque); ++ + /* Write a secure board setup routine with a dummy handler for SMCs */ + void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu, + const struct arm_boot_info *info, +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 069c9f2a09..ae0f5beb26 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -167,6 +167,7 @@ struct VirtMachineState { + MemMapEntry *memmap; + char *pciehb_nodename; + const int *irqmap; ++ uint16_t boot_cpus; + int fdt_size; + uint32_t clock_phandle; + uint32_t gic_phandle; +-- +2.27.0 + diff --git a/hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch b/hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch deleted file mode 100644 index f2c0f3900d05652a977be13df0babb8210c65b1b..0000000000000000000000000000000000000000 --- a/hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch +++ /dev/null @@ -1,72 +0,0 @@ -From e6b1fd7bfbfe116e9d5df590f7069336c1eb1983 Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:29 +0100 -Subject: [PATCH] hw/arm: Factor out powerdown notifier from GPIO - -This is in preparation of using GED device for -system_powerdown event. Make the powerdown notifier -registration independent of create_gpio() fn. 
- -Signed-off-by: Shameer Kolothum -Reviewed-by: Eric Auger -Reviewed-by: Igor Mammedov -Message-Id: <20190918130633.4872-8-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - hw/arm/virt.c | 12 ++++-------- - include/hw/arm/virt.h | 1 + - 2 files changed, 5 insertions(+), 8 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ab33cce4b3..aaefa5578e 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -910,10 +910,6 @@ static void virt_powerdown_req(Notifier *n, void *opaque) - qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1); - } - --static Notifier virt_system_powerdown_notifier = { -- .notify = virt_powerdown_req --}; -- - static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) - { - char *nodename; -@@ -954,10 +950,6 @@ static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) - KEY_POWER); - qemu_fdt_setprop_cells(vms->fdt, "/gpio-keys/poweroff", - "gpios", phandle, 3, 0); -- -- /* connect powerdown request */ -- qemu_register_powerdown_notifier(&virt_system_powerdown_notifier); -- - g_free(nodename); - } - -@@ -1856,6 +1848,10 @@ static void machvirt_init(MachineState *machine) - vms->acpi_dev = create_acpi_ged(vms, pic); - } - -+ /* connect powerdown request */ -+ vms->powerdown_notifier.notify = virt_powerdown_req; -+ qemu_register_powerdown_notifier(&vms->powerdown_notifier); -+ - /* Create mmio transports, so the user can create virtio backends - * (which will be automatically plugged in to the transports). If - * no backend is created the transport will just sit harmlessly idle. -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 0350285136..dcceb9c615 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -139,6 +139,7 @@ typedef struct { - int psci_conduit; - hwaddr highest_gpa; - DeviceState *acpi_dev; -+ Notifier powerdown_notifier; - } VirtMachineState; - - #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) --- -2.19.1 diff --git a/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch b/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch new file mode 100644 index 0000000000000000000000000000000000000000..75abc4e58ce1f30527638b0c62f94c50b033911b --- /dev/null +++ b/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch @@ -0,0 +1,98 @@ +From 8e1b8d624128523654786953b381557c82654a57 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 18:03:11 +0100 +Subject: [PATCH] hw/arm: MADT Tbl change to size the guest with possible vCPUs + +Changes required during building of MADT Table by QEMU to accommodate disabled +possible vCPUs. This info shall be used by the guest kernel to size up its +resources during boot time. This pre-sizing of the guest kernel done on +possible vCPUs will facilitate hotplug of the disabled vCPUs. + +This change also caters for ACPI MADT GIC CPU Interface flag related changes +recently introduced in the UEFI ACPI 6.5 Specification which allows deferred +virtual CPU online'ing in the Guest Kernel. 
+ +Link: https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#gic-cpu-interface-gicc-structure + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 36 ++++++++++++++++++++++++++++++------ + 1 file changed, 30 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index d88f3cded1..2870c1ec5a 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -779,6 +779,29 @@ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ + } + ++static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ ++ /* can only exist in 'enabled' state */ ++ if (!mc->has_hotpluggable_cpus) { ++ return 1; ++ } ++ ++ /* ++ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot ++ * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the ++ * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged. ++ * Though as-of-now this is only used as a debugging feature. ++ * ++ * UEFI ACPI Specification 6.5 ++ * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure ++ * Table: 5.37 GICC CPU Interface Flags ++ * Link: https://uefi.org/specs/ACPI/6.5 ++ */ ++ return cpu && !cpu->cpu_index ? 
1 : (1 << 3); ++} ++ + static void + build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { +@@ -805,12 +828,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vms->gic_version, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + +- for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { +- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); ++ for (i = 0; i < MACHINE(vms)->smp.max_cpus; i++) { ++ CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; +- uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? +- VIRTUAL_PMU_IRQ : 0; ++ uint32_t pmu_interrupt = vms->pmu ? VIRTUAL_PMU_IRQ : 0; ++ uint32_t flags = virt_acpi_get_gicc_flags(cpu); ++ uint64_t mpidr = qemu_get_cpu_archid(i); + + if (vms->gic_version == VIRT_GIC_VERSION_2) { + physical_base_address = memmap[VIRT_GIC_CPU].base; +@@ -825,7 +849,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, i, 4); /* GIC ID */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags */ +- build_append_int_noprefix(table_data, 1, 4); /* Enabled */ ++ build_append_int_noprefix(table_data, flags, 4); + /* Parking Protocol Version */ + build_append_int_noprefix(table_data, 0, 4); + /* Performance Interrupt GSIV */ +@@ -839,7 +863,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vgic_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ + /* MPIDR */ +- build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); ++ build_append_int_noprefix(table_data, mpidr, 8); + /* Processor Power Efficiency Class */ + build_append_int_noprefix(table_data, 0, 1); + /* Reserved */ +-- +2.27.0 + diff --git 
a/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch b/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..87e236b6f83accff9b839e2c363e47949feb1f9b --- /dev/null +++ b/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch @@ -0,0 +1,128 @@ +From c5dfec0bfd78f7e8f84a527a1aa73896f69b2367 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Thu, 10 Aug 2023 01:15:31 +0000 +Subject: [PATCH] hw/arm: Support hotplug capability check using _OSC method + +Physical CPU hotplug results in (un)setting of ACPI _STA.Present bit. AARCH64 +platforms do not support physical CPU hotplug. Virtual CPU hotplug support being +implemented toggles ACPI _STA.Enabled Bit to achieve hotplug functionality. This +is not the same as physical CPU hotplug support. + +In future, if ARM architecture supports physical CPU hotplug then the current +design of virtual CPU hotplug can be used unchanged. Hence, there is a need for +firmware/VMM/Qemu to support evaluation of platform wide capability related to +the *type* of CPU hotplug support present on the platform. OSPM might need this +during boot time to correctly initialize the CPUs and other related components +in the kernel. + +NOTE: This implementation will be improved to add the support of *query* in the +subsequent versions. This is very minimal support to assist kernel. 
+ +ASL for the implemented _OSC method: + +Method (_OSC, 4, NotSerialized) // _OSC: Operating System Capabilities +{ + CreateDWordField (Arg3, Zero, CDW1) + If ((Arg0 == ToUUID ("0811b06e-4a27-44f9-8d60-3cbbc22e7b48") /* Platform-wide Capabilities */)) + { + CreateDWordField (Arg3, 0x04, CDW2) + Local0 = CDW2 /* \_SB_._OSC.CDW2 */ + If ((Arg1 != One)) + { + CDW1 |= 0x08 + } + + Local0 &= 0x00800000 + If ((CDW2 != Local0)) + { + CDW1 |= 0x10 + } + + CDW2 = Local0 + } + Else + { + CDW1 |= 0x04 + } + + Return (Arg3) +} + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 52 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 2870c1ec5a..c402e102c4 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -940,6 +940,55 @@ static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker, + build_fadt(table_data, linker, &fadt, vms->oem_id, vms->oem_table_id); + } + ++static void build_virt_osc_method(Aml *scope, VirtMachineState *vms) ++{ ++ Aml *if_uuid, *else_uuid, *if_rev, *if_caps_masked, *method; ++ Aml *a_cdw1 = aml_name("CDW1"); ++ Aml *a_cdw2 = aml_local(0); ++ ++ method = aml_method("_OSC", 4, AML_NOTSERIALIZED); ++ aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); ++ ++ /* match UUID */ ++ if_uuid = aml_if(aml_equal( ++ aml_arg(0), aml_touuid("0811B06E-4A27-44F9-8D60-3CBBC22E7B48"))); ++ ++ aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); ++ aml_append(if_uuid, aml_store(aml_name("CDW2"), a_cdw2)); ++ ++ /* check unknown revision in arg(1) */ ++ if_rev = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); ++ /* set revision error bits, DWORD1 Bit[3] */ ++ aml_append(if_rev, aml_or(a_cdw1, aml_int(0x08), a_cdw1)); ++ aml_append(if_uuid, if_rev); ++ ++ /* ++ * check support for vCPU hotplug type(=enabled) platform-wide capability ++ * in DWORD2 as sepcified in the below ACPI 
Specification ECR, ++ * # https://bugzilla.tianocore.org/show_bug.cgi?id=4481 ++ */ ++ if (vms->acpi_dev) { ++ aml_append(if_uuid, aml_and(a_cdw2, aml_int(0x800000), a_cdw2)); ++ /* check if OSPM specified hotplug capability bits were masked */ ++ if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW2"), a_cdw2))); ++ aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1)); ++ aml_append(if_uuid, if_caps_masked); ++ } ++ aml_append(if_uuid, aml_store(a_cdw2, aml_name("CDW2"))); ++ ++ aml_append(method, if_uuid); ++ else_uuid = aml_else(); ++ ++ /* set unrecognized UUID error bits, DWORD1 Bit[2] */ ++ aml_append(else_uuid, aml_or(a_cdw1, aml_int(4), a_cdw1)); ++ aml_append(method, else_uuid); ++ ++ aml_append(method, aml_return(aml_arg(3))); ++ aml_append(scope, method); ++ ++ return; ++} ++ + /* DSDT */ + static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) +@@ -974,6 +1023,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + } else { + acpi_dsdt_add_cpus(scope, vms); + } ++ ++ build_virt_osc_method(scope, vms); ++ + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/hw-arm-Use-GED-for-system_powerdown-event.patch b/hw-arm-Use-GED-for-system_powerdown-event.patch deleted file mode 100644 index 140f59a2c71da40f47a2edd2bd2dd529227c3ddc..0000000000000000000000000000000000000000 --- a/hw-arm-Use-GED-for-system_powerdown-event.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 0b77f242b180f1ae40b9752999cef4894113df8e Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:30 +0100 -Subject: [PATCH] hw/arm: Use GED for system_powerdown event - -For machines 4.2 or higher with ACPI boot use GED for system_powerdown -event instead of GPIO. Guest boot with DT still uses GPIO. 
- -Signed-off-by: Shameer Kolothum -Reviewed-by: Eric Auger -Reviewed-by: Igor Mammedov -Message-Id: <20190918130633.4872-9-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - hw/acpi/generic_event_device.c | 8 ++++++++ - hw/arm/virt-acpi-build.c | 6 +++--- - hw/arm/virt.c | 18 ++++++++++++------ - include/hw/acpi/acpi_dev_interface.h | 1 + - include/hw/acpi/generic_event_device.h | 3 +++ - 5 files changed, 27 insertions(+), 9 deletions(-) - -diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c -index b94500b08d..9cee90cc70 100644 ---- a/hw/acpi/generic_event_device.c -+++ b/hw/acpi/generic_event_device.c -@@ -22,6 +22,7 @@ - - static const uint32_t ged_supported_events[] = { - ACPI_GED_MEM_HOTPLUG_EVT, -+ ACPI_GED_PWR_DOWN_EVT, - }; - - /* -@@ -104,6 +105,11 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, - aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." - MEMORY_SLOT_SCAN_METHOD)); - break; -+ case ACPI_GED_PWR_DOWN_EVT: -+ aml_append(if_ctx, -+ aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), -+ aml_int(0x80))); -+ break; - default: - /* - * Please make sure all the events in ged_supported_events[] -@@ -184,6 +190,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) - - if (ev & ACPI_MEMORY_HOTPLUG_STATUS) { - sel = ACPI_GED_MEM_HOTPLUG_EVT; -+ } else if (ev & ACPI_POWER_DOWN_STATUS) { -+ sel = ACPI_GED_PWR_DOWN_EVT; - } else { - /* Unknown event. Return without generating interrupt. */ - warn_report("GED: Unsupported event %d. 
No irq injected", ev); -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index 9622994e50..f48733d9f2 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -50,7 +50,6 @@ - #include "hw/acpi/acpi-defs.h" - - #define ARM_SPI_BASE 32 --#define ACPI_POWER_BUTTON_DEVICE "PWRB" - - static void acpi_dsdt_add_psd(Aml *dev, int cpus) - { -@@ -813,13 +812,14 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS); - acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE), - vms->highmem, vms->highmem_ecam); -- acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO], -- (irqmap[VIRT_GPIO] + ARM_SPI_BASE)); - if (vms->acpi_dev) { - build_ged_aml(scope, "\\_SB."GED_DEVICE, - HOTPLUG_HANDLER(vms->acpi_dev), - irqmap[VIRT_ACPI_GED] + ARM_SPI_BASE, AML_SYSTEM_MEMORY, - memmap[VIRT_ACPI_GED].base); -+ } else { -+ acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO], -+ (irqmap[VIRT_GPIO] + ARM_SPI_BASE)); - } - - if (vms->acpi_dev) { -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index aaefa5578e..18321e522b 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -639,10 +639,10 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) - DeviceState *dev; - MachineState *ms = MACHINE(vms); - int irq = vms->irqmap[VIRT_ACPI_GED]; -- uint32_t event = 0; -+ uint32_t event = ACPI_GED_PWR_DOWN_EVT; - - if (ms->ram_slots) { -- event = ACPI_GED_MEM_HOTPLUG_EVT; -+ event |= ACPI_GED_MEM_HOTPLUG_EVT; - } - - dev = qdev_create(NULL, TYPE_ACPI_GED); -@@ -906,8 +906,14 @@ static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) - static DeviceState *gpio_key_dev; - static void virt_powerdown_req(Notifier *n, void *opaque) - { -- /* use gpio Pin 3 for power button event */ -- qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1); -+ VirtMachineState *s = container_of(n, VirtMachineState, powerdown_notifier); -+ -+ if (s->acpi_dev) { -+ 
acpi_send_event(s->acpi_dev, ACPI_POWER_DOWN_STATUS); -+ } else { -+ /* use gpio Pin 3 for power button event */ -+ qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1); -+ } - } - - static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) -@@ -1842,10 +1848,10 @@ static void machvirt_init(MachineState *machine) - - create_pcie(vms, pic); - -- create_gpio(vms, pic); -- - if (has_ged && aarch64 && firmware_loaded && acpi_enabled) { - vms->acpi_dev = create_acpi_ged(vms, pic); -+ } else { -+ create_gpio(vms, pic); - } - - /* connect powerdown request */ -diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h -index 43ff119179..adcb3a816c 100644 ---- a/include/hw/acpi/acpi_dev_interface.h -+++ b/include/hw/acpi/acpi_dev_interface.h -@@ -11,6 +11,7 @@ typedef enum { - ACPI_MEMORY_HOTPLUG_STATUS = 8, - ACPI_NVDIMM_HOTPLUG_STATUS = 16, - ACPI_VMGENID_CHANGE_STATUS = 32, -+ ACPI_POWER_DOWN_STATUS = 64, - } AcpiEventStatusBits; - - #define TYPE_ACPI_DEVICE_IF "acpi-device-interface" -diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h -index 2049e8d873..d157eac088 100644 ---- a/include/hw/acpi/generic_event_device.h -+++ b/include/hw/acpi/generic_event_device.h -@@ -62,6 +62,8 @@ - #include "hw/sysbus.h" - #include "hw/acpi/memory_hotplug.h" - -+#define ACPI_POWER_BUTTON_DEVICE "PWRB" -+ - #define TYPE_ACPI_GED "acpi-ged" - #define ACPI_GED(obj) \ - OBJECT_CHECK(AcpiGedState, (obj), TYPE_ACPI_GED) -@@ -79,6 +81,7 @@ - * through GED. 
- */ - #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 -+#define ACPI_GED_PWR_DOWN_EVT 0x2 - - typedef struct GEDState { - MemoryRegion io; --- -2.19.1 diff --git a/hw-arm-acpi-enable-SHPC-native-hot-plug.patch b/hw-arm-acpi-enable-SHPC-native-hot-plug.patch deleted file mode 100644 index 2b2e530bb8e3555b4f9cf2a807b060ac62ccd9de..0000000000000000000000000000000000000000 --- a/hw-arm-acpi-enable-SHPC-native-hot-plug.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 1ad2e774f4fd3f720d5db07e86fe60df13f21a6d Mon Sep 17 00:00:00 2001 -From: Heyi Guo -Date: Mon, 9 Dec 2019 14:37:19 +0800 -Subject: [PATCH] hw/arm/acpi: enable SHPC native hot plug - -After the introduction of generic PCIe root port and PCIe-PCI bridge, -we will also have SHPC controller on ARM, so just enable SHPC native -hot plug. - -Also update tests/data/acpi/virt/DSDT* to pass "make check". - -Cc: Shannon Zhao -Cc: Peter Maydell -Cc: "Michael S. Tsirkin" -Cc: Igor Mammedov -Reviewed-by: Michael S. Tsirkin -Reviewed-by: Igor Mammedov -Signed-off-by: Heyi Guo -Message-id: 20191209063719.23086-3-guoheyi@huawei.com -Signed-off-by: Peter Maydell ---- - hw/arm/virt-acpi-build.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index 2cfac7b84f..588e7f2680 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -347,7 +347,12 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, - aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - aml_append(ifctx, aml_store(aml_name("CDW2"), aml_name("SUPP"))); - aml_append(ifctx, aml_store(aml_name("CDW3"), aml_name("CTRL"))); -- aml_append(ifctx, aml_store(aml_and(aml_name("CTRL"), aml_int(0x1D), NULL), -+ -+ /* -+ * Allow OS control for all 5 features: -+ * PCIeHotplug SHPCHotplug PME AER PCIeCapability. 
-+ */ -+ aml_append(ifctx, aml_store(aml_and(aml_name("CTRL"), aml_int(0x1F), NULL), - aml_name("CTRL"))); - - ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1)))); --- -2.23.0 - diff --git a/hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch b/hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch deleted file mode 100644 index 95abd07e5a2b62acd14c1a45e61ce471707f173e..0000000000000000000000000000000000000000 --- a/hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch +++ /dev/null @@ -1,115 +0,0 @@ -From de86ba0ff72a51b0c1cdbebf790869aea73ae9d3 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Thu, 9 Apr 2020 09:31:22 +0800 -Subject: [PATCH] hw/arm/boot: Add manually register and trigger of CPU reset - -We need to register and trigger CPU reset manually for hotplugged -CPU. Besides, we gather CPU reset handlers of all CPUs because CPU -reset should happen before GIC reset. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/boot.c | 18 ++++++++++++++++++ - hw/core/reset.c | 25 +++++++++++++++++++++++++ - include/hw/arm/boot.h | 3 +++ - include/sysemu/reset.h | 4 ++++ - 4 files changed, 50 insertions(+) - -diff --git a/hw/arm/boot.c b/hw/arm/boot.c -index fc4e021a38..3ab9de6456 100644 ---- a/hw/arm/boot.c -+++ b/hw/arm/boot.c -@@ -789,6 +789,24 @@ static void do_cpu_reset(void *opaque) - } - } - -+void cpu_hotplug_register_reset(int ncpu) -+{ -+ CPUState *cpu_0 = qemu_get_cpu(0); -+ CPUState *cpu = qemu_get_cpu(ncpu); -+ QEMUResetEntry *entry = qemu_get_reset_entry(do_cpu_reset, cpu_0); -+ -+ assert(entry); -+ /* Gather the reset handlers of all CPUs */ -+ qemu_register_reset_after(entry, do_cpu_reset, cpu); -+} -+ -+void cpu_hotplug_reset_manually(int ncpu) -+{ -+ CPUState *cpu = qemu_get_cpu(ncpu); -+ -+ do_cpu_reset(cpu); -+} -+ - /** - * load_image_to_fw_cfg() - Load an image file into an fw_cfg entry identified - * by key. 
-diff --git a/hw/core/reset.c b/hw/core/reset.c -index 9c477f2bf5..0efaf2d76c 100644 ---- a/hw/core/reset.c -+++ b/hw/core/reset.c -@@ -47,6 +47,31 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque) - QTAILQ_INSERT_TAIL(&reset_handlers, re, entry); - } - -+QEMUResetEntry *qemu_get_reset_entry(QEMUResetHandler *func, -+ void *opaque) -+{ -+ QEMUResetEntry *re; -+ -+ QTAILQ_FOREACH(re, &reset_handlers, entry) { -+ if (re->func == func && re->opaque == opaque) { -+ return re; -+ } -+ } -+ -+ return NULL; -+} -+ -+void qemu_register_reset_after(QEMUResetEntry *entry, -+ QEMUResetHandler *func, -+ void *opaque) -+{ -+ QEMUResetEntry *re = g_malloc0(sizeof(QEMUResetEntry)); -+ -+ re->func = func; -+ re->opaque = opaque; -+ QTAILQ_INSERT_AFTER(&reset_handlers, entry, re, entry); -+} -+ - void qemu_unregister_reset(QEMUResetHandler *func, void *opaque) - { - QEMUResetEntry *re; -diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h -index c48cc4c2bc..9452ccd1fa 100644 ---- a/include/hw/arm/boot.h -+++ b/include/hw/arm/boot.h -@@ -118,6 +118,9 @@ struct arm_boot_info { - arm_endianness endianness; - }; - -+void cpu_hotplug_register_reset(int ncpu); -+void cpu_hotplug_reset_manually(int ncpu); -+ - /** - * arm_load_kernel - Loads memory with everything needed to boot - * -diff --git a/include/sysemu/reset.h b/include/sysemu/reset.h -index 0b0d6d7598..f3ff26c637 100644 ---- a/include/sysemu/reset.h -+++ b/include/sysemu/reset.h -@@ -2,7 +2,11 @@ - #define QEMU_SYSEMU_RESET_H - - typedef void QEMUResetHandler(void *opaque); -+typedef struct QEMUResetEntry QEMUResetEntry; - -+QEMUResetEntry *qemu_get_reset_entry(QEMUResetHandler *func, void *opaque); -+void qemu_register_reset_after(QEMUResetEntry *entry, -+ QEMUResetHandler *func, void *opaque); - void qemu_register_reset(QEMUResetHandler *func, void *opaque); - void qemu_unregister_reset(QEMUResetHandler *func, void *opaque); - void qemu_devices_reset(void); --- -2.19.1 diff --git 
a/hw-arm-boot-Load-DTB-as-is-for-confidential-VMs.patch b/hw-arm-boot-Load-DTB-as-is-for-confidential-VMs.patch new file mode 100644 index 0000000000000000000000000000000000000000..4798f129fb1a656cad719727f50d4fc66376c8dc --- /dev/null +++ b/hw-arm-boot-Load-DTB-as-is-for-confidential-VMs.patch @@ -0,0 +1,41 @@ +From ac5a8a0a35b5f41a2b86f5b0681519123dc7da57 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 21 Feb 2024 13:58:14 +0000 +Subject: [PATCH] hw/arm/boot: Load DTB as is for confidential VMs + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/2c85282f4b10b301880b5067834ef83ad368d50a + +For confidential VMs it may be necessary to measure the DTB, to ensure a +malicious host does not insert harmful information in there. In case an +external tool has generated and measured the DTB, load it as is without +patching it. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/arm/boot.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 1e931d91d3..e2fbde1699 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -527,7 +527,14 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, + char **node_path; + Error *err = NULL; + +- if (binfo->dtb_filename) { ++ if (binfo->dtb_filename && binfo->confidential) { ++ /* ++ * If the user is providing a DTB for a confidential VM, it is already ++ * tailored to this configuration and measured. Load it as is, without ++ * any modification. 
++ */ ++ return rom_add_file_fixed_as(binfo->dtb_filename, addr, -1, as); ++ } else if (binfo->dtb_filename) { + char *filename; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename); + if (!filename) { +-- +2.33.0 + diff --git a/hw-arm-boot-Mark-all-guest-memory-as-RIPAS_RAM.patch b/hw-arm-boot-Mark-all-guest-memory-as-RIPAS_RAM.patch new file mode 100644 index 0000000000000000000000000000000000000000..06774e779fb29613c2de073a4142448bf0be25ac --- /dev/null +++ b/hw-arm-boot-Mark-all-guest-memory-as-RIPAS_RAM.patch @@ -0,0 +1,39 @@ +From 080ba1535c68e2d819dc8e7597aa941f478d0296 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 14 Jun 2023 16:36:52 +0100 +Subject: [PATCH] hw/arm/boot: Mark all guest memory as RIPAS_RAM. + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/7dd79c57dd097f2de2cb4c3ce428dad78ca452f3 + +All Realm IPA states are by default RIPAS_EMPTY, and accessing them in +that state causes injection of synchronous exception. Either the loader +or the guest needs to set IPA state to RIPAS_RAM before accessing it. +Since a Linux guest needs all memory ready at boot [1], initialize it +here. + +[1] https://docs.kernel.org/arch/arm64/booting.html + https://lore.kernel.org/all/20241004144307.66199-12-steven.price@arm.com/ + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/arm/boot.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 9a33601d35..1e931d91d3 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -1330,6 +1330,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) + } + } + ++ /* Mark all Realm memory as RAM */ ++ kvm_arm_rme_init_guest_ram(info->loader_start, info->ram_size); ++ + /* Load the kernel. 
*/ + if (!info->kernel_filename || info->firmware_loaded) { + arm_setup_firmware_boot(cpu, info, ms->firmware); +-- +2.33.0 + diff --git a/hw-arm-boot-Skip-bootloader-for-confidential-guests.patch b/hw-arm-boot-Skip-bootloader-for-confidential-guests.patch new file mode 100644 index 0000000000000000000000000000000000000000..40ec5b5945d20baa5f1b8e19dffe02eb10113461 --- /dev/null +++ b/hw-arm-boot-Skip-bootloader-for-confidential-guests.patch @@ -0,0 +1,117 @@ +From 215b18636f45a1ecdad8abba5db383075efa722b Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Fri, 26 Apr 2024 16:11:59 +0100 +Subject: [PATCH] hw/arm/boot: Skip bootloader for confidential guests + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/79359e41a418cffbb2f2ae0314599a29d9f183a7 + +An independent verifier needs to reconstruct the content of guest memory +in order to attest that it is running trusted code. To avoid having to +reconstruct the bootloader generated by QEMU, skip this step and jump +directly to the kernel, with the DTB address in x0 as specified by the +Linux boot protocol [1]. 
+ +[1] https://docs.kernel.org/arch/arm64/booting.html + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/arm/boot.c | 23 +++++++++++++++++------ + hw/arm/virt.c | 1 + + include/hw/arm/boot.h | 6 ++++++ + 3 files changed, 24 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index e2fbde1699..6980aebe1e 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -766,7 +766,13 @@ void do_cpu_reset(void *opaque) + if (cs == first_cpu) { + AddressSpace *as = arm_boot_address_space(cpu, info); + +- cpu_set_pc(cs, info->loader_start); ++ if (info->skip_bootloader) { ++ assert(is_a64(env)); ++ env->xregs[0] = info->dtb_start; ++ cpu_set_pc(cs, info->entry); ++ } else { ++ cpu_set_pc(cs, info->loader_start); ++ } + + if (!have_dtb(info)) { + if (old_param) { +@@ -858,7 +864,8 @@ static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, + } + + static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, +- hwaddr *entry, AddressSpace *as) ++ hwaddr *entry, AddressSpace *as, ++ bool skip_bootloader) + { + hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR; + uint64_t kernel_size = 0; +@@ -910,7 +917,8 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, + * bootloader, we can just load it starting at 2MB+offset rather + * than 0MB + offset. 
+ */ +- if (kernel_load_offset < BOOTLOADER_MAX_SIZE) { ++ if (kernel_load_offset < BOOTLOADER_MAX_SIZE && ++ !skip_bootloader) { + kernel_load_offset += 2 * MiB; + } + } +@@ -994,7 +1002,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + } + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) { + kernel_size = load_aarch64_image(info->kernel_filename, +- info->loader_start, &entry, as); ++ info->loader_start, &entry, as, ++ info->skip_bootloader); + is_linux = 1; + if (kernel_size >= 0) { + image_low_addr = entry; +@@ -1134,8 +1143,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + fixupcontext[FIXUP_ENTRYPOINT_LO] = entry; + fixupcontext[FIXUP_ENTRYPOINT_HI] = entry >> 32; + +- arm_write_bootloader("bootloader", as, info->loader_start, +- primary_loader, fixupcontext); ++ if (!info->skip_bootloader) { ++ arm_write_bootloader("bootloader", as, info->loader_start, ++ primary_loader, fixupcontext); ++ } + + if (info->write_board_setup) { + info->write_board_setup(cpu, info); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8423912c89..e6053acec6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2911,6 +2911,7 @@ static void machvirt_init(MachineState *machine) + vms->bootinfo.confidential = virtcca_cvm_enabled(); + vms->bootinfo.psci_conduit = vms->psci_conduit; + vms->bootinfo.confidential = virt_machine_is_confidential(vms); ++ vms->bootinfo.skip_bootloader = vms->bootinfo.confidential; + arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo); + + vms->machine_done.notify = virt_machine_done; +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 0cbae4685b..326c92782e 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -137,6 +137,12 @@ struct arm_boot_info { + /* Used when loading firmware into RAM */ + hwaddr firmware_base; + hwaddr firmware_max_size; ++ /* ++ * Instead of starting in a small bootloader that jumps to the kernel, ++ * immediately start in the kernel. 
++ */ ++ bool skip_bootloader; ++ + /* + * Confidential guest boot loads everything into RAM so it can be measured. + */ +-- +2.33.0 + diff --git a/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch b/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch deleted file mode 100644 index e7ca51a5deb7146de6d6b05aa1d8391b0c08adac..0000000000000000000000000000000000000000 --- a/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 220816989c1e3d490d293b8d7ac85dbc41a4c321 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 20 Sep 2019 18:40:39 +0100 -Subject: [PATCH] hw/arm/boot.c: Set NSACR.{CP11,CP10} for NS kernel boots - -If we're booting a Linux kernel directly into Non-Secure -state on a CPU which has Secure state, then make sure we -set the NSACR CP11 and CP10 bits, so that Non-Secure is allowed -to access the FPU. Otherwise an AArch32 kernel will UNDEF as -soon as it tries to use the FPU. - -It used to not matter that we didn't do this until commit -fc1120a7f5f2d4b6, where we implemented actually honouring -these NSACR bits. - -The problem only exists for CPUs where EL3 is AArch32; the -equivalent AArch64 trap bits are in CPTR_EL3 and are "0 to -not trap, 1 to trap", so the reset value of the register -permits NS access, unlike NSACR. 
- -Fixes: fc1120a7f5 -Fixes: https://bugs.launchpad.net/qemu/+bug/1844597 -Cc: qemu-stable@nongnu.org -Signed-off-by: Peter Maydell -Reviewed-by: Richard Henderson -Message-id: 20190920174039.3916-1-peter.maydell@linaro.org -(cherry picked from commit ece628fcf69cbbd4b3efb6fbd203af07609467a2) -Signed-off-by: Michael Roth ---- - hw/arm/boot.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/arm/boot.c b/hw/arm/boot.c -index c2b89b3bb9..fc4e021a38 100644 ---- a/hw/arm/boot.c -+++ b/hw/arm/boot.c -@@ -754,6 +754,8 @@ static void do_cpu_reset(void *opaque) - (cs != first_cpu || !info->secure_board_setup)) { - /* Linux expects non-secure state */ - env->cp15.scr_el3 |= SCR_NS; -+ /* Set NSACR.{CP11,CP10} so NS can access the FPU */ -+ env->cp15.nsacr |= 3 << 10; - } - } - --- -2.23.0 diff --git a/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch b/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8097e1973b31092363f156c4ee2df9372884906 --- /dev/null +++ b/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch @@ -0,0 +1,267 @@ +From 8ad397f33f8b7d82c0ef72608ef8dc3e0ecba1c2 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 14:38:38 +0100 +Subject: [PATCH] hw/arm,gicv3: Changes to update GIC with vCPU hot-plug + notification + +vCPU hot-(un)plug events MUST be notified to the GIC. Introduce a notification +mechanism to update any such events to GIC so that it can update its vCPU to GIC +CPU interface association. + +This is required to implement a workaround to the limitations posed by the ARM +architecture. 
For details about the constraints and workarounds please check +below slides: + +Link: https://kvm-forum.qemu.org/2023/talk/9SMPDQ/ + +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 27 +++++++++++++-- + hw/intc/arm_gicv3_common.c | 54 +++++++++++++++++++++++++++++- + hw/intc/arm_gicv3_cpuif_common.c | 5 +++ + hw/intc/gicv3_internal.h | 1 + + include/hw/arm/virt.h | 1 + + include/hw/intc/arm_gicv3_common.h | 22 ++++++++++++ + 6 files changed, 107 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 97bf4cca11..0312fa366d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -750,6 +750,16 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + return dev; + } + ++static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(vms); ++ ++ if (mc->has_hotpluggable_cpus) { ++ Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic); ++ notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier); ++ } ++} ++ + static void create_its(VirtMachineState *vms) + { + const char *itsclass = its_class_name(); +@@ -997,6 +1007,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } else if (vms->gic_version == VIRT_GIC_VERSION_2) { + create_v2m(vms); + } ++ ++ /* add GIC CPU hot(un)plug update notifier */ ++ virt_add_gic_cpuhp_notifier(vms); + } + + static void create_uart(const VirtMachineState *vms, int uart, +@@ -2481,6 +2494,8 @@ static void machvirt_init(MachineState *machine) + create_fdt(vms); + qemu_log("cpu init start\n"); + ++ notifier_list_init(&vms->cpuhp_notifiers); ++ possible_cpus = mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; +@@ -3133,6 +3148,14 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, + dev, &error_abort); + } + ++static void virt_update_gic(VirtMachineState 
*vms, CPUState *cs) ++{ ++ GICv3CPUHotplugInfo gic_info = { .gic = vms->gic, .cpu = cs }; ++ ++ /* notify gic to stitch GICC to this new cpu */ ++ notifier_list_notify(&vms->cpuhp_notifiers, &gic_info); ++} ++ + static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { +@@ -3215,7 +3238,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * vCPUs have their GIC state initialized during machvit_init(). + */ + if (vms->acpi_dev) { +- /* TODO: update GIC about this hotplug change here */ ++ virt_update_gic(vms, cs); + wire_gic_cpu_irqs(vms, cs); + } + +@@ -3301,7 +3324,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ + + unwire_gic_cpu_irqs(vms, cs); +- /* TODO: update the GIC about this hot unplug change */ ++ virt_update_gic(vms, cs); + + /* TODO: unregister cpu for reset & update F/W info for the next boot */ + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index ebd99af610..fc87fa9369 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -33,7 +33,6 @@ + #include "hw/arm/linux-boot-if.h" + #include "sysemu/kvm.h" + +- + static void gicv3_gicd_no_migration_shift_bug_post_load(GICv3State *cs) + { + if (cs->gicd_no_migration_shift_bug) { +@@ -322,6 +321,56 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + } + } + ++static int arm_gicv3_get_proc_num(GICv3State *s, CPUState *cpu) ++{ ++ uint64_t mp_affinity; ++ uint64_t gicr_typer; ++ uint64_t cpu_affid; ++ int i; ++ ++ mp_affinity = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); ++ /* match the cpu mp-affinity to get the gic cpuif number */ ++ for (i = 0; i < s->num_cpu; i++) { ++ gicr_typer = s->cpu[i].gicr_typer; ++ cpu_affid = (gicr_typer >> 32) & 0xFFFFFF; ++ if (cpu_affid == mp_affinity) { ++ return i; ++ } ++ } ++ ++ return -1; ++} ++ ++static void 
arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) ++{ ++ GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data; ++ CPUState *cpu = gic_info->cpu; ++ int gic_cpuif_num; ++ GICv3State *s; ++ ++ s = ARM_GICV3_COMMON(gic_info->gic); ++ ++ /* this shall get us mapped gicv3 cpuif corresponding to mpidr */ ++ gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu); ++ if (gic_cpuif_num < 0) { ++ error_report("Failed to associate cpu %d with any GIC cpuif", ++ cpu->cpu_index); ++ abort(); ++ } ++ ++ /* check if update is for vcpu hot-unplug */ ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[gic_cpuif_num].cpu = NULL; ++ return; ++ } ++ ++ /* re-stitch the gic cpuif to this new cpu */ ++ gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]); ++ gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu); ++ ++ /* TODO: initialize the registers info for this newly added cpu */ ++} ++ + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + { + GICv3State *s = ARM_GICV3_COMMON(dev); +@@ -444,6 +493,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; + } + ++ s->cpu_update_notifier.notify = arm_gicv3_cpu_update_notifier; ++ + s->itslist = g_ptr_array_new(); + } + +@@ -451,6 +502,7 @@ static void arm_gicv3_finalize(Object *obj) + { + GICv3State *s = ARM_GICV3_COMMON(obj); + ++ notifier_remove(&s->cpu_update_notifier); + g_free(s->redist_region_count); + } + +diff --git a/hw/intc/arm_gicv3_cpuif_common.c b/hw/intc/arm_gicv3_cpuif_common.c +index ff1239f65d..381cf2754b 100644 +--- a/hw/intc/arm_gicv3_cpuif_common.c ++++ b/hw/intc/arm_gicv3_cpuif_common.c +@@ -20,3 +20,8 @@ void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) + + env->gicv3state = (void *)s; + }; ++ ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu) ++{ ++ s->cpu = cpu; ++} +diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h +index 29d5cdc1b6..9d4c1209bd 100644 +--- a/hw/intc/gicv3_internal.h ++++ 
b/hw/intc/gicv3_internal.h +@@ -848,5 +848,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) + } + + void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu); + + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 5de0185063..069c9f2a09 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -180,6 +180,7 @@ struct VirtMachineState { + PCIBus *bus; + char *oem_id; + char *oem_table_id; ++ NotifierList cpuhp_notifiers; + }; + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h +index 4e2fb518e7..97a48f44b9 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -280,6 +280,7 @@ struct GICv3State { + GICv3CPUState *gicd_irouter_target[GICV3_MAXIRQ]; + uint32_t gicd_nsacr[DIV_ROUND_UP(GICV3_MAXIRQ, 16)]; + ++ Notifier cpu_update_notifier; + GICv3CPUState *cpu; + /* List of all ITSes connected to this GIC */ + GPtrArray *itslist; +@@ -328,6 +329,27 @@ struct ARMGICv3CommonClass { + + void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + const MemoryRegionOps *ops); ++/** ++ * Structure used by GICv3 CPU hotplug notifier ++ */ ++typedef struct GICv3CPUHotplugInfo { ++ DeviceState *gic; /* GICv3State */ ++ CPUState *cpu; ++} GICv3CPUHotplugInfo; ++ ++/** ++ * gicv3_cpuhp_notifier ++ * ++ * Returns CPU hotplug notifier which could be used to update GIC about any ++ * CPU hot(un)plug events. 
++ * ++ * Returns: Notifier initialized with CPU Hot(un)plug update function ++ */ ++static inline Notifier *gicv3_cpuhp_notifier(DeviceState *dev) ++{ ++ GICv3State *s = ARM_GICV3_COMMON(dev); ++ return &s->cpu_update_notifier; ++} + + /** + * gicv3_class_name +-- +2.27.0 + diff --git a/hw-arm-mps2-tz.c-fix-RX-TX-interrupts-order.patch b/hw-arm-mps2-tz.c-fix-RX-TX-interrupts-order.patch new file mode 100644 index 0000000000000000000000000000000000000000..aa349bfd331e87b3637b8af6c4e68f63fa41973c --- /dev/null +++ b/hw-arm-mps2-tz.c-fix-RX-TX-interrupts-order.patch @@ -0,0 +1,50 @@ +From 322f39889ff60a6fda87d7d95a6f233efb558e8a Mon Sep 17 00:00:00 2001 +From: Marco Palumbi +Date: Thu, 1 Aug 2024 10:15:02 +0100 +Subject: [PATCH] hw/arm/mps2-tz.c: fix RX/TX interrupts order + +The order of the RX and TX interrupts are swapped. +This commit fixes the order as per the following documents: + * https://developer.arm.com/documentation/dai0505/latest/ + * https://developer.arm.com/documentation/dai0521/latest/ + * https://developer.arm.com/documentation/dai0524/latest/ + * https://developer.arm.com/documentation/dai0547/latest/ + +Cc: qemu-stable@nongnu.org +Signed-off-by: Marco Palumbi +Message-id: 20240730073123.72992-1-marco@palumbi.it +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit 5a558be93ad628e5bed6e0ee062870f49251725c) +Signed-off-by: zhujun2 +--- + hw/arm/mps2-tz.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c +index 668db5ed61..9d9c263ef8 100644 +--- a/hw/arm/mps2-tz.c ++++ b/hw/arm/mps2-tz.c +@@ -435,7 +435,7 @@ static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, + const char *name, hwaddr size, + const int *irqs, const PPCExtraData *extradata) + { +- /* The irq[] array is tx, rx, combined, in that order */ ++ /* The irq[] array is rx, tx, combined, in that order */ + MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); + CMSDKAPBUART 
*uart = opaque; + int i = uart - &mms->uart[0]; +@@ -447,8 +447,8 @@ static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, + qdev_prop_set_uint32(DEVICE(uart), "pclk-frq", mmc->apb_periph_frq); + sysbus_realize(SYS_BUS_DEVICE(uart), &error_fatal); + s = SYS_BUS_DEVICE(uart); +- sysbus_connect_irq(s, 0, get_sse_irq_in(mms, irqs[0])); +- sysbus_connect_irq(s, 1, get_sse_irq_in(mms, irqs[1])); ++ sysbus_connect_irq(s, 0, get_sse_irq_in(mms, irqs[1])); ++ sysbus_connect_irq(s, 1, get_sse_irq_in(mms, irqs[0])); + sysbus_connect_irq(s, 2, qdev_get_gpio_in(orgate_dev, i * 2)); + sysbus_connect_irq(s, 3, qdev_get_gpio_in(orgate_dev, i * 2 + 1)); + sysbus_connect_irq(s, 4, get_sse_irq_in(mms, irqs[2])); +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmu-common-Add-a-nested-flag-to-SMMUState.patch b/hw-arm-smmu-common-Add-a-nested-flag-to-SMMUState.patch new file mode 100644 index 0000000000000000000000000000000000000000..d917e2649a6bbc8872b69e475e2450591edb153d --- /dev/null +++ b/hw-arm-smmu-common-Add-a-nested-flag-to-SMMUState.patch @@ -0,0 +1,67 @@ +From d589010512005bfc698f30417911e4b14478c81b Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Wed, 22 Jun 2022 01:30:39 -0700 +Subject: [PATCH] hw/arm/smmu-common: Add a nested flag to SMMUState + +Add a nested flag in the SMMUState, passed in from device property. 
+ +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-common.c | 1 + + hw/arm/smmuv3.c | 5 +++++ + include/hw/arm/smmu-common.h | 4 ++++ + 3 files changed, 10 insertions(+) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 9a8ac45431..c5f3e02065 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -683,6 +683,7 @@ static Property smmu_dev_properties[] = { + DEFINE_PROP_UINT8("bus_num", SMMUState, bus_num, 0), + DEFINE_PROP_LINK("primary-bus", SMMUState, primary_bus, + TYPE_PCI_BUS, PCIBus *), ++ DEFINE_PROP_BOOL("nested", SMMUState, nested, false), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index c3871ae067..64ca4c5542 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1746,6 +1746,11 @@ static void smmu_realize(DeviceState *d, Error **errp) + SysBusDevice *dev = SYS_BUS_DEVICE(d); + Error *local_err = NULL; + ++ if (s->stage && strcmp("1", s->stage)) { ++ /* Only support nested with an stage1 only vSMMU */ ++ sys->nested = false; ++ } ++ + c->parent_realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index fd8d772da1..eae5d4d05b 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -22,6 +22,7 @@ + #include "hw/sysbus.h" + #include "hw/pci/pci.h" + #include "qom/object.h" ++#include "sysemu/iommufd.h" + + #define SMMU_PCI_BUS_MAX 256 + #define SMMU_PCI_DEVFN_MAX 256 +@@ -136,6 +137,9 @@ struct SMMUState { + const char *mrtypename; + MemoryRegion iomem; + ++ /* Nested SMMU */ ++ bool nested; ++ + GHashTable *smmu_pcibus_by_busptr; + GHashTable *configs; /* cache for configuration data */ + GHashTable *iotlb; +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmu-common-Add-iommufd-helpers.patch b/hw-arm-smmu-common-Add-iommufd-helpers.patch new file mode 100644 index 0000000000000000000000000000000000000000..a95b394b7e9ef7e33fd10502eacf988e8d79bc6e --- /dev/null +++ 
b/hw-arm-smmu-common-Add-iommufd-helpers.patch @@ -0,0 +1,179 @@ +From a2735cd15160a62065a0a0b39af405c7b0f3fae8 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Wed, 22 Jun 2022 14:41:27 -0700 +Subject: [PATCH] hw/arm/smmu-common: Add iommufd helpers + +Add a set of helper functions for IOMMUFD and new "struct SMMUS1Hwpt" +to store the nested hwpt information. + +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-common.c | 108 +++++++++++++++++++++++++++++++++++ + include/hw/arm/smmu-common.h | 20 +++++++ + 2 files changed, 128 insertions(+) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 038ae857d8..a79eb34277 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -838,6 +838,114 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) + return NULL; + } + ++/* IOMMUFD helpers */ ++int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type, ++ uint32_t data_len, void *data) ++{ ++ uint64_t caps; ++ ++ if (!sdev || !sdev->idev) { ++ return -ENOENT; ++ } ++ ++ return !iommufd_backend_get_device_info(sdev->idev->iommufd, ++ sdev->idev->devid, data_type, data, ++ data_len, &caps, NULL); ++} ++ ++void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort) ++{ ++ HostIOMMUDeviceIOMMUFD *idev = sdev->idev; ++ SMMUS1Hwpt *s1_hwpt = sdev->s1_hwpt; ++ uint32_t hwpt_id; ++ ++ if (!s1_hwpt || !sdev->viommu) { ++ return; ++ } ++ ++ if (abort) { ++ hwpt_id = sdev->viommu->abort_hwpt_id; ++ } else { ++ hwpt_id = sdev->viommu->bypass_hwpt_id; ++ } ++ ++ if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, NULL)) { ++ return; ++ } ++ ++ iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id); ++ sdev->s1_hwpt = NULL; ++ g_free(s1_hwpt); ++} ++ ++int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type, ++ uint32_t data_len, void *data) ++{ ++ SMMUViommu *viommu = sdev->viommu; ++ SMMUS1Hwpt *s1_hwpt = sdev->s1_hwpt; ++ HostIOMMUDeviceIOMMUFD *idev = sdev->idev; ++ ++ if (!idev || !viommu) { ++ return -ENOENT; ++ } ++ ++ 
if (s1_hwpt) { ++ smmu_dev_uninstall_nested_ste(sdev, false); ++ } ++ ++ s1_hwpt = g_new0(SMMUS1Hwpt, 1); ++ if (!s1_hwpt) { ++ return -ENOMEM; ++ } ++ ++ s1_hwpt->smmu = sdev->smmu; ++ s1_hwpt->viommu = viommu; ++ s1_hwpt->iommufd = idev->iommufd; ++ ++ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, ++ viommu->core->viommu_id, 0, data_type, ++ data_len, data, &s1_hwpt->hwpt_id, NULL)) { ++ goto free; ++ } ++ ++ if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, NULL)) { ++ goto free_hwpt; ++ } ++ ++ sdev->s1_hwpt = s1_hwpt; ++ ++ return 0; ++free_hwpt: ++ iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id); ++free: ++ sdev->s1_hwpt = NULL; ++ g_free(s1_hwpt); ++ ++ return -EINVAL; ++} ++ ++int smmu_hwpt_invalidate_cache(SMMUS1Hwpt *s1_hwpt, uint32_t type, uint32_t len, ++ uint32_t *num, void *reqs) ++{ ++ if (!s1_hwpt) { ++ return -ENOENT; ++ } ++ ++ return iommufd_backend_invalidate_cache(s1_hwpt->iommufd, s1_hwpt->hwpt_id, ++ type, len, num, reqs); ++} ++ ++int smmu_viommu_invalidate_cache(IOMMUFDViommu *viommu, uint32_t type, ++ uint32_t len, uint32_t *num, void *reqs) ++{ ++ if (!viommu) { ++ return -ENOENT; ++ } ++ ++ return iommufd_viommu_invalidate_cache(viommu->iommufd, viommu->viommu_id, ++ type, len, num, reqs); ++} ++ + /* Unmap all notifiers attached to @mr */ + static void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr) + { +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 3bfb68cef6..66dc7206ea 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -125,6 +125,15 @@ typedef struct SMMUViommu { + QLIST_ENTRY(SMMUViommu) next; + } SMMUViommu; + ++typedef struct SMMUS1Hwpt { ++ void *smmu; ++ IOMMUFDBackend *iommufd; ++ SMMUViommu *viommu; ++ uint32_t hwpt_id; ++ QLIST_HEAD(, SMMUDevice) device_list; ++ QLIST_ENTRY(SMMUViommu) next; ++} SMMUS1Hwpt; ++ + typedef struct SMMUDevice { + void *smmu; + PCIBus *bus; +@@ -132,6 +141,7 @@ typedef struct SMMUDevice { + 
IOMMUMemoryRegion iommu; + HostIOMMUDeviceIOMMUFD *idev; + SMMUViommu *viommu; ++ SMMUS1Hwpt *s1_hwpt; + AddressSpace as; + uint32_t cfg_cache_hits; + uint32_t cfg_cache_misses; +@@ -225,4 +235,14 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, + /* Unmap the range of all the notifiers registered to any IOMMU mr */ + void smmu_inv_notifiers_all(SMMUState *s); + ++/* IOMMUFD helpers */ ++int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type, ++ uint32_t data_len, void *data); ++void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort); ++int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type, ++ uint32_t data_len, void *data); ++int smmu_hwpt_invalidate_cache(SMMUS1Hwpt *s1_hwpt, uint32_t type, uint32_t len, ++ uint32_t *num, void *reqs); ++int smmu_viommu_invalidate_cache(IOMMUFDViommu *viommu, uint32_t type, ++ uint32_t len, uint32_t *num, void *reqs); + #endif /* HW_ARM_SMMU_COMMON_H */ +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmu-common-Add-set-unset_iommu_device-callba.patch b/hw-arm-smmu-common-Add-set-unset_iommu_device-callba.patch new file mode 100644 index 0000000000000000000000000000000000000000..271b408451120c9c11c5322eb162c3c7433b6777 --- /dev/null +++ b/hw-arm-smmu-common-Add-set-unset_iommu_device-callba.patch @@ -0,0 +1,283 @@ +From 539e12641dc2db30a6fea7a0f061e163bc245d79 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Wed, 22 Jun 2022 02:16:52 -0700 +Subject: [PATCH] hw/arm/smmu-common: Add set/unset_iommu_device callback + +Implement a set_iommu_device callback: + - Find an existing S2 hwpt to test attach() or allocate a new one + (Devices behind the same physical SMMU should share an S2 HWPT.) + - Attach the device to the S2 hwpt and add it to its device list + +And add an unset_iommu_device doing the opposite cleanup routine. 
+ +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-common.c | 177 +++++++++++++++++++++++++++++++++++ + hw/arm/trace-events | 2 + + include/hw/arm/smmu-common.h | 21 +++++ + 3 files changed, 200 insertions(+) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 03d9ff58d4..038ae857d8 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -20,6 +20,7 @@ + #include "trace.h" + #include "exec/target_page.h" + #include "hw/core/cpu.h" ++#include "hw/pci/pci_device.h" + #include "hw/qdev-properties.h" + #include "qapi/error.h" + #include "qemu/jhash.h" +@@ -639,8 +640,184 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) + return &sdev->as; + } + ++static bool smmu_dev_attach_viommu(SMMUDevice *sdev, ++ HostIOMMUDeviceIOMMUFD *idev, Error **errp) ++{ ++ struct iommu_hwpt_arm_smmuv3 bypass_data = { ++ .ste = { 0x9ULL, 0x0ULL }, //0x1ULL << (108 - 64) }, ++ }; ++ struct iommu_hwpt_arm_smmuv3 abort_data = { ++ .ste = { 0x1ULL, 0x0ULL }, ++ }; ++ SMMUState *s = sdev->smmu; ++ SMMUS2Hwpt *s2_hwpt; ++ SMMUViommu *viommu; ++ uint32_t s2_hwpt_id; ++ ++ if (s->viommu) { ++ return host_iommu_device_iommufd_attach_hwpt( ++ idev, s->viommu->s2_hwpt->hwpt_id, errp); ++ } ++ ++ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->ioas_id, ++ IOMMU_HWPT_ALLOC_NEST_PARENT, ++ IOMMU_HWPT_DATA_NONE, 0, NULL, ++ &s2_hwpt_id, errp)) { ++ error_setg(errp, "failed to allocate an S2 hwpt"); ++ return false; ++ } ++ ++ /* Attach to S2 for MSI cookie */ ++ if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) { ++ error_setg(errp, "failed to attach stage-2 HW pagetable"); ++ goto free_s2_hwpt; ++ } ++ ++ viommu = g_new0(SMMUViommu, 1); ++ ++ viommu->core = iommufd_backend_alloc_viommu(idev->iommufd, idev->devid, ++ IOMMU_VIOMMU_TYPE_ARM_SMMUV3, ++ s2_hwpt_id); ++ if (!viommu->core) { ++ error_setg(errp, "failed to allocate a viommu"); ++ goto free_viommu; ++ } ++ ++ if (!iommufd_backend_alloc_hwpt(idev->iommufd, 
idev->devid, ++ viommu->core->viommu_id, 0, ++ IOMMU_HWPT_DATA_ARM_SMMUV3, ++ sizeof(abort_data), &abort_data, ++ &viommu->abort_hwpt_id, errp)) { ++ error_setg(errp, "failed to allocate an abort pagetable"); ++ goto free_viommu_core; ++ } ++ ++ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, ++ viommu->core->viommu_id, 0, ++ IOMMU_HWPT_DATA_ARM_SMMUV3, ++ sizeof(bypass_data), &bypass_data, ++ &viommu->bypass_hwpt_id, errp)) { ++ error_setg(errp, "failed to allocate a bypass pagetable"); ++ goto free_abort_hwpt; ++ } ++ ++ if (!host_iommu_device_iommufd_attach_hwpt( ++ idev, viommu->bypass_hwpt_id, errp)) { ++ error_setg(errp, "failed to attach the bypass pagetable"); ++ goto free_bypass_hwpt; ++ } ++ ++ s2_hwpt = g_new0(SMMUS2Hwpt, 1); ++ s2_hwpt->iommufd = idev->iommufd; ++ s2_hwpt->hwpt_id = s2_hwpt_id; ++ s2_hwpt->ioas_id = idev->ioas_id; ++ ++ viommu->iommufd = idev->iommufd; ++ viommu->s2_hwpt = s2_hwpt; ++ ++ s->viommu = viommu; ++ return true; ++ ++free_bypass_hwpt: ++ iommufd_backend_free_id(idev->iommufd, viommu->bypass_hwpt_id); ++free_abort_hwpt: ++ iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id); ++free_viommu_core: ++ iommufd_backend_free_id(idev->iommufd, viommu->core->viommu_id); ++ g_free(viommu->core); ++free_viommu: ++ g_free(viommu); ++ host_iommu_device_iommufd_attach_hwpt(idev, sdev->idev->ioas_id, errp); ++free_s2_hwpt: ++ iommufd_backend_free_id(idev->iommufd, s2_hwpt_id); ++ return false; ++} ++ ++static bool smmu_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, ++ HostIOMMUDevice *hiod, Error **errp) ++{ ++ HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); ++ SMMUState *s = opaque; ++ SMMUPciBus *sbus = smmu_get_sbus(s, bus); ++ SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn); ++ ++ if (!s->nested) { ++ return true; ++ } ++ ++ if (sdev->idev) { ++ if (sdev->idev != idev) { ++ return false;//-EEXIST; ++ } else { ++ return true; ++ } ++ } ++ ++ if (!idev) { ++ return true; ++ } ++ ++ if 
(!smmu_dev_attach_viommu(sdev, idev, errp)) { ++ error_report("Unable to attach viommu"); ++ return false; ++ } ++ ++ sdev->idev = idev; ++ sdev->viommu = s->viommu; ++ QLIST_INSERT_HEAD(&s->viommu->device_list, sdev, next); ++ trace_smmu_set_iommu_device(devfn, smmu_get_sid(sdev)); ++ ++ return true; ++} ++ ++static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) ++{ ++ SMMUDevice *sdev; ++ SMMUViommu *viommu; ++ SMMUState *s = opaque; ++ SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus); ++ ++ if (!s->nested) { ++ return; ++ } ++ ++ if (!sbus) { ++ return; ++ } ++ ++ sdev = sbus->pbdev[devfn]; ++ if (!sdev) { ++ return; ++ } ++ ++ if (!host_iommu_device_iommufd_attach_hwpt(sdev->idev, ++ sdev->idev->ioas_id, NULL)) { ++ error_report("Unable to attach dev to the default HW pagetable"); ++ } ++ ++ viommu = sdev->viommu; ++ ++ sdev->idev = NULL; ++ sdev->viommu = NULL; ++ QLIST_REMOVE(sdev, next); ++ trace_smmu_unset_iommu_device(devfn, smmu_get_sid(sdev)); ++ ++ if (QLIST_EMPTY(&viommu->device_list)) { ++ iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id); ++ iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id); ++ iommufd_backend_free_id(viommu->iommufd, viommu->core->viommu_id); ++ g_free(viommu->core); ++ iommufd_backend_free_id(viommu->iommufd, viommu->s2_hwpt->hwpt_id); ++ g_free(viommu->s2_hwpt); ++ g_free(viommu); ++ s->viommu = NULL; ++ } ++} ++ + static const PCIIOMMUOps smmu_ops = { + .get_address_space = smmu_find_add_as, ++ .set_iommu_device = smmu_dev_set_iommu_device, ++ .unset_iommu_device = smmu_dev_unset_iommu_device, + }; + + IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index cdc1ea06a8..58e0636e95 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -5,6 +5,8 @@ virt_acpi_setup(void) "No fw cfg or ACPI disabled. Bailing out." 
+ + # smmu-common.c + smmu_add_mr(const char *name) "%s" ++smmu_set_iommu_device(int devfn, uint32_t sid) "devfn=%d (sid=%d)" ++smmu_unset_iommu_device(int devfn, uint32_t sid) "devfn=%d (sid=%d)" + smmu_ptw_level(int stage, int level, uint64_t iova, size_t subpage_size, uint64_t baseaddr, uint32_t offset, uint64_t pte) "stage=%d level=%d iova=0x%"PRIx64" subpage_sz=0x%zx baseaddr=0x%"PRIx64" offset=%d => pte=0x%"PRIx64 + smmu_ptw_invalid_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, uint32_t offset, uint64_t pte) "stage=%d level=%d base@=0x%"PRIx64" pte@=0x%"PRIx64" offset=%d pte=0x%"PRIx64 + smmu_ptw_page_pte(int stage, int level, uint64_t iova, uint64_t baseaddr, uint64_t pteaddr, uint64_t pte, uint64_t address) "stage=%d level=%d iova=0x%"PRIx64" base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" page address = 0x%"PRIx64 +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index eae5d4d05b..3bfb68cef6 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -23,6 +23,7 @@ + #include "hw/pci/pci.h" + #include "qom/object.h" + #include "sysemu/iommufd.h" ++#include + + #define SMMU_PCI_BUS_MAX 256 + #define SMMU_PCI_DEVFN_MAX 256 +@@ -107,11 +108,30 @@ typedef struct SMMUTransCfg { + struct SMMUS2Cfg s2cfg; + } SMMUTransCfg; + ++typedef struct SMMUS2Hwpt { ++ IOMMUFDBackend *iommufd; ++ uint32_t hwpt_id; ++ uint32_t ioas_id; ++} SMMUS2Hwpt; ++ ++typedef struct SMMUViommu { ++ void *smmu; ++ IOMMUFDBackend *iommufd; ++ IOMMUFDViommu *core; ++ SMMUS2Hwpt *s2_hwpt; ++ uint32_t bypass_hwpt_id; ++ uint32_t abort_hwpt_id; ++ QLIST_HEAD(, SMMUDevice) device_list; ++ QLIST_ENTRY(SMMUViommu) next; ++} SMMUViommu; ++ + typedef struct SMMUDevice { + void *smmu; + PCIBus *bus; + int devfn; + IOMMUMemoryRegion iommu; ++ HostIOMMUDeviceIOMMUFD *idev; ++ SMMUViommu *viommu; + AddressSpace as; + uint32_t cfg_cache_hits; + uint32_t cfg_cache_misses; +@@ -139,6 +159,7 @@ struct SMMUState { + + /* Nested SMMU */ + 
bool nested; ++ SMMUViommu *viommu; + + GHashTable *smmu_pcibus_by_busptr; + GHashTable *configs; /* cache for configuration data */ +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch b/hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch deleted file mode 100644 index 1824b8e4fd0fa31603df903bb640065c70858907..0000000000000000000000000000000000000000 --- a/hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 8bf9d1dc67335c1fb921a56825f6bf198a568091 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 19 Mar 2021 12:22:48 -0400 -Subject: [PATCH] hw/arm/smmu-common: Allow domain invalidation for - NH_ALL/NSNH_ALL - -NH_ALL/NSNH_ALL corresponds to a domain granularity invalidation, -ie. all the notifier range gets invalidation, whatever the ASID. -So let's set the granularity to IOMMU_INV_GRAN_DOMAIN to allow -the consumer to benefit from the info if it can. - -Signed-off-by: Eric Auger -Suggested-by: chenxiang (M) -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmu-common.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 717d22bcbe..de9468d33f 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -395,6 +395,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n) - entry.iova = n->start; - entry.perm = IOMMU_NONE; - entry.addr_mask = n->end - n->start; -+ entry.granularity = IOMMU_INV_GRAN_DOMAIN; - - memory_region_notify_one(n, &entry); - } --- -2.27.0 - diff --git a/hw-arm-smmu-common-Bypass-emulated-IOTLB-for-a-neste.patch b/hw-arm-smmu-common-Bypass-emulated-IOTLB-for-a-neste.patch new file mode 100644 index 0000000000000000000000000000000000000000..8998bcb06848c163c8c6939e8889f0b241cfd882 --- /dev/null +++ b/hw-arm-smmu-common-Bypass-emulated-IOTLB-for-a-neste.patch @@ -0,0 +1,75 @@ +From 6c330f39cc08e4c641a3567e2b6ad0ebcadf5165 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Fri, 21 Jun 2024 
21:22:04 +0000 +Subject: [PATCH] hw/arm/smmu-common: Bypass emulated IOTLB for a nested SMMU + +If a vSMMU is configured as a nested one, HW IOTLB will be used and all +cache invalidation should be done to the HW IOTLB too, v.s. the emulated +iotlb. In this case, an iommu notifier isn't registered, as the devices +behind a nested SMMU would stay in the system address space for stage-2 +mappings. + +However, the KVM code still requests an iommu address space to translate +an MSI doorbell gIOVA via get_msi_address_space() and translate(). + +Since a nested SMMU doesn't register an iommu notifier to flush emulated +iotlb, bypass the emulated IOTLB and always walk through the guest-level +IO page table. + +Note that regular nested SMMU could still register an iommu notifier for +IOTLB invalidation, since QEMU traps the invalidation commands. But this +would result in invalidation inefficiency since each invalidation would +be doubled for both HW IOTLB and the emulated IOTLB. Also, with NVIDIA's +CMDQV feature on its Grace SoC, invalidation commands are issued to the +CMDQ HW directly, without any trapping. So, there is no way to maintain +the emulated IOTLB. Meanwhile, the stage-1 translation request from KVM +is only activated in case of an MSI table update, which does not happen +that often to impact performance if walking through the guest RAM every +time. + +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-common.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index c5f3e02065..016418a48c 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -75,6 +75,16 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + uint8_t level = 4 - (inputsize - 4) / stride; + SMMUTLBEntry *entry = NULL; + ++ /* ++ * Stage-1 translation with a nested SMMU in general uses HW IOTLB.
However, ++ * KVM still requests for an iommu address space for an MSI fixup by looking ++ * up stage-1 page table. Make sure we don't go through the emulated pathway ++ * so that the emulated iotlb will not need any invalidation. ++ */ ++ if (bs->nested) { ++ return NULL; ++ } ++ + while (level <= 3) { + uint64_t subpage_size = 1ULL << level_shift(level, tt->granule_sz); + uint64_t mask = subpage_size - 1; +@@ -110,6 +120,16 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) + SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); + uint8_t tg = (new->granule - 10) / 2; + ++ /* ++ * Stage-1 translation with a nested SMMU in general uses HW IOTLB. However, ++ * KVM still requests for an iommu address space for an MSI fixup by looking ++ * up stage-1 page table. Make sure we don't go through the emulated pathway ++ * so that the emulated iotlb will not need any invalidation. ++ */ ++ if (bs->nested) { ++ return; ++ } ++ + if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { + smmu_iotlb_inv_all(bs); + } +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmu-common-Extract-smmu_get_sbus-and-smmu_ge.patch b/hw-arm-smmu-common-Extract-smmu_get_sbus-and-smmu_ge.patch new file mode 100644 index 0000000000000000000000000000000000000000..16fe217d2a96fef8be9a0c47c36be504b9247640 --- /dev/null +++ b/hw-arm-smmu-common-Extract-smmu_get_sbus-and-smmu_ge.patch @@ -0,0 +1,68 @@ +From 2fea4f93632679afcb15f0c35b3d9abeede37778 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Wed, 10 Apr 2024 16:37:25 +0000 +Subject: [PATCH] hw/arm/smmu-common: Extract smmu_get_sbus and smmu_get_sdev + helpers + +Add two helpers to get sbus and sdev respectively. These will be used +by the following patch adding set/unset_iommu_device ops. 
+ +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-common.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 016418a48c..03d9ff58d4 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -589,12 +589,9 @@ SMMUPciBus *smmu_find_smmu_pcibus(SMMUState *s, uint8_t bus_num) + return NULL; + } + +-static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) ++static SMMUPciBus *smmu_get_sbus(SMMUState *s, PCIBus *bus) + { +- SMMUState *s = opaque; + SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus); +- SMMUDevice *sdev; +- static unsigned int index; + + if (!sbus) { + sbus = g_malloc0(sizeof(SMMUPciBus) + +@@ -603,7 +600,15 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) + g_hash_table_insert(s->smmu_pcibus_by_busptr, bus, sbus); + } + +- sdev = sbus->pbdev[devfn]; ++ return sbus; ++} ++ ++static SMMUDevice *smmu_get_sdev(SMMUState *s, SMMUPciBus *sbus, ++ PCIBus *bus, int devfn) ++{ ++ SMMUDevice *sdev = sbus->pbdev[devfn]; ++ static unsigned int index; ++ + if (!sdev) { + char *name = g_strdup_printf("%s-%d-%d", s->mrtypename, devfn, index++); + +@@ -622,6 +627,15 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) + g_free(name); + } + ++ return sdev; ++} ++ ++static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) ++{ ++ SMMUState *s = opaque; ++ SMMUPciBus *sbus = smmu_get_sbus(s, bus); ++ SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn); ++ + return &sdev->as; + } + +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmu-common-Replace-smmu_iommu_mr-with-smmu_f.patch b/hw-arm-smmu-common-Replace-smmu_iommu_mr-with-smmu_f.patch new file mode 100644 index 0000000000000000000000000000000000000000..9cc887dc954e5e8a097cea442b0861bfde90e445 --- /dev/null +++ b/hw-arm-smmu-common-Replace-smmu_iommu_mr-with-smmu_f.patch @@ -0,0 +1,114 @@ +From 
d8d7f775b602a84c37b8aced11e00cb5b0521c4e Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Tue, 18 Jun 2024 17:22:18 -0700 +Subject: [PATCH] hw/arm/smmu-common: Replace smmu_iommu_mr with smmu_find_sdev + +The caller of smmu_iommu_mr wants to get sdev for smmuv3_flush_config(). + +Do it directly instead of bridging with an iommu mr pointer. + +Signed-off-by: Nicolin Chen +Message-id: 20240619002218.926674-1-nicolinc@nvidia.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +--- + hw/arm/smmu-common.c | 8 ++------ + hw/arm/smmuv3.c | 12 ++++-------- + include/hw/arm/smmu-common.h | 4 ++-- + 3 files changed, 8 insertions(+), 16 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 9e9af8f5c7..d0bc620606 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -837,20 +837,16 @@ static const PCIIOMMUOps smmu_ops = { + .unset_iommu_device = smmu_dev_unset_iommu_device, + }; + +-IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) ++SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid) + { + uint8_t bus_n, devfn; + SMMUPciBus *smmu_bus; +- SMMUDevice *smmu; + + bus_n = PCI_BUS_NUM(sid); + smmu_bus = smmu_find_smmu_pcibus(s, bus_n); + if (smmu_bus) { + devfn = SMMU_PCI_DEVFN(sid); +- smmu = smmu_bus->pbdev[devfn]; +- if (smmu) { +- return &smmu->iommu; +- } ++ return smmu_bus->pbdev[devfn]; + } + return NULL; + } +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 9d44bb19bc..b2ffe2d40b 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1407,20 +1407,18 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + case SMMU_CMD_CFGI_STE: + { + uint32_t sid = CMD_SID(&cmd); +- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); +- SMMUDevice *sdev; ++ SMMUDevice *sdev = smmu_find_sdev(bs, sid); + + if (CMD_SSEC(&cmd)) { + cmd_error = SMMU_CERROR_ILL; + break; + } + +- if (!mr) { ++ if (!sdev) { + break; + } + + trace_smmuv3_cmdq_cfgi_ste(sid); +- sdev = container_of(mr, SMMUDevice, iommu); + smmuv3_flush_config(sdev); 
+ smmuv3_install_nested_ste(sdev, sid); + +@@ -1452,20 +1450,18 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + case SMMU_CMD_CFGI_CD_ALL: + { + uint32_t sid = CMD_SID(&cmd); +- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); +- SMMUDevice *sdev; ++ SMMUDevice *sdev = smmu_find_sdev(bs, sid); + + if (CMD_SSEC(&cmd)) { + cmd_error = SMMU_CERROR_ILL; + break; + } + +- if (!mr) { ++ if (!sdev) { + break; + } + + trace_smmuv3_cmdq_cfgi_cd(sid); +- sdev = container_of(mr, SMMUDevice, iommu); + smmuv3_flush_config(sdev); + break; + } +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 955ca716a5..e30539a8d4 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -234,8 +234,8 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, + */ + SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova); + +-/* Return the iommu mr associated to @sid, or NULL if none */ +-IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); ++/* Return the SMMUDevice associated to @sid, or NULL if none */ ++SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid); + + #define SMMU_IOTLB_MAX_SIZE 256 + +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmu-common-Return-sysmem-if-stage-1-is-bypas.patch b/hw-arm-smmu-common-Return-sysmem-if-stage-1-is-bypas.patch new file mode 100644 index 0000000000000000000000000000000000000000..406280cbc2f54b20a763711eb70c2215748777be --- /dev/null +++ b/hw-arm-smmu-common-Return-sysmem-if-stage-1-is-bypas.patch @@ -0,0 +1,87 @@ +From 3c6c29612d5ca0ff07bcb8a45735a3877c8fadd4 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Thu, 7 Dec 2023 20:04:47 +0000 +Subject: [PATCH] hw/arm/smmu-common: Return sysmem if stage-1 is bypassed + +When nested translation is enabled, there are 2-stage translation occurring +to two different address spaces: stage-1 in the iommu as, while stage-2 in +the system as. + +If a device attached to the vSMMU doesn't enable stage-1 translation, e.g.
+vSTE sets to Config=Bypass, the system as should be returned, so QEMU can +set up system memory mappings onto the stage-2 page table. + +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-common.c | 18 +++++++++++++++++- + include/hw/arm/smmu-common.h | 3 +++ + 2 files changed, 20 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index a79eb34277..cc41bf3de8 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -622,6 +622,9 @@ static SMMUDevice *smmu_get_sdev(SMMUState *s, SMMUPciBus *sbus, + memory_region_init_iommu(&sdev->iommu, sizeof(sdev->iommu), + s->mrtypename, + OBJECT(s), name, UINT64_MAX); ++ if (s->nested) { ++ address_space_init(&sdev->as_sysmem, &s->root, name); ++ } + address_space_init(&sdev->as, + MEMORY_REGION(&sdev->iommu), name); + trace_smmu_add_mr(name); +@@ -637,7 +640,12 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) + SMMUPciBus *sbus = smmu_get_sbus(s, bus); + SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn); + +- return &sdev->as; ++ /* Return the system as if the device uses stage-2 only */ ++ if (s->nested && !sdev->s1_hwpt) { ++ return &sdev->as_sysmem; ++ } else { ++ return &sdev->as; ++ } + } + + static bool smmu_dev_attach_viommu(SMMUDevice *sdev, +@@ -983,6 +991,14 @@ static void smmu_base_realize(DeviceState *dev, Error **errp) + g_free, g_free); + s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL); + ++ if (s->nested) { ++ memory_region_init(&s->root, OBJECT(s), "root", UINT64_MAX); ++ memory_region_init_alias(&s->sysmem, OBJECT(s), ++ "smmu-sysmem", get_system_memory(), 0, ++ memory_region_size(get_system_memory())); ++ memory_region_add_subregion(&s->root, 0, &s->sysmem); ++ } ++ + if (s->primary_bus) { + pci_setup_iommu(s->primary_bus, &smmu_ops, s); + } else { +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 66dc7206ea..37dfeed026 100644 +--- a/include/hw/arm/smmu-common.h ++++ 
b/include/hw/arm/smmu-common.h +@@ -143,6 +143,7 @@ typedef struct SMMUDevice { + SMMUViommu *viommu; + SMMUS1Hwpt *s1_hwpt; + AddressSpace as; ++ AddressSpace as_sysmem; + uint32_t cfg_cache_hits; + uint32_t cfg_cache_misses; + QLIST_ENTRY(SMMUDevice) next; +@@ -165,7 +166,9 @@ struct SMMUState { + /* */ + SysBusDevice dev; + const char *mrtypename; ++ MemoryRegion root; + MemoryRegion iomem; ++ MemoryRegion sysmem; + + /* Nested SMMU */ + bool nested; +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Add-initial-support-for-SMMUv3-Nested-.patch b/hw-arm-smmuv3-Add-initial-support-for-SMMUv3-Nested-.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a09fabaa9f8e0f8c003ebfa9e44dafe27550d29 --- /dev/null +++ b/hw-arm-smmuv3-Add-initial-support-for-SMMUv3-Nested-.patch @@ -0,0 +1,233 @@ +From 9895192512af4b52aff88432618a474e69b44bdd Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 6 Nov 2024 14:47:27 +0000 +Subject: [PATCH] hw/arm/smmuv3: Add initial support for SMMUv3 Nested device + +Based on SMMUv3 as a parent device, add a user-creatable +smmuv3-nested device. Subsequent patches will add support to +specify a PCI bus for this device. + +Currently only supported for "virt", so hook up the sysbus mem & irq +for that as well. + +No FDT support is added for now.
+ +Signed-off-by: Shameer Kolothum +--- + hw/arm/smmuv3.c | 34 ++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 31 +++++++++++++++++++++++++++++-- + hw/core/sysbus-fdt.c | 1 + + include/hw/arm/smmuv3.h | 15 +++++++++++++++ + include/hw/arm/virt.h | 6 ++++++ + 5 files changed, 85 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index b860c8385f..3010471cdc 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -2069,6 +2069,19 @@ static void smmu_realize(DeviceState *d, Error **errp) + smmu_init_irq(s, dev); + } + ++static void smmu_nested_realize(DeviceState *d, Error **errp) ++{ ++ SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d); ++ SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested); ++ Error *local_err = NULL; ++ ++ c->parent_realize(d, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++} ++ + static const VMStateDescription vmstate_smmuv3_queue = { + .name = "smmuv3_queue", + .version_id = 1, +@@ -2167,6 +2180,18 @@ static void smmuv3_class_init(ObjectClass *klass, void *data) + device_class_set_props(dc, smmuv3_properties); + } + ++static void smmuv3_nested_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass); ++ ++ dc->vmsd = &vmstate_smmuv3; ++ device_class_set_parent_realize(dc, smmu_nested_realize, ++ &c->parent_realize); ++ dc->user_creatable = true; ++ dc->hotpluggable = false; ++} ++ + static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new, +@@ -2205,6 +2230,14 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, + imrc->notify_flag_changed = smmuv3_notify_flag_changed; + } + ++static const TypeInfo smmuv3_nested_type_info = { ++ .name = TYPE_ARM_SMMUV3_NESTED, ++ .parent = TYPE_ARM_SMMUV3, ++ .instance_size = sizeof(SMMUv3NestedState), ++ .class_size = sizeof(SMMUv3NestedClass), ++ .class_init = 
smmuv3_nested_class_init, ++}; ++ + static const TypeInfo smmuv3_type_info = { + .name = TYPE_ARM_SMMUV3, + .parent = TYPE_ARM_SMMU, +@@ -2223,6 +2256,7 @@ static const TypeInfo smmuv3_iommu_memory_region_info = { + static void smmuv3_register_types(void) + { + type_register(&smmuv3_type_info); ++ type_register(&smmuv3_nested_type_info); + type_register(&smmuv3_iommu_memory_region_info); + } + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 08c40c314b..a55f297af2 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -166,6 +166,7 @@ static const MemMapEntry base_memmap[] = { + /* In the virtCCA scenario, this space is used for MSI interrupt mapping */ + [VIRT_CVM_MSI] = { 0x0a001000, 0x00fff000 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, ++ [VIRT_SMMU_NESTED] = { 0x0b010000, 0x00ff0000}, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ + [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, + [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 }, +@@ -211,6 +212,7 @@ static const int a15irqmap[] = { + [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */ + [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */ + [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */ ++ [VIRT_SMMU_NESTED] = 200, + }; + + static const char *valid_cpus[] = { +@@ -3613,10 +3615,34 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); + +- if (vms->platform_bus_dev) { +- MachineClass *mc = MACHINE_GET_CLASS(vms); ++ /* For smmuv3-nested devices we need to set the mem & irq */ ++ if (device_is_dynamic_sysbus(mc, dev) && ++ object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) { ++ hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base; ++ int irq = vms->irqmap[VIRT_SMMU_NESTED]; ++ ++ if (vms->smmu_nested_count >= MAX_SMMU_NESTED) { ++ error_setg(errp, "smmuv3-nested max count reached!"); ++ 
return; ++ } ++ ++ base += (vms->smmu_nested_count * SMMU_IO_LEN); ++ irq += (vms->smmu_nested_count * NUM_SMMU_IRQS); + ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); ++ for (int i = 0; i < 4; i++) { ++ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, ++ qdev_get_gpio_in(vms->gic, irq + i)); ++ } ++ if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) { ++ vms->iommu = VIRT_IOMMU_SMMUV3_NESTED; ++ } ++ vms->smmu_nested_count++; ++ } ++ ++ if (vms->platform_bus_dev) { + if (device_is_dynamic_sysbus(mc, dev)) { + platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev), + SYS_BUS_DEVICE(dev)); +@@ -3789,6 +3815,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM); ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_NESTED); + #ifdef CONFIG_TPM + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); + #endif +diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c +index eebcd28f9a..0f0d0b3e58 100644 +--- a/hw/core/sysbus-fdt.c ++++ b/hw/core/sysbus-fdt.c +@@ -489,6 +489,7 @@ static const BindingEntry bindings[] = { + #ifdef CONFIG_LINUX + TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node), + TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node), ++ TYPE_BINDING("arm-smmuv3-nested", no_fdt_node), + VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node), + #endif + #ifdef CONFIG_TPM +diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h +index d183a62766..87e628be7a 100644 +--- a/include/hw/arm/smmuv3.h ++++ b/include/hw/arm/smmuv3.h +@@ -84,6 +84,21 @@ struct SMMUv3Class { + #define TYPE_ARM_SMMUV3 "arm-smmuv3" + OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3) + ++#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested" ++OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, 
ARM_SMMUV3_NESTED) ++ ++struct SMMUv3NestedState { ++ SMMUv3State smmuv3_state; ++}; ++ ++struct SMMUv3NestedClass { ++ /*< private >*/ ++ SMMUv3Class smmuv3_class; ++ /*< public >*/ ++ ++ DeviceRealize parent_realize; ++}; ++ + #define STAGE1_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S1P) + #define STAGE2_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S2P) + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index e6a449becd..cd41e28202 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -109,6 +109,9 @@ typedef enum { + /* MMIO region size for SMMUv3 */ + #define SMMU_IO_LEN 0x20000 + ++/* Max supported nested SMMUv3 */ ++#define MAX_SMMU_NESTED 64 ++ + enum { + VIRT_FLASH, + VIRT_MEM, +@@ -121,6 +124,7 @@ enum { + VIRT_GIC_ITS, + VIRT_GIC_REDIST, + VIRT_SMMU, ++ VIRT_SMMU_NESTED, + VIRT_UART, + VIRT_CPUFREQ, + VIRT_MMIO, +@@ -155,6 +159,7 @@ enum { + typedef enum VirtIOMMUType { + VIRT_IOMMU_NONE, + VIRT_IOMMU_SMMUV3, ++ VIRT_IOMMU_SMMUV3_NESTED, + VIRT_IOMMU_VIRTIO, + } VirtIOMMUType; + +@@ -222,6 +227,7 @@ struct VirtMachineState { + bool mte; + bool dtb_randomness; + bool pmu; ++ int smmu_nested_count; + OnOffAuto acpi; + VirtGICType gic_version; + VirtIOMMUType iommu; +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Add-missing-STE-invalidation.patch b/hw-arm-smmuv3-Add-missing-STE-invalidation.patch new file mode 100644 index 0000000000000000000000000000000000000000..f96f14407a0b15a8e219c6b5e4f0cacf0204fba3 --- /dev/null +++ b/hw-arm-smmuv3-Add-missing-STE-invalidation.patch @@ -0,0 +1,92 @@ +From 707bd8198642549595f11ef34c80094fbf7d2de1 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Mon, 29 Apr 2024 21:26:41 +0000 +Subject: [PATCH] hw/arm/smmuv3: Add missing STE invalidation + +Multiple STEs can be invalidated in a range via SMMU_CMD_CFGI_STE_RANGE +or SMMU_CMD_CFGI_ALL command. + +Add the missing STE invalidation in this pathway.
+ +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-internal.h | 1 + + hw/arm/smmuv3.c | 28 +++++++++++++++++++++++++--- + 2 files changed, 26 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h +index 843bebb185..5a81dd1b82 100644 +--- a/hw/arm/smmu-internal.h ++++ b/hw/arm/smmu-internal.h +@@ -142,6 +142,7 @@ typedef struct SMMUIOTLBPageInvInfo { + } SMMUIOTLBPageInvInfo; + + typedef struct SMMUSIDRange { ++ SMMUState *state; + uint32_t start; + uint32_t end; + } SMMUSIDRange; +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 540831ab8e..9d44bb19bc 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1322,11 +1322,9 @@ static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid) + } + + static gboolean +-smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data) ++_smmuv3_invalidate_ste(SMMUDevice *sdev, SMMUSIDRange *sid_range) + { +- SMMUDevice *sdev = (SMMUDevice *)key; + uint32_t sid = smmu_get_sid(sdev); +- SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data; + + if (sid < sid_range->start || sid > sid_range->end) { + return false; +@@ -1337,6 +1335,28 @@ smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data) + return true; + } + ++static gboolean ++smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data) ++{ ++ return _smmuv3_invalidate_ste((SMMUDevice *)key, (SMMUSIDRange *)user_data); ++} ++ ++static void smmuv3_invalidate_nested_ste(SMMUSIDRange *sid_range) ++{ ++ SMMUState *bs = sid_range->state; ++ SMMUDevice *sdev; ++ ++ if (!bs->viommu) { ++ return; ++ } ++ ++ QLIST_FOREACH(sdev, &bs->viommu->device_list, next) { ++ if (smmu_get_sid(sdev)) { ++ _smmuv3_invalidate_ste(sdev, sid_range); ++ } ++ } ++} ++ + static int smmuv3_cmdq_consume(SMMUv3State *s) + { + SMMUState *bs = ARM_SMMU(s); +@@ -1418,12 +1438,14 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + } + + mask = (1ULL << (range + 1)) - 1; ++ sid_range.state = bs; + sid_range.start = sid & ~mask; 
+ sid_range.end = sid_range.start + mask; + + trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end); + g_hash_table_foreach_remove(bs->configs, smmuv3_invalidate_ste, + &sid_range); ++ smmuv3_invalidate_nested_ste(&sid_range); + break; + } + case SMMU_CMD_CFGI_CD: +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Add-smmu_dev_install_nested_ste-for-CF.patch b/hw-arm-smmuv3-Add-smmu_dev_install_nested_ste-for-CF.patch new file mode 100644 index 0000000000000000000000000000000000000000..02defaef0fead966718af49b83ad800864bbec0c --- /dev/null +++ b/hw-arm-smmuv3-Add-smmu_dev_install_nested_ste-for-CF.patch @@ -0,0 +1,255 @@ +From 13b84313c9f7ca4823abdbad92baf091c337861e Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Fri, 21 Apr 2023 15:13:53 -0700 +Subject: [PATCH] hw/arm/smmuv3: Add smmu_dev_install_nested_ste() for CFGI_STE + +Call smmu_dev_install_nested_ste and eventually down to IOMMU_HWPT_ALLOC +ioctl for a nested HWPT allocation. + +Signed-off-by: Nicolin Chen +--- + hw/arm/smmu-common.c | 9 ++++ + hw/arm/smmuv3-internal.h | 1 + + hw/arm/smmuv3.c | 97 +++++++++++++++++++++++++++++++++++- + hw/arm/trace-events | 1 + + include/hw/arm/smmu-common.h | 14 ++++++ + 5 files changed, 120 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index cc41bf3de8..9e9af8f5c7 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -780,6 +780,7 @@ static bool smmu_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + + static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) + { ++ SMMUVdev *vdev; + SMMUDevice *sdev; + SMMUViommu *viommu; + SMMUState *s = opaque; +@@ -803,13 +804,21 @@ static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) + error_report("Unable to attach dev to the default HW pagetable"); + } + ++ vdev = sdev->vdev; + viommu = sdev->viommu; + + sdev->idev = NULL; + sdev->viommu = NULL; ++ sdev->vdev = NULL; + QLIST_REMOVE(sdev, next); + 
trace_smmu_unset_iommu_device(devfn, smmu_get_sid(sdev)); + ++ if (vdev) { ++ iommufd_backend_free_id(viommu->iommufd, vdev->core->vdev_id); ++ g_free(vdev->core); ++ g_free(vdev); ++ } ++ + if (QLIST_EMPTY(&viommu->device_list)) { + iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id); + iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id); +diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h +index 6076025ad6..163459d450 100644 +--- a/hw/arm/smmuv3-internal.h ++++ b/hw/arm/smmuv3-internal.h +@@ -552,6 +552,7 @@ typedef struct CD { + + #define STE_S1FMT(x) extract32((x)->word[0], 4 , 2) + #define STE_S1CDMAX(x) extract32((x)->word[1], 27, 5) ++#define STE_S1DSS(x) extract32((x)->word[2], 0, 2) + #define STE_S1STALLD(x) extract32((x)->word[2], 27, 1) + #define STE_EATS(x) extract32((x)->word[2], 28, 2) + #define STE_STRW(x) extract32((x)->word[2], 30, 2) +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 253d297eec..540831ab8e 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -563,6 +563,27 @@ bad_ste: + return -EINVAL; + } + ++static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config) ++{ ++ ++ if (STE_CFG_ABORT(config)) { ++ cfg->aborted = true; ++ return; ++ } ++ if (STE_CFG_BYPASS(config)) { ++ cfg->bypassed = true; ++ return; ++ } ++ ++ if (STE_CFG_S1_ENABLED(config)) { ++ cfg->stage = SMMU_STAGE_1; ++ } ++ ++ if (STE_CFG_S2_ENABLED(config)) { ++ cfg->stage |= SMMU_STAGE_2; ++ } ++} ++ + /* Returns < 0 in case of invalid STE, 0 otherwise */ + static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, + STE *ste, SMMUEventInfo *event) +@@ -579,12 +600,19 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, + + config = STE_CONFIG(ste); + +- if (STE_CFG_ABORT(config)) { ++ decode_ste_config(cfg, config); ++ ++ /* S1DSS.Terminate is same as Config.abort for default stream */ ++ if (STE_CFG_S1_ENABLED(config) && STE_S1DSS(ste) == 0) { + cfg->aborted = true; ++ } ++ ++ if (cfg->aborted || cfg->bypassed) { + 
return 0; + } + +- if (STE_CFG_BYPASS(config)) { ++ /* S1DSS.Bypass is same as Config.bypass for default stream */ ++ if (STE_CFG_S1_ENABLED(config) && STE_S1DSS(ste) == 0x1) { + cfg->bypassed = true; + return 0; + } +@@ -1231,6 +1259,68 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) + } + } + ++static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid) ++{ ++#ifdef __linux__ ++ SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid, ++ .inval_ste_allowed = true}; ++ struct iommu_hwpt_arm_smmuv3 nested_data = {}; ++ SMMUv3State *s = sdev->smmu; ++ SMMUState *bs = &s->smmu_state; ++ uint32_t config; ++ STE ste; ++ int ret; ++ ++ if (!sdev->viommu || !bs->nested) { ++ return; ++ } ++ ++ if (!sdev->vdev && sdev->idev && sdev->viommu) { ++ SMMUVdev *vdev = g_new0(SMMUVdev, 1); ++ vdev->core = iommufd_backend_alloc_vdev(sdev->idev, sdev->viommu->core, ++ sid); ++ if (!vdev->core) { ++ error_report("failed to allocate a vDEVICE"); ++ g_free(vdev); ++ return; ++ } ++ sdev->vdev = vdev; ++ } ++ ++ ret = smmu_find_ste(sdev->smmu, sid, &ste, &event); ++ if (ret) { ++ /* ++ * For a 2-level Stream Table, the level-2 table might not be ready ++ * until the device gets inserted to the stream table. Ignore this. 
++ */ ++ return; ++ } ++ ++ config = STE_CONFIG(&ste); ++ if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) { ++ smmu_dev_uninstall_nested_ste(sdev, STE_CFG_ABORT(config)); ++ smmuv3_flush_config(sdev); ++ return; ++ } ++ ++ nested_data.ste[0] = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32; ++ nested_data.ste[1] = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32; ++ /* V | CONFIG | S1FMT | S1CTXPTR | S1CDMAX */ ++ nested_data.ste[0] &= 0xf80fffffffffffffULL; ++ /* S1DSS | S1CIR | S1COR | S1CSH | S1STALLD | EATS */ ++ nested_data.ste[1] &= 0x380000ffULL; ++ ++ ret = smmu_dev_install_nested_ste(sdev, IOMMU_HWPT_DATA_ARM_SMMUV3, ++ sizeof(nested_data), &nested_data); ++ if (ret) { ++ error_report("Unable to install nested STE=%16LX:%16LX, ret=%d", ++ nested_data.ste[1], nested_data.ste[0], ret); ++ } ++ ++ trace_smmuv3_install_nested_ste(sid, nested_data.ste[1], nested_data.ste[0]); ++#endif ++} ++ + static gboolean + smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data) + { +@@ -1241,6 +1331,8 @@ smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data) + if (sid < sid_range->start || sid > sid_range->end) { + return false; + } ++ smmuv3_flush_config(sdev); ++ smmuv3_install_nested_ste(sdev, sid); + trace_smmuv3_config_cache_inv(sid); + return true; + } +@@ -1310,6 +1402,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + trace_smmuv3_cmdq_cfgi_ste(sid); + sdev = container_of(mr, SMMUDevice, iommu); + smmuv3_flush_config(sdev); ++ smmuv3_install_nested_ste(sdev, sid); + + break; + } +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index 1e3d86382d..490da6349c 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -57,4 +57,5 @@ smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s + smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" + smmuv3_get_device_info(uint32_t idr0, uint32_t idr1, uint32_t idr3, uint32_t idr5) "idr0=0x%x idr1=0x%x idr3=0x%x 
idr5=0x%x" + smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint16_t vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 ++smmuv3_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=%d ste=%"PRIx64":%"PRIx64 + +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index d120c352cf..955ca716a5 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -51,6 +51,13 @@ typedef enum { + SMMU_PTW_ERR_PERMISSION, /* Permission fault */ + } SMMUPTWEventType; + ++/* SMMU Stage */ ++typedef enum { ++ SMMU_STAGE_1 = 1, ++ SMMU_STAGE_2, ++ SMMU_NESTED, ++} SMMUStage; ++ + typedef struct SMMUPTWEventInfo { + int stage; + SMMUPTWEventType type; +@@ -125,6 +132,12 @@ typedef struct SMMUViommu { + QLIST_ENTRY(SMMUViommu) next; + } SMMUViommu; + ++typedef struct SMMUVdev { ++ SMMUViommu *vsmmu; ++ IOMMUFDVdev *core; ++ uint32_t sid; ++}SMMUVdev; ++ + typedef struct SMMUS1Hwpt { + void *smmu; + IOMMUFDBackend *iommufd; +@@ -141,6 +154,7 @@ typedef struct SMMUDevice { + IOMMUMemoryRegion iommu; + HostIOMMUDeviceIOMMUFD *idev; + SMMUViommu *viommu; ++ SMMUVdev *vdev; + SMMUS1Hwpt *s1_hwpt; + AddressSpace as; + AddressSpace as_sysmem; +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch b/hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch deleted file mode 100644 index 89f9292287246e65a25587df2da43f2765457312..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch +++ /dev/null @@ -1,32 +0,0 @@ -From bc602a4d1355774a0a44e8fbf6dd842049dd63f3 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 28 Aug 2018 09:21:53 -0400 -Subject: [PATCH] hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute - -The SMMUv3 has the peculiarity to translate MSI -transactionss. let's advertise the corresponding -attribute. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 55eed5189e..83d59b6d28 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -1538,6 +1538,9 @@ static int smmuv3_get_attr(IOMMUMemoryRegion *iommu, - if (attr == IOMMU_ATTR_VFIO_NESTED) { - *(bool *) data = true; - return 0; -+ } else if (attr == IOMMU_ATTR_MSI_TRANSLATE) { -+ *(bool *) data = true; -+ return 0; - } - return -EINVAL; - } --- -2.27.0 - diff --git a/hw-arm-smmuv3-Allow-MAP-notifiers.patch b/hw-arm-smmuv3-Allow-MAP-notifiers.patch deleted file mode 100644 index ec050121fcd57a2e942774ce76fceb8ed5039cf2..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Allow-MAP-notifiers.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 965729b4875f637dacdbf82960347beb65512d12 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 18 Mar 2020 11:17:36 +0100 -Subject: [PATCH] hw/arm/smmuv3: Allow MAP notifiers - -We now have all bricks to support nested paging. This -uses MAP notifiers to map the MSIs. So let's allow MAP -notifiers to be registered. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 931d6eae57..c26fba118c 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -1563,14 +1563,6 @@ static void smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, - SMMUv3State *s3 = sdev->smmu; - SMMUState *s = &(s3->smmu_state); - -- if (new & IOMMU_NOTIFIER_MAP) { -- int bus_num = pci_bus_num(sdev->bus); -- PCIDevice *pcidev = pci_find_device(sdev->bus, bus_num, sdev->devfn); -- -- warn_report("SMMUv3 does not support notification on MAP: " -- "device %s will not function properly", pcidev->name); -- } -- - if (old == IOMMU_NOTIFIER_NONE) { - trace_smmuv3_notify_flag_add(iommu->parent_obj.name); - QLIST_INSERT_HEAD(&s->devices_with_notifiers, sdev, next); --- -2.27.0 - diff --git a/hw-arm-smmuv3-Associate-a-pci-bus-with-a-SMMUv3-Nest.patch b/hw-arm-smmuv3-Associate-a-pci-bus-with-a-SMMUv3-Nest.patch new file mode 100644 index 0000000000000000000000000000000000000000..abf1ab5691f43a249e5cbc6b9e1ae12b530ddbc6 --- /dev/null +++ b/hw-arm-smmuv3-Associate-a-pci-bus-with-a-SMMUv3-Nest.patch @@ -0,0 +1,95 @@ +From afca50145f52601d912a805b65bd4530e9278388 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 6 Nov 2024 15:53:45 +0000 +Subject: [PATCH] hw/arm/smmuv3: Associate a pci bus with a SMMUv3 Nested + device + +Subsequent patches will add IORT modifications to get this working. 
+ +Signed-off-by: Shameer Kolothum +--- + hw/arm/smmuv3.c | 27 +++++++++++++++++++++++++++ + include/hw/arm/smmuv3.h | 2 ++ + 2 files changed, 29 insertions(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 3010471cdc..66e4e1b57d 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -24,6 +24,7 @@ + #include "hw/qdev-properties.h" + #include "hw/qdev-core.h" + #include "hw/pci/pci.h" ++#include "hw/pci/pci_bridge.h" + #include "cpu.h" + #include "trace.h" + #include "qemu/log.h" +@@ -2069,12 +2070,32 @@ static void smmu_realize(DeviceState *d, Error **errp) + smmu_init_irq(s, dev); + } + ++static int smmuv3_nested_pci_host_bridge(Object *obj, void *opaque) ++{ ++ DeviceState *d = opaque; ++ SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d); ++ ++ if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { ++ PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; ++ if (s_nested->pci_bus && !strcmp(bus->qbus.name, s_nested->pci_bus)) { ++ object_property_set_link(OBJECT(d), "primary-bus", OBJECT(bus), ++ &error_abort); ++ } ++ } ++ return 0; ++} ++ + static void smmu_nested_realize(DeviceState *d, Error **errp) + { + SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d); + SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested); ++ SysBusDevice *dev = SYS_BUS_DEVICE(d); + Error *local_err = NULL; + ++ object_child_foreach_recursive(object_get_root(), ++ smmuv3_nested_pci_host_bridge, d); ++ object_property_set_bool(OBJECT(dev), "nested", true, &error_abort); ++ + c->parent_realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); +@@ -2161,6 +2182,11 @@ static Property smmuv3_properties[] = { + DEFINE_PROP_END_OF_LIST() + }; + ++static Property smmuv3_nested_properties[] = { ++ DEFINE_PROP_STRING("pci-bus", SMMUv3NestedState, pci_bus), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void smmuv3_instance_init(Object *obj) + { + /* Nothing much to do here as of now */ +@@ -2188,6 +2214,7 @@ static void smmuv3_nested_class_init(ObjectClass *klass, void 
*data) + dc->vmsd = &vmstate_smmuv3; + device_class_set_parent_realize(dc, smmu_nested_realize, + &c->parent_realize); ++ device_class_set_props(dc, smmuv3_nested_properties); + dc->user_creatable = true; + dc->hotpluggable = false; + } +diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h +index 87e628be7a..96513fce56 100644 +--- a/include/hw/arm/smmuv3.h ++++ b/include/hw/arm/smmuv3.h +@@ -89,6 +89,8 @@ OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED) + + struct SMMUv3NestedState { + SMMUv3State smmuv3_state; ++ ++ char *pci_bus; + }; + + struct SMMUv3NestedClass { +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Check-idr-registers-for-STE_S1CDMAX-an.patch b/hw-arm-smmuv3-Check-idr-registers-for-STE_S1CDMAX-an.patch new file mode 100644 index 0000000000000000000000000000000000000000..3bf8416baff11d101569b76b408a77c5de6e1050 --- /dev/null +++ b/hw-arm-smmuv3-Check-idr-registers-for-STE_S1CDMAX-an.patch @@ -0,0 +1,38 @@ +From fac9784bbedb50dc964feb9cf70b6f37472fcf60 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Fri, 21 Apr 2023 22:10:44 -0700 +Subject: [PATCH] hw/arm/smmuv3: Check idr registers for STE_S1CDMAX and + STE_S1STALLD + +With nested translation, the underlying HW could support those two fields. +Allow them according to the updated idr registers after the hw_info ioctl. 
+ +Signed-off-by: Nicolin Chen +--- + hw/arm/smmuv3.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 4208325ab3..253d297eec 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -622,13 +622,14 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, + } + } + +- if (STE_S1CDMAX(ste) != 0) { ++ if (!FIELD_EX32(s->idr[1], IDR1, SSIDSIZE) && STE_S1CDMAX(ste) != 0) { + qemu_log_mask(LOG_UNIMP, + "SMMUv3 does not support multiple context descriptors yet\n"); + goto bad_ste; + } + +- if (STE_S1STALLD(ste)) { ++ /* STALL_MODEL being 0b01 means "stall is not supported" */ ++ if ((FIELD_EX32(s->idr[0], IDR0, STALL_MODEL) & 0x1) && STE_S1STALLD(ste)) { + qemu_log_mask(LOG_UNIMP, + "SMMUv3 S1 stalling fault model not allowed yet\n"); + goto bad_ste; +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Enable-sva-stall-IDR-features.patch b/hw-arm-smmuv3-Enable-sva-stall-IDR-features.patch new file mode 100644 index 0000000000000000000000000000000000000000..fde17487db46275897be8e5b5c2f939d64f9c428 --- /dev/null +++ b/hw-arm-smmuv3-Enable-sva-stall-IDR-features.patch @@ -0,0 +1,76 @@ +From c8267f88b2af37779a597aac00aeaf06adc80ccc Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Mon, 11 Dec 2023 14:42:01 +0000 +Subject: [PATCH] hw/arm/smmuv3: Enable sva/stall IDR features + +Emulate features that will enable the stall and sva feature in Guest. 
+ +Signed-off-by: Shameer Kolothum +--- + hw/arm/smmuv3-internal.h | 3 ++- + hw/arm/smmuv3.c | 8 +++----- + 2 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h +index a411fd4048..cfc04c563e 100644 +--- a/hw/arm/smmuv3-internal.h ++++ b/hw/arm/smmuv3-internal.h +@@ -74,6 +74,7 @@ REG32(IDR1, 0x4) + FIELD(IDR1, ECMDQ, 31, 1) + + #define SMMU_IDR1_SIDSIZE 16 ++#define SMMU_IDR1_SSIDSIZE 16 + #define SMMU_CMDQS 19 + #define SMMU_EVENTQS 19 + +@@ -104,7 +105,7 @@ REG32(IDR5, 0x14) + FIELD(IDR5, VAX, 10, 2); + FIELD(IDR5, STALL_MAX, 16, 16); + +-#define SMMU_IDR5_OAS 4 ++#define SMMU_IDR5_OAS 5 + + REG32(IIDR, 0x18) + REG32(AIDR, 0x1c) +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 66e4e1b57d..8d8dcccd48 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -343,13 +343,14 @@ static void smmuv3_init_regs(SMMUv3State *s) + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, ASID16, 1); /* 16-bit ASID */ + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, VMID16, 1); /* 16-bit VMID */ + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, TTENDIAN, 2); /* little endian */ +- s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STALL_MODEL, 1); /* No stall */ ++ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STALL_MODEL, 0); /* stall */ + /* terminated transaction will always be aborted/error returned */ + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, TERM_MODEL, 1); + /* 2-level stream table supported */ + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STLEVEL, 1); + + s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SIDSIZE, SMMU_IDR1_SIDSIZE); ++ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, SMMU_IDR1_SSIDSIZE); + s->idr[1] = FIELD_DP32(s->idr[1], IDR1, EVENTQS, SMMU_EVENTQS); + s->idr[1] = FIELD_DP32(s->idr[1], IDR1, CMDQS, SMMU_CMDQS); + +@@ -361,7 +362,7 @@ static void smmuv3_init_regs(SMMUv3State *s) + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2); + +- s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, 
SMMU_IDR5_OAS); /* 44 bits */ ++ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 48 bits */ + /* 4K, 16K and 64K granule support */ + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); +@@ -776,9 +777,6 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) + if (!CD_A(cd)) { + goto bad_cd; /* SMMU_IDR0.TERM_MODEL == 1 */ + } +- if (CD_S(cd)) { +- goto bad_cd; /* !STE_SECURE && SMMU_IDR0.STALL_MODEL == 1 */ +- } + if (CD_HA(cd) || CD_HD(cd)) { + goto bad_cd; /* HTTU = 0 */ + } +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch b/hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch deleted file mode 100644 index 1f3425e7eddae1fee87d0cb8d86587f4e6011ee5..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 8108317641b3cb378bf1862dc3c0a73d1e0976ce Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 4 Sep 2018 08:48:33 -0400 -Subject: [PATCH] hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA - invalidation - -When the guest invalidates one S1 entry, it passes the asid. -When propagating this invalidation downto the host, the asid -information also must be passed. So let's fill the arch_id field -introduced for that purpose and accordingly set the flags to -indicate its presence. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index f8e721f949..c6b950af35 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -824,6 +824,8 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - entry.iova = iova; - entry.addr_mask = (1 << tt->granule_sz) - 1; - entry.perm = IOMMU_NONE; -+ entry.flags = IOMMU_INV_FLAGS_ARCHID; -+ entry.arch_id = asid; - - memory_region_notify_one(n, &entry); - } --- -2.27.0 - diff --git a/hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch b/hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch deleted file mode 100644 index febaffaa655ecbe70419d692e586e56b1561f330..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 6393ad5c1ba6a04b038d80ecc1e663ad91ed0d21 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 14 Mar 2019 09:55:13 -0400 -Subject: [PATCH] hw/arm/smmuv3: Fill the IOTLBEntry leaf field on NH_VA - invalidation - -Let's propagate the leaf attribute throughout the invalidation path. -This hint is used to reduce the scope of the invalidations to the -last level of translation. Not enforcing it induces large performance -penalties in nested mode. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 13 ++++++++----- - 1 file changed, 8 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index c6b950af35..c1caa6bc3a 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -795,7 +795,7 @@ epilogue: - static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - IOMMUNotifier *n, - int asid, -- dma_addr_t iova) -+ dma_addr_t iova, bool leaf) - { - SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); - SMMUEventInfo event = {}; -@@ -826,6 +826,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - entry.perm = IOMMU_NONE; - entry.flags = IOMMU_INV_FLAGS_ARCHID; - entry.arch_id = asid; -+ entry.leaf = leaf; - - memory_region_notify_one(n, &entry); - } -@@ -854,7 +855,8 @@ static void smmuv3_notify_asid(IOMMUMemoryRegion *mr, - } - - /* invalidate an asid/iova tuple in all mr's */ --static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) -+static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, -+ bool leaf) - { - SMMUDevice *sdev; - -@@ -865,7 +867,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) - trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova); - - IOMMU_NOTIFIER_FOREACH(n, mr) { -- smmuv3_notify_iova(mr, n, asid, iova); -+ smmuv3_notify_iova(mr, n, asid, iova, leaf); - } - } - } -@@ -1018,9 +1020,10 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) - { - dma_addr_t addr = CMD_ADDR(&cmd); - uint16_t vmid = CMD_VMID(&cmd); -+ bool leaf = CMD_LEAF(&cmd); - - trace_smmuv3_cmdq_tlbi_nh_vaa(vmid, addr); -- smmuv3_inv_notifiers_iova(bs, -1, addr); -+ smmuv3_inv_notifiers_iova(bs, -1, addr, leaf); - smmu_iotlb_inv_all(bs); - break; - } -@@ -1032,7 +1035,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) - bool leaf = CMD_LEAF(&cmd); - - trace_smmuv3_cmdq_tlbi_nh_va(vmid, asid, addr, leaf); -- smmuv3_inv_notifiers_iova(bs, asid, addr); -+ 
smmuv3_inv_notifiers_iova(bs, asid, addr, leaf); - smmu_iotlb_inv_iova(bs, asid, addr); - break; - } --- -2.27.0 - diff --git a/hw-arm-smmuv3-Forward-cache-invalidate-commands-via-.patch b/hw-arm-smmuv3-Forward-cache-invalidate-commands-via-.patch new file mode 100644 index 0000000000000000000000000000000000000000..9568a8e52a27ac08e4595594792f3a4cae00986f --- /dev/null +++ b/hw-arm-smmuv3-Forward-cache-invalidate-commands-via-.patch @@ -0,0 +1,229 @@ +From b331acc42fa54ca93496c32d92cdf5397927bff1 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Fri, 21 Apr 2023 15:18:56 -0700 +Subject: [PATCH] hw/arm/smmuv3: Forward cache invalidate commands via iommufd + +Introduce an SMMUCommandBatch and some helpers to batch the commands. + +Rewind the q->cons accordingly when it fails to execute a batch/command. + +Currently separate TLBI commands and device cache commands to avoid some +errata on certain versions of SMMUs. Later it should check IIDR register +to detect if underlying SMMU hw has such an erratum.
+ +Signed-off-by: Nicolin Chen +--- + hw/arm/smmuv3-internal.h | 13 +++++ + hw/arm/smmuv3.c | 113 ++++++++++++++++++++++++++++++++++++++- + 2 files changed, 125 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h +index 163459d450..a411fd4048 100644 +--- a/hw/arm/smmuv3-internal.h ++++ b/hw/arm/smmuv3-internal.h +@@ -226,6 +226,19 @@ static inline bool smmuv3_gerror_irq_enabled(SMMUv3State *s) + #define Q_CONS_WRAP(q) (((q)->cons & WRAP_MASK(q)) >> (q)->log2size) + #define Q_PROD_WRAP(q) (((q)->prod & WRAP_MASK(q)) >> (q)->log2size) + ++#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1)) ++ ++static inline int smmuv3_q_ncmds(SMMUQueue *q) ++{ ++ uint32_t prod = Q_PROD(q); ++ uint32_t cons = Q_CONS(q); ++ ++ if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q)) ++ return prod - cons; ++ else ++ return WRAP_MASK(q) - cons + prod; ++} ++ + static inline bool smmuv3_q_full(SMMUQueue *q) + { + return ((q->cons ^ q->prod) & WRAP_INDEX_MASK(q)) == WRAP_MASK(q); +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index b2ffe2d40b..b860c8385f 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1357,16 +1357,85 @@ static void smmuv3_invalidate_nested_ste(SMMUSIDRange *sid_range) + } + } + ++/** ++ * SMMUCommandBatch - batch of commands to issue for nested SMMU invalidation ++ * @cmds: Pointer to list of commands ++ * @cons: Pointer to list of CONS corresponding to the commands ++ * @ncmds: Total ncmds in the batch ++ * @dev_cache: Issue to a device cache ++ */ ++typedef struct SMMUCommandBatch { ++ Cmd *cmds; ++ uint32_t *cons; ++ uint32_t ncmds; ++ bool dev_cache; ++} SMMUCommandBatch; ++ ++/* Update batch->ncmds to the number of execute cmds */ ++static int smmuv3_issue_cmd_batch(SMMUState *bs, SMMUCommandBatch *batch) ++{ ++ uint32_t total = batch->ncmds; ++ int ret; ++ ++ ret = smmu_viommu_invalidate_cache(bs->viommu->core, ++ IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3, ++ sizeof(Cmd), &batch->ncmds, batch->cmds); ++ if (total != 
batch->ncmds) { ++ error_report("%s failed: ret=%d, total=%d, done=%d", ++ __func__, ret, total, batch->ncmds); ++ return ret; ++ } ++ ++ batch->ncmds = 0; ++ batch->dev_cache = false; ++ return ret; ++} ++ ++static int smmuv3_batch_cmds(SMMUState *bs, SMMUCommandBatch *batch, ++ Cmd *cmd, uint32_t *cons, bool dev_cache) ++{ ++ int ret; ++ ++ if (!bs->nested || !bs->viommu) { ++ return 0; ++ } ++ ++ /* ++ * Currently separate dev_cache and hwpt for safety, which might not be ++ * necessary if underlying HW SMMU does not have the errata. ++ * ++ * TODO check IIDR register values read from hw_info. ++ */ ++ if (batch->ncmds && (dev_cache != batch->dev_cache)) { ++ ret = smmuv3_issue_cmd_batch(bs, batch); ++ if (ret) { ++ *cons = batch->cons[batch->ncmds]; ++ return ret; ++ } ++ } ++ batch->dev_cache = dev_cache; ++ batch->cmds[batch->ncmds] = *cmd; ++ batch->cons[batch->ncmds++] = *cons; ++ return 0; ++} ++ + static int smmuv3_cmdq_consume(SMMUv3State *s) + { + SMMUState *bs = ARM_SMMU(s); + SMMUCmdError cmd_error = SMMU_CERROR_NONE; + SMMUQueue *q = &s->cmdq; + SMMUCommandType type = 0; ++ SMMUCommandBatch batch = {}; ++ uint32_t ncmds = 0; + + if (!smmuv3_cmdq_enabled(s)) { + return 0; + } ++ ++ ncmds = smmuv3_q_ncmds(q); ++ batch.cmds = g_new0(Cmd, ncmds); ++ batch.cons = g_new0(uint32_t, ncmds); ++ + /* + * some commands depend on register values, typically CR0. 
In case those + * register values change while handling the command, spec says it +@@ -1463,6 +1532,13 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + + trace_smmuv3_cmdq_cfgi_cd(sid); + smmuv3_flush_config(sdev); ++ ++ if (sdev->s1_hwpt) { ++ if (smmuv3_batch_cmds(sdev->smmu, &batch, &cmd, &q->cons, true)) { ++ cmd_error = SMMU_CERROR_ILL; ++ break; ++ } ++ } + break; + } + case SMMU_CMD_TLBI_NH_ASID: +@@ -1477,6 +1553,10 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + trace_smmuv3_cmdq_tlbi_nh_asid(asid); + smmu_inv_notifiers_all(&s->smmu_state); + smmu_iotlb_inv_asid(bs, asid); ++ if (smmuv3_batch_cmds(bs, &batch, &cmd, &q->cons, false)) { ++ cmd_error = SMMU_CERROR_ILL; ++ break; ++ } + break; + } + case SMMU_CMD_TLBI_NH_ALL: +@@ -1489,6 +1569,11 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + trace_smmuv3_cmdq_tlbi_nh(); + smmu_inv_notifiers_all(&s->smmu_state); + smmu_iotlb_inv_all(bs); ++ ++ if (smmuv3_batch_cmds(bs, &batch, &cmd, &q->cons, false)) { ++ cmd_error = SMMU_CERROR_ILL; ++ break; ++ } + break; + case SMMU_CMD_TLBI_NH_VAA: + case SMMU_CMD_TLBI_NH_VA: +@@ -1497,7 +1582,24 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + break; + } + smmuv3_range_inval(bs, &cmd); ++ ++ if (smmuv3_batch_cmds(bs, &batch, &cmd, &q->cons, false)) { ++ cmd_error = SMMU_CERROR_ILL; ++ break; ++ } + break; ++ case SMMU_CMD_ATC_INV: ++ { ++ SMMUDevice *sdev = smmu_find_sdev(bs, CMD_SID(&cmd)); ++ ++ if (sdev->s1_hwpt) { ++ if (smmuv3_batch_cmds(sdev->smmu, &batch, &cmd, &q->cons, true)) { ++ cmd_error = SMMU_CERROR_ILL; ++ break; ++ } ++ } ++ break; ++ } + case SMMU_CMD_TLBI_S12_VMALL: + { + uint16_t vmid = CMD_VMID(&cmd); +@@ -1529,7 +1631,6 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + case SMMU_CMD_TLBI_EL2_ASID: + case SMMU_CMD_TLBI_EL2_VA: + case SMMU_CMD_TLBI_EL2_VAA: +- case SMMU_CMD_ATC_INV: + case SMMU_CMD_PRI_RESP: + case SMMU_CMD_RESUME: + case SMMU_CMD_STALL_TERM: +@@ -1554,12 +1655,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + */ + 
queue_cons_incr(q); + } ++ qemu_mutex_lock(&s->mutex); ++ if (!cmd_error && batch.ncmds && bs->viommu) { ++ if (smmuv3_issue_cmd_batch(bs, &batch)) { ++ q->cons = batch.cons[batch.ncmds]; ++ cmd_error = SMMU_CERROR_ILL; ++ } ++ } ++ qemu_mutex_unlock(&s->mutex); + + if (cmd_error) { + trace_smmuv3_cmdq_consume_error(smmu_cmd_string(type), cmd_error); + smmu_write_cmdq_err(s, cmd_error); + smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_CMDQ_ERR_MASK); + } ++ g_free(batch.cmds); ++ g_free(batch.cons); + + trace_smmuv3_cmdq_consume_out(Q_PROD(q), Q_CONS(q), + Q_PROD_WRAP(q), Q_CONS_WRAP(q)); +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Ignore-IOMMU_NOTIFIER_MAP-for-nested-s.patch b/hw-arm-smmuv3-Ignore-IOMMU_NOTIFIER_MAP-for-nested-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..5705fffb83c377b9c07fd5af4d5ad2fcf45230ea --- /dev/null +++ b/hw-arm-smmuv3-Ignore-IOMMU_NOTIFIER_MAP-for-nested-s.patch @@ -0,0 +1,43 @@ +From 9f3b8c283d4c1014ff292faddb78bbbfd7ec22d3 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Tue, 9 Apr 2024 01:49:26 +0000 +Subject: [PATCH] hw/arm/smmuv3: Ignore IOMMU_NOTIFIER_MAP for nested-smmuv3 + +If a device's MemoryRegion type is iommu, vfio core registers a listener, +passing the IOMMU_NOTIFIER_IOTLB_EVENTS flag (bundle of IOMMU_NOTIFIER_MAP +and IOMMU_NOTIFIER_UNMAP). + +On the other hand, nested SMMUv3 does not use a map notifier. And it would +only insert an IOTLB entry for MSI doorbell page mapping, which can simply +be done by the mr->translate call. + +Ignore the IOMMU_NOTIFIER_MAP flag and drop the error out.
+ +Signed-off-by: Nicolin Chen +--- + hw/arm/smmuv3.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 64ca4c5542..db111220c7 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1881,12 +1881,9 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, + return -EINVAL; + } + +- if (new & IOMMU_NOTIFIER_MAP) { +- error_setg(errp, +- "device %02x.%02x.%x requires iommu MAP notifier which is " +- "not currently supported", pci_bus_num(sdev->bus), +- PCI_SLOT(sdev->devfn), PCI_FUNC(sdev->devfn)); +- return -EINVAL; ++ /* nested-smmuv3 does not need IOMMU_NOTIFIER_MAP. Ignore it. */ ++ if (s->nested) { ++ new &= ~IOMMU_NOTIFIER_MAP; + } + + if (old == IOMMU_NOTIFIER_NONE) { +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Implement-fault-injection.patch b/hw-arm-smmuv3-Implement-fault-injection.patch deleted file mode 100644 index 0260e28a05e7d30ec2b637eadb2251890c7e3701..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Implement-fault-injection.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 55bfd18b7671c82705d83d543281add0afcda31f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 13 Sep 2018 14:24:45 +0200 -Subject: [PATCH] hw/arm/smmuv3: Implement fault injection - -We convert iommu_fault structs received from the kernel -into the data struct used by the emulation code and record -the evnts into the virtual event queue. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 71 insertions(+) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 3d2151857d..931d6eae57 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -1594,6 +1594,76 @@ static int smmuv3_get_attr(IOMMUMemoryRegion *iommu, - return -EINVAL; - } - -+struct iommu_fault; -+ -+static inline int -+smmuv3_inject_faults(IOMMUMemoryRegion *iommu_mr, int count, -+ struct iommu_fault *buf) -+{ -+#ifdef __linux__ -+ SMMUDevice *sdev = container_of(iommu_mr, SMMUDevice, iommu); -+ SMMUv3State *s3 = sdev->smmu; -+ uint32_t sid = smmu_get_sid(sdev); -+ int i; -+ -+ for (i = 0; i < count; i++) { -+ SMMUEventInfo info = {}; -+ struct iommu_fault_unrecoverable *record; -+ -+ if (buf[i].type != IOMMU_FAULT_DMA_UNRECOV) { -+ continue; -+ } -+ -+ info.sid = sid; -+ record = &buf[i].event; -+ -+ switch (record->reason) { -+ case IOMMU_FAULT_REASON_PASID_INVALID: -+ info.type = SMMU_EVT_C_BAD_SUBSTREAMID; -+ /* TODO further fill info.u.c_bad_substream */ -+ break; -+ case IOMMU_FAULT_REASON_PASID_FETCH: -+ info.type = SMMU_EVT_F_CD_FETCH; -+ break; -+ case IOMMU_FAULT_REASON_BAD_PASID_ENTRY: -+ info.type = SMMU_EVT_C_BAD_CD; -+ /* TODO further fill info.u.c_bad_cd */ -+ break; -+ case IOMMU_FAULT_REASON_WALK_EABT: -+ info.type = SMMU_EVT_F_WALK_EABT; -+ info.u.f_walk_eabt.addr = record->addr; -+ info.u.f_walk_eabt.addr2 = record->fetch_addr; -+ break; -+ case IOMMU_FAULT_REASON_PTE_FETCH: -+ info.type = SMMU_EVT_F_TRANSLATION; -+ info.u.f_translation.addr = record->addr; -+ break; -+ case IOMMU_FAULT_REASON_OOR_ADDRESS: -+ info.type = SMMU_EVT_F_ADDR_SIZE; -+ info.u.f_addr_size.addr = record->addr; -+ break; -+ case IOMMU_FAULT_REASON_ACCESS: -+ info.type = SMMU_EVT_F_ACCESS; -+ info.u.f_access.addr = record->addr; -+ break; -+ case IOMMU_FAULT_REASON_PERMISSION: -+ info.type = SMMU_EVT_F_PERMISSION; -+ info.u.f_permission.addr = 
record->addr; -+ break; -+ default: -+ warn_report("%s Unexpected fault reason received from host: %d", -+ __func__, record->reason); -+ continue; -+ } -+ -+ smmuv3_record_event(s3, &info); -+ } -+ return 0; -+#else -+ return -1; -+#endif -+} -+ - static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, - void *data) - { -@@ -1602,6 +1672,7 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, - imrc->translate = smmuv3_translate; - imrc->notify_flag_changed = smmuv3_notify_flag_changed; - imrc->get_attr = smmuv3_get_attr; -+ imrc->inject_faults = smmuv3_inject_faults; - } - - static const TypeInfo smmuv3_type_info = { --- -2.27.0 - diff --git a/hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch b/hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch deleted file mode 100644 index 10639e89f957b970b78f2c0de930ad8b92032d0f..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch +++ /dev/null @@ -1,105 +0,0 @@ -From c0027c2e744c8ed99e937d3cbc88f400ab63a316 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Sun, 14 Feb 2021 12:30:57 -0500 -Subject: [PATCH] hw/arm/smmuv3: Improve stage1 ASID invalidation - -At the moment ASID invalidation command (CMD_TLBI_NH_ASID) is -propagated as a domain invalidation (the whole notifier range -is invalidated independently on any ASID information). - -The new granularity field now allows to be more precise and -restrict the invalidation to a peculiar ASID. Set the corresponding -fields and flag. - -We still keep the iova and addr_mask settings for consumers that -do not support the new fields, like VHOST. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 42 ++++++++++++++++++++++++++++++++++++++++-- - hw/arm/trace-events | 1 + - 2 files changed, 41 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 3b5723e1e1..0ef1ca376c 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -827,6 +827,29 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - memory_region_notify_one(n, &entry); - } - -+/** -+ * smmuv3_notify_asid - call the notifier @n for a given asid -+ * -+ * @mr: IOMMU mr region handle -+ * @n: notifier to be called -+ * @asid: address space ID or negative value if we don't care -+ */ -+static void smmuv3_notify_asid(IOMMUMemoryRegion *mr, -+ IOMMUNotifier *n, int asid) -+{ -+ IOMMUTLBEntry entry; -+ -+ entry.target_as = &address_space_memory; -+ entry.perm = IOMMU_NONE; -+ entry.granularity = IOMMU_INV_GRAN_PASID; -+ entry.flags = IOMMU_INV_FLAGS_ARCHID; -+ entry.arch_id = asid; -+ entry.iova = n->start; -+ entry.addr_mask = n->end - n->start; -+ -+ memory_region_notify_one(n, &entry); -+} -+ - /* invalidate an asid/iova tuple in all mr's */ - static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) - { -@@ -844,6 +867,22 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) - } - } - -+static void smmuv3_s1_asid_inval(SMMUState *s, uint16_t asid) -+{ -+ SMMUDevice *sdev; -+ -+ trace_smmuv3_s1_asid_inval(asid); -+ QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) { -+ IOMMUMemoryRegion *mr = &sdev->iommu; -+ IOMMUNotifier *n; -+ -+ IOMMU_NOTIFIER_FOREACH(n, mr) { -+ smmuv3_notify_asid(mr, n, asid); -+ } -+ } -+ smmu_iotlb_inv_asid(s, asid); -+} -+ - static int smmuv3_cmdq_consume(SMMUv3State *s) - { - SMMUState *bs = ARM_SMMU(s); -@@ -963,8 +1002,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) - uint16_t asid = CMD_ASID(&cmd); - - trace_smmuv3_cmdq_tlbi_nh_asid(asid); -- smmu_inv_notifiers_all(&s->smmu_state); -- 
smmu_iotlb_inv_asid(bs, asid); -+ smmuv3_s1_asid_inval(bs, asid); - break; - } - case SMMU_CMD_TLBI_NH_ALL: -diff --git a/hw/arm/trace-events b/hw/arm/trace-events -index 0acedcedc6..4512d20115 100644 ---- a/hw/arm/trace-events -+++ b/hw/arm/trace-events -@@ -44,6 +44,7 @@ smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t p - smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)" - smmuv3_cmdq_tlbi_nh_va(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" - smmuv3_cmdq_tlbi_nh_vaa(int vmid, uint64_t addr) "vmid =%d addr=0x%"PRIx64 -+smmuv3_s1_asid_inval(int asid) "asid=%d" - smmuv3_cmdq_tlbi_nh(void) "" - smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" - smmu_iotlb_cache_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" --- -2.27.0 - diff --git a/hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch b/hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch deleted file mode 100644 index a615b8664bd6b9c3603073bfd7ec0bb505e70ef8..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch +++ /dev/null @@ -1,147 +0,0 @@ -From d0a1ce3c46246b6ef5510ac1d5c18308417ed525 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 9 Aug 2018 21:04:19 +0200 -Subject: [PATCH] hw/arm/smmuv3: Pass stage 1 configurations to the host - -In case PASID PciOps are set for the device we call -the set_pasid_table() callback on each STE update. - -This allows to pass the guest stage 1 configuration -to the host and apply it at physical level. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 77 +++++++++++++++++++++++++++++++++++---------- - hw/arm/trace-events | 2 +- - 2 files changed, 61 insertions(+), 18 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index c1caa6bc3a..3d2151857d 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -16,6 +16,10 @@ - * with this program; if not, see . - */ - -+#ifdef __linux__ -+#include "linux/iommu.h" -+#endif -+ - #include "qemu/osdep.h" - #include "hw/boards.h" - #include "sysemu/sysemu.h" -@@ -872,6 +876,60 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, - } - } - -+static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) -+{ -+#ifdef __linux__ -+ IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); -+ SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid}; -+ IOMMUConfig iommu_config = {}; -+ SMMUTransCfg *cfg; -+ SMMUDevice *sdev; -+ -+ if (!mr) { -+ return; -+ } -+ -+ sdev = container_of(mr, SMMUDevice, iommu); -+ -+ /* flush QEMU config cache */ -+ smmuv3_flush_config(sdev); -+ -+ if (!pci_device_is_pasid_ops_set(sdev->bus, sdev->devfn)) { -+ return; -+ } -+ -+ cfg = smmuv3_get_config(sdev, &event); -+ -+ if (!cfg) { -+ return; -+ } -+ -+ iommu_config.pasid_cfg.argsz = sizeof(struct iommu_pasid_table_config); -+ iommu_config.pasid_cfg.version = PASID_TABLE_CFG_VERSION_1; -+ iommu_config.pasid_cfg.format = IOMMU_PASID_FORMAT_SMMUV3; -+ iommu_config.pasid_cfg.base_ptr = cfg->s1ctxptr; -+ iommu_config.pasid_cfg.pasid_bits = 0; -+ iommu_config.pasid_cfg.vendor_data.smmuv3.version = PASID_TABLE_SMMUV3_CFG_VERSION_1; -+ -+ if (cfg->disabled || cfg->bypassed) { -+ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_BYPASS; -+ } else if (cfg->aborted) { -+ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_ABORT; -+ } else { -+ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_TRANSLATE; -+ } -+ -+ trace_smmuv3_notify_config_change(mr->parent_obj.name, -+ 
iommu_config.pasid_cfg.config, -+ iommu_config.pasid_cfg.base_ptr); -+ -+ if (pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config)) { -+ error_report("Failed to pass PASID table to host for iommu mr %s (%m)", -+ mr->parent_obj.name); -+ } -+#endif -+} -+ - static void smmuv3_s1_asid_inval(SMMUState *s, uint16_t asid) - { - SMMUDevice *sdev; -@@ -938,22 +996,14 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) - case SMMU_CMD_CFGI_STE: - { - uint32_t sid = CMD_SID(&cmd); -- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); -- SMMUDevice *sdev; - - if (CMD_SSEC(&cmd)) { - cmd_error = SMMU_CERROR_ILL; - break; - } - -- if (!mr) { -- break; -- } -- - trace_smmuv3_cmdq_cfgi_ste(sid); -- sdev = container_of(mr, SMMUDevice, iommu); -- smmuv3_flush_config(sdev); -- -+ smmuv3_notify_config_change(bs, sid); - break; - } - case SMMU_CMD_CFGI_STE_RANGE: /* same as SMMU_CMD_CFGI_ALL */ -@@ -970,14 +1020,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) - trace_smmuv3_cmdq_cfgi_ste_range(start, end); - - for (i = start; i <= end; i++) { -- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, i); -- SMMUDevice *sdev; -- -- if (!mr) { -- continue; -- } -- sdev = container_of(mr, SMMUDevice, iommu); -- smmuv3_flush_config(sdev); -+ smmuv3_notify_config_change(bs, i); - } - break; - } -diff --git a/hw/arm/trace-events b/hw/arm/trace-events -index 4512d20115..cbbe2ccafd 100644 ---- a/hw/arm/trace-events -+++ b/hw/arm/trace-events -@@ -53,4 +53,4 @@ smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d" - smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" - smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" - smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova) "iommu mr=%s asid=%d iova=0x%"PRIx64 -- -+smmuv3_notify_config_change(const char *name, uint8_t config, uint64_t s1ctxptr) "iommu mr=%s config=%d s1ctxptr=0x%"PRIx64 --- -2.27.0 - diff --git 
a/hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch b/hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch deleted file mode 100644 index c363acb60c0fce72a986b6056aa74bb578b7a992..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 06e43bc658aa80bb5f4da3e43c1c13d4cab6ebdd Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Tue, 11 May 2021 10:08:16 +0800 -Subject: [PATCH] hw/arm/smmuv3: Post-load stage 1 configurations to the host - -In nested mode, we call the set_pasid_table() callback on each -STE update to pass the guest stage 1 configuration to the host -and apply it at physical level. - -In the case of live migration, we need to manually call the -set_pasid_table() to load the guest stage 1 configurations to -the host. If this operation fails, the migration fails. - -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 33 ++++++++++++++++++++++++++++----- - 1 file changed, 28 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index c26fba118c..f383143db1 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -876,7 +876,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, - } - } - --static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) -+static int smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) - { - #ifdef __linux__ - IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); -@@ -884,9 +884,10 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) - IOMMUConfig iommu_config = {}; - SMMUTransCfg *cfg; - SMMUDevice *sdev; -+ int ret; - - if (!mr) { -- return; -+ return 0; - } - - sdev = container_of(mr, SMMUDevice, iommu); -@@ -895,13 +896,13 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) - smmuv3_flush_config(sdev); - - if (!pci_device_is_pasid_ops_set(sdev->bus, sdev->devfn)) { -- return; -+ return 0; - } - - cfg = 
smmuv3_get_config(sdev, &event); - - if (!cfg) { -- return; -+ return 0; - } - - iommu_config.pasid_cfg.argsz = sizeof(struct iommu_pasid_table_config); -@@ -923,10 +924,13 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) - iommu_config.pasid_cfg.config, - iommu_config.pasid_cfg.base_ptr); - -- if (pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config)) { -+ ret = pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config); -+ if (ret) { - error_report("Failed to pass PASID table to host for iommu mr %s (%m)", - mr->parent_obj.name); - } -+ -+ return ret; - #endif - } - -@@ -1494,6 +1498,24 @@ static void smmu_realize(DeviceState *d, Error **errp) - smmu_init_irq(s, dev); - } - -+static int smmuv3_post_load(void *opaque, int version_id) -+{ -+ SMMUv3State *s3 = opaque; -+ SMMUState *s = &(s3->smmu_state); -+ SMMUDevice *sdev; -+ int ret = 0; -+ -+ QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) { -+ uint32_t sid = smmu_get_sid(sdev); -+ ret = smmuv3_notify_config_change(s, sid); -+ if (ret) { -+ break; -+ } -+ } -+ -+ return ret; -+} -+ - static const VMStateDescription vmstate_smmuv3_queue = { - .name = "smmuv3_queue", - .version_id = 1, -@@ -1512,6 +1534,7 @@ static const VMStateDescription vmstate_smmuv3 = { - .version_id = 1, - .minimum_version_id = 1, - .priority = MIG_PRI_IOMMU, -+ .post_load = smmuv3_post_load, - .fields = (VMStateField[]) { - VMSTATE_UINT32(features, SMMUv3State), - VMSTATE_UINT8(sid_size, SMMUv3State), --- -2.27.0 - diff --git a/hw-arm-smmuv3-Read-host-SMMU-device-info.patch b/hw-arm-smmuv3-Read-host-SMMU-device-info.patch new file mode 100644 index 0000000000000000000000000000000000000000..6363902bd11f9886b63d3d32381d0c3b066c4633 --- /dev/null +++ b/hw-arm-smmuv3-Read-host-SMMU-device-info.patch @@ -0,0 +1,135 @@ +From 03964c037862a594b4eb7d2e3754acd32c01c80b Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Thu, 22 Sep 2022 14:06:07 -0700 +Subject: [PATCH] hw/arm/smmuv3: Read host SMMU 
device info + +Read the underlying SMMU device info and set corresponding IDR bits. + +Signed-off-by: Nicolin Chen +--- + hw/arm/smmuv3.c | 77 ++++++++++++++++++++++++++++++++++++ + hw/arm/trace-events | 1 + + include/hw/arm/smmu-common.h | 1 + + 3 files changed, 79 insertions(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index db111220c7..4208325ab3 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -254,6 +254,80 @@ void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *info) + info->recorded = true; + } + ++static void smmuv3_nested_init_regs(SMMUv3State *s) ++{ ++ SMMUState *bs = ARM_SMMU(s); ++ SMMUDevice *sdev; ++ uint32_t data_type; ++ uint32_t val; ++ int ret; ++ ++ if (!bs->nested || !bs->viommu) { ++ return; ++ } ++ ++ sdev = QLIST_FIRST(&bs->viommu->device_list); ++ if (!sdev) { ++ return; ++ } ++ ++ if (sdev->info.idr[0]) { ++ error_report("reusing the previous hw_info"); ++ goto out; ++ } ++ ++ ret = smmu_dev_get_info(sdev, &data_type, sizeof(sdev->info), &sdev->info); ++ if (ret) { ++ error_report("failed to get SMMU device info"); ++ return; ++ } ++ ++ if (data_type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) { ++ error_report( "Wrong data type (%d)!", data_type); ++ return; ++ } ++ ++out: ++ trace_smmuv3_get_device_info(sdev->info.idr[0], sdev->info.idr[1], ++ sdev->info.idr[3], sdev->info.idr[5]); ++ ++ val = FIELD_EX32(sdev->info.idr[0], IDR0, BTM); ++ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, BTM, val); ++ val = FIELD_EX32(sdev->info.idr[0], IDR0, ATS); ++ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, ATS, val); ++ val = FIELD_EX32(sdev->info.idr[0], IDR0, ASID16); ++ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, ASID16, val); ++ val = FIELD_EX32(sdev->info.idr[0], IDR0, TERM_MODEL); ++ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, TERM_MODEL, val); ++ val = FIELD_EX32(sdev->info.idr[0], IDR0, STALL_MODEL); ++ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STALL_MODEL, val); ++ val = FIELD_EX32(sdev->info.idr[0], IDR0, STLEVEL); ++ s->idr[0] = FIELD_DP32(s->idr[0], 
IDR0, STLEVEL, val); ++ ++ val = FIELD_EX32(sdev->info.idr[1], IDR1, SIDSIZE); ++ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SIDSIZE, val); ++ val = FIELD_EX32(sdev->info.idr[1], IDR1, SSIDSIZE); ++ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, val); ++ ++ val = FIELD_EX32(sdev->info.idr[3], IDR3, HAD); ++ s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, val); ++ val = FIELD_EX32(sdev->info.idr[3], IDR3, RIL); ++ s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, val); ++ val = FIELD_EX32(sdev->info.idr[3], IDR3, BBML); ++ s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, val); ++ ++ val = FIELD_EX32(sdev->info.idr[5], IDR5, GRAN4K); ++ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, val); ++ val = FIELD_EX32(sdev->info.idr[5], IDR5, GRAN16K); ++ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, val); ++ val = FIELD_EX32(sdev->info.idr[5], IDR5, GRAN64K); ++ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, val); ++ val = FIELD_EX32(sdev->info.idr[5], IDR5, OAS); ++ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, val); ++ ++ /* FIXME check iidr and aidr registrs too */ ++} ++ + static void smmuv3_init_regs(SMMUv3State *s) + { + /* Based on sys property, the stages supported in smmu will be advertised.*/ +@@ -292,6 +366,9 @@ static void smmuv3_init_regs(SMMUv3State *s) + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); + ++ /* Override IDR fields with HW caps */ ++ smmuv3_nested_init_regs(s); ++ + s->cmdq.base = deposit64(s->cmdq.base, 0, 5, SMMU_CMDQS); + s->cmdq.prod = 0; + s->cmdq.cons = 0; +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index 58e0636e95..1e3d86382d 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -55,5 +55,6 @@ smmuv3_cmdq_tlbi_s12_vmid(uint16_t vmid) "vmid=%d" + smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x" + smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" + smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node 
for iommu mr=%s" ++smmuv3_get_device_info(uint32_t idr0, uint32_t idr1, uint32_t idr3, uint32_t idr5) "idr0=0x%x idr1=0x%x idr3=0x%x idr5=0x%x" + smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint16_t vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 + +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 37dfeed026..d120c352cf 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -146,6 +146,7 @@ typedef struct SMMUDevice { + AddressSpace as_sysmem; + uint32_t cfg_cache_hits; + uint32_t cfg_cache_misses; ++ struct iommu_hw_info_arm_smmuv3 info; + QLIST_ENTRY(SMMUDevice) next; + } SMMUDevice; + +-- +2.41.0.windows.1 + diff --git a/hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch b/hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch deleted file mode 100644 index 1139feaed62705a6baebbecba25ad0355b761daf..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch +++ /dev/null @@ -1,33 +0,0 @@ -From eceb9213e23d15d5b4342b6a6a8368f4fec60c2f Mon Sep 17 00:00:00 2001 -From: Zenghui Yu -Date: Mon, 19 Oct 2020 17:15:08 +0800 -Subject: [PATCH] hw/arm/smmuv3: Set the restoration priority of the vSMMUv3 - explicitly - -Ensure the vSMMUv3 will be restored before all PCIe devices so that DMA -translation can work properly during migration. 
- -Signed-off-by: Zenghui Yu -Message-id: 20201019091508.197-1-yuzenghui@huawei.com -Acked-by: Eric Auger -Signed-off-by: Peter Maydell -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 7911944c59..3b5723e1e1 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -1424,6 +1424,7 @@ static const VMStateDescription vmstate_smmuv3 = { - .name = "smmuv3", - .version_id = 1, - .minimum_version_id = 1, -+ .priority = MIG_PRI_IOMMU, - .fields = (VMStateField[]) { - VMSTATE_UINT32(features, SMMUv3State), - VMSTATE_UINT8(sid_size, SMMUv3State), --- -2.27.0 - diff --git a/hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch b/hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch deleted file mode 100644 index 8ed3590b6e3c3863486db0082be983a7b7d4968c..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 6fc85d8a6022d94ffec4cc118472cde583706bfb Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 9 Aug 2018 20:56:44 +0200 -Subject: [PATCH] hw/arm/smmuv3: Store the PASID table GPA in the translation - config - -For VFIO integration we will need to pass the Context Descriptor (CD) -table GPA to the host. The CD table is also referred to as the PASID -table. Its GPA corresponds to the s1ctrptr field of the Stream Table -Entry. So let's decode and store it in the configuration structure. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 1 + - include/hw/arm/smmu-common.h | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 83d59b6d28..f8e721f949 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -352,6 +352,7 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, - "SMMUv3 S1 stalling fault model not allowed yet\n"); - goto bad_ste; - } -+ cfg->s1ctxptr = STE_CTXPTR(ste); - return 0; - - bad_ste: -diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h -index 1f37844e5c..353668f4ea 100644 ---- a/include/hw/arm/smmu-common.h -+++ b/include/hw/arm/smmu-common.h -@@ -68,6 +68,7 @@ typedef struct SMMUTransCfg { - uint8_t tbi; /* Top Byte Ignore */ - uint16_t asid; - SMMUTransTableInfo tt[2]; -+ dma_addr_t s1ctxptr; - uint32_t iotlb_hits; /* counts IOTLB hits for this asid */ - uint32_t iotlb_misses; /* counts IOTLB misses for this asid */ - } SMMUTransCfg; --- -2.27.0 - diff --git a/hw-arm-smmuv3-Support-16K-translation-granule.patch b/hw-arm-smmuv3-Support-16K-translation-granule.patch deleted file mode 100644 index 08c4bc5603401f6e5735daa6767dfa2aa2785255..0000000000000000000000000000000000000000 --- a/hw-arm-smmuv3-Support-16K-translation-granule.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 008dec30dea19950ff48a34c54441d065c1f228b Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Wed, 31 Mar 2021 14:47:13 +0800 -Subject: [PATCH] hw/arm/smmuv3: Support 16K translation granule - -The driver can query some bits in SMMUv3 IDR5 to learn which -translation granules are supported. Arm recommends that SMMUv3 -implementations support at least 4K and 64K granules. But in -the vSMMUv3, there seems to be no reason not to support 16K -translation granule. In addition, if 16K is not supported, -vSVA will failed to be enabled in the future for 16K guest -kernel. So it'd better to support it. 
- -Signed-off-by: Kunkun Jiang -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Signed-off-by: Peter Maydell ---- - hw/arm/smmuv3.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index e96d5beb9a..7911944c59 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -254,8 +254,9 @@ static void smmuv3_init_regs(SMMUv3State *s) - s->idr[1] = FIELD_DP32(s->idr[1], IDR1, EVENTQS, SMMU_EVENTQS); - s->idr[1] = FIELD_DP32(s->idr[1], IDR1, CMDQS, SMMU_CMDQS); - -- /* 4K and 64K granule support */ -+ /* 4K, 16K and 64K granule support */ - s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); -+ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); - s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); - s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ - -@@ -480,7 +481,8 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) - - tg = CD_TG(cd, i); - tt->granule_sz = tg2granule(tg, i); -- if ((tt->granule_sz != 12 && tt->granule_sz != 16) || CD_ENDI(cd)) { -+ if ((tt->granule_sz != 12 && tt->granule_sz != 14 && -+ tt->granule_sz != 16) || CD_ENDI(cd)) { - goto bad_cd; - } - --- -2.27.0 - diff --git a/hw-arm-virt-Add-an-SMMU_IO_LEN-macro.patch b/hw-arm-virt-Add-an-SMMU_IO_LEN-macro.patch new file mode 100644 index 0000000000000000000000000000000000000000..38db82dce6b6b66beb7e76eb6f22d49c0c98c213 --- /dev/null +++ b/hw-arm-virt-Add-an-SMMU_IO_LEN-macro.patch @@ -0,0 +1,47 @@ +From a6c7b16107b506f85e6643604c923291e41f70d1 Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Wed, 19 Jun 2024 04:42:33 +0000 +Subject: [PATCH] hw/arm/virt: Add an SMMU_IO_LEN macro + +A following patch will add a new MMIO region for nested SMMU instances. + +This macro will be repeatedly used to set offsets and MMIO sizes in both +virt and virt-acpi-build. 
+ +Signed-off-by: Nicolin Chen +Signed-off-by: Shameer Kolothum +--- + hw/arm/virt.c | 2 +- + include/hw/arm/virt.h | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8823f2ed1c..08c40c314b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -155,7 +155,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_FW_CFG] = { 0x09020000, 0x00000018 }, + [VIRT_GPIO] = { 0x09030000, 0x00001000 }, + [VIRT_SECURE_UART] = { 0x09040000, 0x00001000 }, +- [VIRT_SMMU] = { 0x09050000, 0x00020000 }, ++ [VIRT_SMMU] = { 0x09050000, SMMU_IO_LEN }, + [VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN }, + [VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN }, + [VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN}, +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 345b2d5594..e6a449becd 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -106,6 +106,9 @@ typedef enum { + ARM_L3_CACHE + } ArmCacheType; + ++/* MMIO region size for SMMUv3 */ ++#define SMMU_IO_LEN 0x20000 ++ + enum { + VIRT_FLASH, + VIRT_MEM, +-- +2.41.0.windows.1 + diff --git a/hw-arm-virt-Add-measurement-log-for-confidential-boo.patch b/hw-arm-virt-Add-measurement-log-for-confidential-boo.patch new file mode 100644 index 0000000000000000000000000000000000000000..916d124ba1bded4a0fe12a435f2512da3495248b --- /dev/null +++ b/hw-arm-virt-Add-measurement-log-for-confidential-boo.patch @@ -0,0 +1,186 @@ +From f22ae2af5af021521084e40c848e5a0505ab7955 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Thu, 7 Nov 2024 17:42:02 +0000 +Subject: [PATCH] hw/arm/virt: Add measurement log for confidential boot + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/7905fe583633f1246a50324c77c39026136fac29 + +Create a measurement log describing operations performed by QEMU to +initialize the guest, and load it into guest memory above the DTB. 
+ +Cc: Stefan Berger +Signed-off-by: Jean-Philippe Brucker +Conflicts: + hw/arm/virt.c + include/hw/arm/virt.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/arm/boot.c | 47 +++++++++++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 22 ++++++++++++++++++++ + include/hw/arm/boot.h | 3 +++ + include/hw/arm/virt.h | 1 + + 4 files changed, 73 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 6980aebe1e..4f5bf6e77c 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -669,6 +669,24 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, + + fdt_add_psci_node(fdt); + ++ /* Add a reserved-memory node for the event log */ ++ if (binfo->log_size) { ++ char *nodename; ++ ++ qemu_fdt_add_subnode(fdt, "/reserved-memory"); ++ qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#address-cells", 0x2); ++ qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#size-cells", 0x2); ++ qemu_fdt_setprop(fdt, "/reserved-memory", "ranges", NULL, 0); ++ ++ nodename = g_strdup_printf("/reserved-memory/event-log@%" PRIx64, ++ binfo->log_paddr); ++ qemu_fdt_add_subnode(fdt, nodename); ++ qemu_fdt_setprop_string(fdt, nodename, "compatible", "cc-event-log"); ++ qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", 2, binfo->log_paddr, ++ 2, binfo->log_size); ++ g_free(nodename); ++ } ++ + if (binfo->modify_dtb) { + binfo->modify_dtb(binfo, fdt); + } +@@ -941,6 +959,30 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, + return kernel_size; + } + ++static void add_event_log(struct arm_boot_info *info) ++{ ++ if (!info->log_size) { ++ return; ++ } ++ ++ if (!info->dtb_limit) { ++ int dtb_size = 0; ++ ++ if (!info->get_dtb(info, &dtb_size) || dtb_size == 0) { ++ error_report("Board does not have a DTB"); ++ exit(1); ++ } ++ info->dtb_limit = info->dtb_start + dtb_size; ++ } ++ ++ info->log_paddr = info->dtb_limit; ++ if (info->log_paddr + info->log_size > ++ info->loader_start + info->ram_size) { ++ error_report("Not enough space for 
measurement log and DTB"); ++ exit(1); ++ } ++} ++ + static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + struct arm_boot_info *info) + { +@@ -988,6 +1030,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + } + info->dtb_start = info->loader_start; + info->dtb_limit = image_low_addr; ++ add_event_log(info); + } + } + entry = elf_entry; +@@ -1126,6 +1169,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, + error_report("Not enough space for DTB after kernel/initrd"); + exit(1); + } ++ add_event_log(info); ++ + fixupcontext[FIXUP_ARGPTR_LO] = info->dtb_start; + fixupcontext[FIXUP_ARGPTR_HI] = info->dtb_start >> 32; + } else { +@@ -1212,6 +1257,8 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, + error_report("could not load firmware '%s'", firmware_filename); + exit(EXIT_FAILURE); + } ++ ++ add_event_log(info); + } + + static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, const char *firmware_filename) +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e6053acec6..52789a3782 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1989,6 +1989,11 @@ void virt_machine_done(Notifier *notifier, void *data) + exit(1); + } + ++ if (vms->event_log) { ++ object_property_set_uint(vms->event_log, "load-addr", ++ vms->bootinfo.log_paddr, &error_fatal); ++ } ++ + fw_cfg_add_extra_pci_roots(vms->bus, vms->fw_cfg); + + virt_acpi_setup(vms); +@@ -2398,6 +2403,21 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + ++static void create_measurement_log(VirtMachineState *vms) ++{ ++ Error *err = NULL; ++ ++ vms->event_log = kvm_arm_rme_get_measurement_log(); ++ if (vms->event_log == NULL) { ++ return; ++ } ++ vms->bootinfo.log_size = object_property_get_uint(vms->event_log, ++ "max-size", &err); ++ if (err != NULL) { ++ error_report_err(err); ++ } ++} ++ + static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, + Error **errp) + { +@@ -2900,6 +2920,8 @@ static void 
machvirt_init(MachineState *machine) + + kvm_arm_rme_init_gpa_space(vms->highest_gpa, vms->bus); + ++ create_measurement_log(vms); ++ + vms->bootinfo.ram_size = machine->ram_size; + vms->bootinfo.board_id = -1; + vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 326c92782e..8fed25706b 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -147,6 +147,9 @@ struct arm_boot_info { + * Confidential guest boot loads everything into RAM so it can be measured. + */ + bool confidential; ++ /* measurement log location in guest memory */ ++ hwaddr log_paddr; ++ size_t log_size; + }; + + /** +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 9b43e72aac..fee7c27e0c 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -254,6 +254,7 @@ struct VirtMachineState { + char *oem_table_id; + char *kvm_type; + NotifierList cpuhp_notifiers; ++ Object *event_log; + }; + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +-- +2.33.0 + diff --git a/hw-arm-virt-Add-memory-hotplug-framework.patch b/hw-arm-virt-Add-memory-hotplug-framework.patch deleted file mode 100644 index dcb0f21f5b63e10636d889f6cae99a4d738d1d0e..0000000000000000000000000000000000000000 --- a/hw-arm-virt-Add-memory-hotplug-framework.patch +++ /dev/null @@ -1,130 +0,0 @@ -From e14fadc66d488ad10a10a2076721b72cc239ded9 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 18 Sep 2019 14:06:26 +0100 -Subject: [PATCH] hw/arm/virt: Add memory hotplug framework - -This patch adds the memory hot-plug/hot-unplug infrastructure -in machvirt. The device memory is not yet exposed to the Guest -either through DT or ACPI and hence both cold/hot plug of memory -is explicitly disabled for now. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kwangwoo Lee -Signed-off-by: Shameer Kolothum -Reviewed-by: Peter Maydell -Reviewed-by: Igor Mammedov -Message-Id: <20190918130633.4872-5-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - hw/arm/Kconfig | 2 ++ - hw/arm/virt.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 54 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig -index ab65ecd216..84961c17ab 100644 ---- a/hw/arm/Kconfig -+++ b/hw/arm/Kconfig -@@ -20,6 +20,8 @@ config ARM_VIRT - select SMBIOS - select VIRTIO_MMIO - select ACPI_PCI -+ select MEM_DEVICE -+ select DIMM - - config CHEETAH - bool -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 23d72aed97..c7c07fe3ac 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -65,6 +65,8 @@ - #include "hw/arm/smmuv3.h" - #include "hw/acpi/acpi.h" - #include "target/arm/internals.h" -+#include "hw/mem/pc-dimm.h" -+#include "hw/mem/nvdimm.h" - - #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ - static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ -@@ -1998,6 +2000,42 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - return ms->possible_cpus; - } - -+static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, -+ Error **errp) -+{ -+ -+ /* -+ * The device memory is not yet exposed to the Guest either through -+ * DT or ACPI and hence both cold/hot plug of memory is explicitly -+ * disabled for now. 
-+ */ -+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -+ error_setg(errp, "memory cold/hot plug is not yet supported"); -+ return; -+ } -+ -+ pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), NULL, errp); -+} -+ -+static void virt_memory_plug(HotplugHandler *hotplug_dev, -+ DeviceState *dev, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); -+ Error *local_err = NULL; -+ -+ pc_dimm_plug(PC_DIMM(dev), MACHINE(vms), &local_err); -+ -+ error_propagate(errp, local_err); -+} -+ -+static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, -+ DeviceState *dev, Error **errp) -+{ -+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -+ virt_memory_pre_plug(hotplug_dev, dev, errp); -+ } -+} -+ - static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -@@ -2009,12 +2047,23 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, - SYS_BUS_DEVICE(dev)); - } - } -+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -+ virt_memory_plug(hotplug_dev, dev, errp); -+ } -+} -+ -+static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, -+ DeviceState *dev, Error **errp) -+{ -+ error_setg(errp, "device unplug request for unsupported device" -+ " type: %s", object_get_typename(OBJECT(dev))); - } - - static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, - DeviceState *dev) - { -- if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) { -+ if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE) || -+ (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) { - return HOTPLUG_HANDLER(machine); - } - -@@ -2078,7 +2127,9 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - mc->kvm_type = virt_kvm_type; - assert(!mc->get_hotplug_handler); - mc->get_hotplug_handler = virt_machine_get_hotplug_handler; -+ hc->pre_plug = virt_machine_device_pre_plug_cb; - hc->plug = virt_machine_device_plug_cb; -+ hc->unplug_request 
= virt_machine_device_unplug_request_cb; - mc->numa_mem_supported = true; - } - --- -2.19.1 diff --git a/hw-arm-virt-Add-support-for-Arm-RME.patch b/hw-arm-virt-Add-support-for-Arm-RME.patch new file mode 100644 index 0000000000000000000000000000000000000000..bbc4c3d66736baed92324657df442c74a0e5da72 --- /dev/null +++ b/hw-arm-virt-Add-support-for-Arm-RME.patch @@ -0,0 +1,78 @@ +From 8f73dd3647c1ea8255c3fbd809ded08d30cbe746 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 6 Feb 2023 16:49:25 +0000 +Subject: [PATCH] hw/arm/virt: Add support for Arm RME + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/6e0e9f49e9bdf22e4bd06d3506b9abc63c927b85 + +When confidential-guest-support is enabled for the virt machine, add the +RME flag to the VM type. + +The HVC conduit for PSCI is not supported for Realms. + +Signed-off-by: Jean-Philippe Brucker +Conflicts: + hw/arm/virt.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/arm/virt.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a43f18020c..ec4faab9dc 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -260,6 +260,11 @@ static bool cpu_type_valid(const char *cpu) + return false; + } + ++static bool virt_machine_is_confidential(VirtMachineState *vms) ++{ ++ return MACHINE(vms)->cgs; ++} ++ + static void create_randomness(MachineState *ms, const char *node) + { + struct { +@@ -2610,10 +2615,12 @@ static void machvirt_init(MachineState *machine) + * if the guest has EL2 then we will use SMC as the conduit, + * and otherwise we will use HVC (for backwards compatibility and + * because if we're using KVM then we must use HVC). ++ * Realm guests must also use SMC. 
+ */ + if (vms->secure && firmware_loaded) { + vms->psci_conduit = QEMU_PSCI_CONDUIT_DISABLED; +- } else if (vms->virt || virtcca_cvm_enabled()) { ++ } else if (vms->virt || virtcca_cvm_enabled() || ++ virt_machine_is_confidential(vms)) { + vms->psci_conduit = QEMU_PSCI_CONDUIT_SMC; + } else { + vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC; +@@ -3813,6 +3820,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + virtcca_cvm_type = VIRTCCA_CVM_TYPE; + } + } ++ int rme_vm_type = kvm_arm_rme_vm_type(ms), type; + int max_vm_pa_size, requested_pa_size; + bool fixed_ipa; + +@@ -3842,9 +3850,12 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + * the implicit legacy 40b IPA setting, in which case the kvm_type + * must be 0. + */ +- return strcmp(type_str, "cvm") == 0 ? +- ((fixed_ipa ? 0 : requested_pa_size) | virtcca_cvm_type) : +- (fixed_ipa ? 0 : requested_pa_size); ++ type = strcmp(type_str, "cvm") == 0 ? virtcca_cvm_type : 0; ++ if (fixed_ipa) { ++ return type; ++ } ++ ++ return requested_pa_size | rme_vm_type | type; + } + + static void virt_machine_class_init(ObjectClass *oc, void *data) +-- +2.33.0 + diff --git a/hw-arm-virt-Disable-DTB-randomness-for-confidential-.patch b/hw-arm-virt-Disable-DTB-randomness-for-confidential-.patch new file mode 100644 index 0000000000000000000000000000000000000000..061fdf5c163e4cdb4be81ba33f5552e91c331283 --- /dev/null +++ b/hw-arm-virt-Disable-DTB-randomness-for-confidential-.patch @@ -0,0 +1,175 @@ +From 8796ed125a4e424df483e2059eab2b4fa7f88f8d Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 6 Feb 2023 16:52:37 +0000 +Subject: [PATCH] hw/arm/virt: Disable DTB randomness for confidential VMs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/68a0501d8fbf67b2828c262e8aa296820a1b32a1 + +The dtb-randomness feature, which adds random seeds to the DTB, isn't +really compatible 
with confidential VMs since it randomizes the Realm +Initial Measurement. Enabling it is not an error, but it prevents +attestation. It also isn't useful to a Realm, which doesn't trust host +input. + +Currently the feature is automatically enabled, unless the user disables +it on the command-line. Change it to OnOffAuto, and automatically +disable it for confidential VMs, unless the user explicitly enables it. + +Signed-off-by: Jean-Philippe Brucker +Reviewed-by: Philippe Mathieu-Daudé +Conflicts: + hw/arm/virt.c + include/hw/arm/virt.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + docs/system/arm/virt.rst | 9 +++++---- + hw/arm/virt.c | 41 +++++++++++++++++++++++++--------------- + include/hw/arm/virt.h | 2 +- + 3 files changed, 32 insertions(+), 20 deletions(-) + +diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst +index 7c4c80180c..0ba6d8610f 100644 +--- a/docs/system/arm/virt.rst ++++ b/docs/system/arm/virt.rst +@@ -153,10 +153,11 @@ dtb-randomness + rng-seed and kaslr-seed nodes (in both "/chosen" and + "/secure-chosen") to use for features like the random number + generator and address space randomisation. The default is +- ``on``. You will want to disable it if your trusted boot chain +- will verify the DTB it is passed, since this option causes the +- DTB to be non-deterministic. It would be the responsibility of +- the firmware to come up with a seed and pass it on if it wants to. ++ ``off`` for confidential VMs, and ``on`` otherwise. You will want ++ to disable it if your trusted boot chain will verify the DTB it is ++ passed, since this option causes the DTB to be non-deterministic. ++ It would be the responsibility of the firmware to come up with a ++ seed and pass it on if it wants to. + + dtb-kaslr-seed + A deprecated synonym for dtb-randomness. 
+diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ec4faab9dc..66d2d68944 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -281,6 +281,7 @@ static void create_randomness(MachineState *ms, const char *node) + + static void create_fdt(VirtMachineState *vms) + { ++ bool dtb_randomness = true; + MachineState *ms = MACHINE(vms); + int nb_numa_nodes = ms->numa_state->num_nodes; + void *fdt = create_device_tree(&vms->fdt_size); +@@ -290,6 +291,16 @@ static void create_fdt(VirtMachineState *vms) + exit(1); + } + ++ /* ++ * Including random data in the DTB causes random intial measurement on CCA, ++ * so disable it for confidential VMs. ++ */ ++ if (vms->dtb_randomness == ON_OFF_AUTO_OFF || ++ (vms->dtb_randomness == ON_OFF_AUTO_AUTO && ++ virt_machine_is_confidential(vms))) { ++ dtb_randomness = false; ++ } ++ + ms->fdt = fdt; + + /* Header */ +@@ -306,7 +317,7 @@ static void create_fdt(VirtMachineState *vms) + kvm_type = object_property_get_str(OBJECT(current_machine), + "kvm-type", &error_abort); + } +- if (vms->dtb_randomness) { ++ if (dtb_randomness) { + if (!(kvm_type && !strcmp(kvm_type, "cvm"))) { + create_randomness(ms, "/chosen"); + } +@@ -314,7 +325,7 @@ static void create_fdt(VirtMachineState *vms) + + if (vms->secure) { + qemu_fdt_add_subnode(fdt, "/secure-chosen"); +- if (vms->dtb_randomness) { ++ if (dtb_randomness) { + create_randomness(ms, "/secure-chosen"); + } + } +@@ -2998,18 +3009,21 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + vms->its = value; + } + +-static bool virt_get_dtb_randomness(Object *obj, Error **errp) ++static void virt_get_dtb_randomness(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); ++ OnOffAuto dtb_randomness = vms->dtb_randomness; + +- return vms->dtb_randomness; ++ visit_type_OnOffAuto(v, name, &dtb_randomness, errp); + } + +-static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) ++static void 
virt_set_dtb_randomness(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); + +- vms->dtb_randomness = value; ++ visit_type_OnOffAuto(v, name, &vms->dtb_randomness, errp); + } + + static char *virt_get_oem_id(Object *obj, Error **errp) +@@ -3996,16 +4010,16 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable " + "ITS instantiation"); + +- object_class_property_add_bool(oc, "dtb-randomness", +- virt_get_dtb_randomness, +- virt_set_dtb_randomness); ++ object_class_property_add(oc, "dtb-randomness", "OnOffAuto", ++ virt_get_dtb_randomness, virt_set_dtb_randomness, ++ NULL, NULL); + object_class_property_set_description(oc, "dtb-randomness", + "Set off to disable passing random or " + "non-deterministic dtb nodes to guest"); + +- object_class_property_add_bool(oc, "dtb-kaslr-seed", +- virt_get_dtb_randomness, +- virt_set_dtb_randomness); ++ object_class_property_add(oc, "dtb-kaslr-seed", "OnOffAuto", ++ virt_get_dtb_randomness, virt_set_dtb_randomness, ++ NULL, NULL); + object_class_property_set_description(oc, "dtb-kaslr-seed", + "Deprecated synonym of dtb-randomness"); + +@@ -4092,9 +4106,6 @@ static void virt_instance_init(Object *obj) + /* MTE is disabled by default. 
*/ + vms->mte = false; + +- /* Supply kaslr-seed and rng-seed by default */ +- vms->dtb_randomness = true; +- + vms->irqmap = a15irqmap; + + virt_flash_create(vms); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 3e2759d225..9b43e72aac 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -225,7 +225,7 @@ struct VirtMachineState { + bool cpu_hotplug_enabled; + bool ras; + bool mte; +- bool dtb_randomness; ++ OnOffAuto dtb_randomness; + bool pmu; + int smmu_accel_count; + OnOffAuto acpi; +-- +2.33.0 + diff --git a/hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch b/hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch deleted file mode 100644 index b32b2a01929189ff6f89e94f011d4d9cc3811a3b..0000000000000000000000000000000000000000 --- a/hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch +++ /dev/null @@ -1,252 +0,0 @@ -From ce813d8daa2e01df52509f4bb52b9ab774408706 Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:27 +0100 -Subject: [PATCH] hw/arm/virt: Enable device memory cold/hot plug with ACPI - boot - -This initializes the GED device with base memory and irq, configures -ged memory hotplug event and builds the corresponding aml code. With -this, both hot and cold plug of device memory is enabled now for Guest -with ACPI boot. Memory cold plug support with Guest DT boot is not yet -supported. - -As DSDT table gets changed by this, update bios-tables-test-allowed-diff.h -to avoid "make check" failure. - -Signed-off-by: Shameer Kolothum -Message-Id: <20190918130633.4872-6-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Igor Mammedov ---- - hw/arm/Kconfig | 2 ++ - hw/arm/virt-acpi-build.c | 21 ++++++++++++++ - hw/arm/virt.c | 59 +++++++++++++++++++++++++++++++++++----- - include/hw/arm/virt.h | 4 +++ - 4 files changed, 79 insertions(+), 7 deletions(-) - -diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig -index 84961c17ab..ad7f7c089b 100644 ---- a/hw/arm/Kconfig -+++ b/hw/arm/Kconfig -@@ -22,6 +22,8 @@ config ARM_VIRT - select ACPI_PCI - select MEM_DEVICE - select DIMM -+ select ACPI_MEMORY_HOTPLUG -+ select ACPI_HW_REDUCED - - config CHEETAH - bool -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index fe54411f6a..fca53ae01f 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -40,6 +40,8 @@ - #include "hw/acpi/aml-build.h" - #include "hw/acpi/utils.h" - #include "hw/acpi/pci.h" -+#include "hw/acpi/memory_hotplug.h" -+#include "hw/acpi/generic_event_device.h" - #include "hw/pci/pcie_host.h" - #include "hw/pci/pci.h" - #include "hw/arm/virt.h" -@@ -779,6 +781,7 @@ static void - build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - { - Aml *scope, *dsdt; -+ MachineState *ms = MACHINE(vms); - const MemMapEntry *memmap = vms->memmap; - const int *irqmap = vms->irqmap; - -@@ -803,6 +806,24 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - vms->highmem, vms->highmem_ecam); - acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO], - (irqmap[VIRT_GPIO] + ARM_SPI_BASE)); -+ if (vms->acpi_dev) { -+ build_ged_aml(scope, "\\_SB."GED_DEVICE, -+ HOTPLUG_HANDLER(vms->acpi_dev), -+ irqmap[VIRT_ACPI_GED] + ARM_SPI_BASE, AML_SYSTEM_MEMORY, -+ memmap[VIRT_ACPI_GED].base); -+ } -+ -+ if (vms->acpi_dev) { -+ uint32_t event = object_property_get_uint(OBJECT(vms->acpi_dev), -+ "ged-event", &error_abort); -+ -+ if (event & ACPI_GED_MEM_HOTPLUG_EVT) { -+ build_memory_hotplug_aml(scope, ms->ram_slots, "\\_SB", NULL, -+ AML_SYSTEM_MEMORY, -+ memmap[VIRT_PCDIMM_ACPI].base); -+ } -+ } -+ - 
acpi_dsdt_add_power_button(scope); - - aml_append(dsdt, scope); -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c7c07fe3ac..8ccabd5159 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -67,6 +67,7 @@ - #include "target/arm/internals.h" - #include "hw/mem/pc-dimm.h" - #include "hw/mem/nvdimm.h" -+#include "hw/acpi/generic_event_device.h" - - #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ - static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ -@@ -137,6 +138,8 @@ static const MemMapEntry base_memmap[] = { - [VIRT_GPIO] = { 0x09030000, 0x00001000 }, - [VIRT_SECURE_UART] = { 0x09040000, 0x00001000 }, - [VIRT_SMMU] = { 0x09050000, 0x00020000 }, -+ [VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN }, -+ [VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN }, - [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, - [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, - /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ -@@ -173,6 +176,7 @@ static const int a15irqmap[] = { - [VIRT_PCIE] = 3, /* ... 
to 6 */ - [VIRT_GPIO] = 7, - [VIRT_SECURE_UART] = 8, -+ [VIRT_ACPI_GED] = 9, - [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */ - [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */ - [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */ -@@ -630,6 +634,29 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms) - } - } - -+static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) -+{ -+ DeviceState *dev; -+ MachineState *ms = MACHINE(vms); -+ int irq = vms->irqmap[VIRT_ACPI_GED]; -+ uint32_t event = 0; -+ -+ if (ms->ram_slots) { -+ event = ACPI_GED_MEM_HOTPLUG_EVT; -+ } -+ -+ dev = qdev_create(NULL, TYPE_ACPI_GED); -+ qdev_prop_set_uint32(dev, "ged-event", event); -+ -+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); -+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); -+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irq]); -+ -+ qdev_init_nofail(dev); -+ -+ return dev; -+} -+ - static void create_its(VirtMachineState *vms, DeviceState *gicdev) - { - const char *itsclass = its_class_name(); -@@ -1603,6 +1630,7 @@ static void machvirt_init(MachineState *machine) - MemoryRegion *ram = g_new(MemoryRegion, 1); - bool firmware_loaded; - bool aarch64 = true; -+ bool has_ged = !vmc->no_ged; - unsigned int smp_cpus = machine->smp.cpus; - unsigned int max_cpus = machine->smp.max_cpus; - -@@ -1824,6 +1852,10 @@ static void machvirt_init(MachineState *machine) - - create_gpio(vms, pic); - -+ if (has_ged && aarch64 && firmware_loaded && acpi_enabled) { -+ vms->acpi_dev = create_acpi_ged(vms, pic); -+ } -+ - /* Create mmio transports, so the user can create virtio backends - * (which will be automatically plugged in to the transports). If - * no backend is created the transport will just sit harmlessly idle. 
-@@ -2003,14 +2035,17 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) - { -+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); -+ const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); - -- /* -- * The device memory is not yet exposed to the Guest either through -- * DT or ACPI and hence both cold/hot plug of memory is explicitly -- * disabled for now. -- */ -- if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -- error_setg(errp, "memory cold/hot plug is not yet supported"); -+ if (is_nvdimm) { -+ error_setg(errp, "nvdimm is not yet supported"); -+ return; -+ } -+ -+ if (!vms->acpi_dev) { -+ error_setg(errp, -+ "memory hotplug is not enabled: missing acpi-ged device"); - return; - } - -@@ -2020,11 +2055,18 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - static void virt_memory_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -+ HotplugHandlerClass *hhc; - VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); - Error *local_err = NULL; - - pc_dimm_plug(PC_DIMM(dev), MACHINE(vms), &local_err); -+ if (local_err) { -+ goto out; -+ } - -+ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); -+ hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &error_abort); -+out: - error_propagate(errp, local_err); - } - -@@ -2231,8 +2273,11 @@ DEFINE_VIRT_MACHINE_AS_LATEST(4, 1) - - static void virt_machine_4_0_options(MachineClass *mc) - { -+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -+ - virt_machine_4_1_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); -+ vmc->no_ged = true; - } - DEFINE_VIRT_MACHINE(4, 0) - -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index a9d6977afc..0350285136 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -78,6 +78,8 @@ enum { - VIRT_GPIO, - VIRT_SECURE_UART, - VIRT_SECURE_MEM, 
-+ VIRT_PCDIMM_ACPI, -+ VIRT_ACPI_GED, - VIRT_LOWMEMMAP_LAST, - }; - -@@ -107,6 +109,7 @@ typedef struct { - bool claim_edge_triggered_timers; - bool smbios_old_sys_ver; - bool no_highmem_ecam; -+ bool no_ged; /* Machines < 4.1 has no support for ACPI GED device */ - bool kvm_no_adjvtime; - } VirtMachineClass; - -@@ -135,6 +138,7 @@ typedef struct { - uint32_t iommu_phandle; - int psci_conduit; - hwaddr highest_gpa; -+ DeviceState *acpi_dev; - } VirtMachineState; - - #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) --- -2.19.1 diff --git a/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch b/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c124e92aa8a31f4573ac5b9098d39c925ec6fa3 --- /dev/null +++ b/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch @@ -0,0 +1,107 @@ +From 837b04877be49b930a2d437f55e2ae15ff820421 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 23 Sep 2023 22:31:49 +0000 +Subject: [PATCH] hw/arm/virt: Expose cold-booted CPUs as MADT GICC Enabled + +Hotpluggable CPUs MUST be exposed as 'online-capable' as per the new change. But +cold booted CPUs if made 'online-capable' during boot time might not get +detected in the legacy OS. Hence, can cause compatibility problems. + +Original Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706 + +Specification change might take time and hence disabling the support of +unplugging any cold booted CPUs to preserve the compatibility with legacy OS. 
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 19 ++++++++++++++----- + hw/arm/virt.c | 16 ++++++++++++++++ + include/hw/core/cpu.h | 2 ++ + 3 files changed, 32 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index c402e102c4..590afcfa98 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -789,17 +789,26 @@ static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) + } + + /* +- * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot +- * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the +- * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged. +- * Though as-of-now this is only used as a debugging feature. ++ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot. We ++ * MUST set 'online-capable' bit for all hotpluggable CPUs. ++ * Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706 + * + * UEFI ACPI Specification 6.5 + * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure + * Table: 5.37 GICC CPU Interface Flags + * Link: https://uefi.org/specs/ACPI/6.5 ++ * ++ * Cold-booted CPUs, except for the first/boot CPU, SHOULD be allowed to be ++ * hot(un)plug as well but for this to happen these MUST have ++ * 'online-capable' bit set. Later creates compatibility problem with legacy ++ * OS as it might ignore online-capable' bits during boot time and hence ++ * some CPUs might not get detected. To fix this MADT GIC CPU interface flag ++ * should be allowed to have both bits set i.e. 'online-capable' and ++ * 'Enabled' bits together. This change will require UEFI ACPI standard ++ * change. Till this happens exposing all cold-booted CPUs as 'enabled' only ++ * + */ +- return cpu && !cpu->cpu_index ? 1 : (1 << 3); ++ return cpu && cpu->cold_booted ? 
1 : (1 << 3); + } + + static void +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index eedff8e525..ed437ce0e8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3250,6 +3250,10 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * This shall be used during the init of ACPI Hotplug state and hot-unplug + */ + cs->acpi_persistent = true; ++ ++ if (!dev->hotplugged) { ++ cs->cold_booted = true; ++ } + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -3313,6 +3317,18 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + ++ /* ++ * UEFI ACPI standard change is required to make both 'enabled' and the ++ * 'online-capable' bit co-exist instead of being mutually exclusive. ++ * check virt_acpi_get_gicc_flags() for more details. ++ * ++ * Disable the unplugging of cold-booted vCPUs as a temporary mitigation. ++ */ ++ if (cs->cold_booted) { ++ error_setg(errp, "Hot-unplug of cold-booted CPU not supported!"); ++ return; ++ } ++ + if (cs->cpu_index == first_cpu->cpu_index) { + error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported", + first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id, +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 6dbe163548..ee04ee44c2 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -565,6 +565,8 @@ struct CPUState { + uint32_t halted; + int32_t exception_index; + ++ bool cold_booted; ++ + AccelCPUState *accel; + /* shared by kvm and hvf */ + bool vcpu_dirty; +-- +2.27.0 + diff --git a/hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch b/hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch deleted file mode 100644 index 1e3befaf62920ca12cae4c8ead6d731800ef79a8..0000000000000000000000000000000000000000 --- a/hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 3a0af1446395e74476a763ca12713b28c099a144 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: 
Mon, 6 Apr 2020 12:50:54 +0800 -Subject: [PATCH] hw/arm/virt: Factor out some CPU init codes to pre_plug hook - -The init path of hotplugged CPU is pre_plug/realize/plug, so we -must move these init code in machvirt_init to pre_plug hook, to -let them be shared by all CPUs. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 108 +++++++++++++++++++++++++++----------------------- - 1 file changed, 58 insertions(+), 50 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 64532b61b2..83f4887e57 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -196,6 +196,8 @@ static const char *valid_cpus[] = { - ARM_CPU_TYPE_NAME("max"), - }; - -+static MemoryRegion *secure_sysmem; -+ - static bool cpu_type_valid(const char *cpu) - { - int i; -@@ -1629,7 +1631,6 @@ static void machvirt_init(MachineState *machine) - MachineClass *mc = MACHINE_GET_CLASS(machine); - const CPUArchIdList *possible_cpus; - MemoryRegion *sysmem = get_system_memory(); -- MemoryRegion *secure_sysmem = NULL; - int n, virt_max_cpus; - MemoryRegion *ram = g_new(MemoryRegion, 1); - bool firmware_loaded; -@@ -1752,57 +1753,10 @@ static void machvirt_init(MachineState *machine) - } - - cpuobj = object_new(possible_cpus->cpus[n].type); -- object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id, -- "mp-affinity", NULL); -+ aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); - - cs = CPU(cpuobj); - cs->cpu_index = n; -- -- numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), -- &error_fatal); -- -- aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); -- -- if (!vms->secure) { -- object_property_set_bool(cpuobj, false, "has_el3", NULL); -- } -- -- if (!vms->virt && object_property_find(cpuobj, "has_el2", NULL)) { -- object_property_set_bool(cpuobj, false, "has_el2", NULL); -- } -- -- if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { -- object_property_set_int(cpuobj, vms->psci_conduit, -- "psci-conduit", NULL); -- -- /* 
Secondary CPUs start in PSCI powered-down state */ -- if (n > 0) { -- object_property_set_bool(cpuobj, true, -- "start-powered-off", NULL); -- } -- } -- -- if (vmc->kvm_no_adjvtime && -- object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { -- object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); -- } -- -- if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { -- object_property_set_bool(cpuobj, false, "pmu", NULL); -- } -- -- if (object_property_find(cpuobj, "reset-cbar", NULL)) { -- object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base, -- "reset-cbar", &error_abort); -- } -- -- object_property_set_link(cpuobj, OBJECT(sysmem), "memory", -- &error_abort); -- if (vms->secure) { -- object_property_set_link(cpuobj, OBJECT(secure_sysmem), -- "secure-memory", &error_abort); -- } -- - object_property_set_bool(cpuobj, true, "realized", &error_fatal); - object_unref(cpuobj); - } -@@ -2089,10 +2043,16 @@ out: - static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -- CPUState *cs = CPU(dev); - ARMCPUTopoInfo topo; -+ Object *cpuobj = OBJECT(dev); -+ CPUState *cs = CPU(dev); - ARMCPU *cpu = ARM_CPU(dev); - MachineState *ms = MACHINE(hotplug_dev); -+ MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); -+ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); -+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(hotplug_dev); -+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); -+ MemoryRegion *sysmem = get_system_memory(); - int smp_cores = ms->smp.cores; - int smp_threads = ms->smp.threads; - -@@ -2145,6 +2105,54 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, - return; - } - cpu->thread_id = topo.smt_id; -+ -+ /* Init some properties */ -+ -+ object_property_set_int(cpuobj, possible_cpus->cpus[cs->cpu_index].arch_id, -+ "mp-affinity", NULL); -+ -+ numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), -+ &error_fatal); -+ -+ if (!vms->secure) { -+ 
object_property_set_bool(cpuobj, false, "has_el3", NULL); -+ } -+ -+ if (!vms->virt && object_property_find(cpuobj, "has_el2", NULL)) { -+ object_property_set_bool(cpuobj, false, "has_el2", NULL); -+ } -+ -+ if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { -+ object_property_set_int(cpuobj, vms->psci_conduit, -+ "psci-conduit", NULL); -+ -+ /* Secondary CPUs start in PSCI powered-down state */ -+ if (cs->cpu_index > 0) { -+ object_property_set_bool(cpuobj, true, -+ "start-powered-off", NULL); -+ } -+ } -+ -+ if (vmc->kvm_no_adjvtime && -+ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { -+ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); -+ } -+ -+ if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { -+ object_property_set_bool(cpuobj, false, "pmu", NULL); -+ } -+ -+ if (object_property_find(cpuobj, "reset-cbar", NULL)) { -+ object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base, -+ "reset-cbar", &error_abort); -+ } -+ -+ object_property_set_link(cpuobj, OBJECT(sysmem), "memory", -+ &error_abort); -+ if (vms->secure) { -+ object_property_set_link(cpuobj, OBJECT(secure_sysmem), -+ "secure-memory", &error_abort); -+ } - } - - static void virt_cpu_plug(HotplugHandler *hotplug_dev, --- -2.19.1 diff --git a/hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch b/hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch new file mode 100644 index 0000000000000000000000000000000000000000..c6702ddc53b8b4e0645016c8ced66e36efe75cac --- /dev/null +++ b/hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch @@ -0,0 +1,170 @@ +From ff64aed3c87427dfa65fa85aef93b44372aefe7d Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 16:59:20 +0800 +Subject: [PATCH 2/4] hw/arm/virt: HDBSS: fix arm-softmmu build on x86 platform + +Move kvm_update_hdbss_cap() to accel/kvm/kvm-stub.c, +check kvm_enabled() and add stub function + +Fixes: e549f32b1a88 ("hw/arm/virt: support the HDBSS feature") +Signed-off-by: Jason 
Zeng +--- + accel/kvm/kvm-all.c | 25 +++++++++++++++++++++++++ + accel/stubs/kvm-stub.c | 5 +++++ + include/sysemu/kvm.h | 8 ++++++++ + migration/migration.h | 7 ------- + migration/ram.c | 35 ++++++----------------------------- + 5 files changed, 44 insertions(+), 36 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index f96afb1230..7d175d3262 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -3251,6 +3251,31 @@ bool kvm_arm_supports_user_irq(void) + return kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ); + } + ++void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) ++{ ++ KVMState *s = kvm_state; ++ int size, ret; ++ ++ if (s == NULL || !kvm_check_extension(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK)) { ++ return; ++ } ++ ++ size = hdbss_buffer_size; ++ if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { ++ fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); ++ return; ++ } ++ ++ ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK, 0, ++ enable ? size : 0); ++ if (ret) { ++ fprintf(stderr, "Could not %s KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: %d\n", ++ enable ? "enable" : "disable", ret); ++ } ++ ++ return; ++} ++ + #ifdef KVM_CAP_SET_GUEST_DEBUG + struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, vaddr pc) + { +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index 1fffdc0ea2..2625175b99 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -119,6 +119,11 @@ bool kvm_arm_supports_user_irq(void) + return false; + } + ++void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) ++{ ++ g_assert_not_reached(); ++} ++ + bool kvm_dirty_ring_enabled(void) + { + return false; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 16cccc881e..098257e72f 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -229,6 +229,14 @@ int kvm_has_gsi_routing(void); + */ + bool kvm_arm_supports_user_irq(void); + ++/* ++ * The default HDBSS size. 
The value ranges [0, 9]. ++ * Set to 0 to disable the HDBSS feature. ++ */ ++#define DEFAULT_HDBSS_BUFFER_SIZE 0 ++#define MAX_HDBSS_BUFFER_SIZE 9 ++ ++void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size); + + int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); + int kvm_on_sigbus(int code, void *addr); +diff --git a/migration/migration.h b/migration/migration.h +index 4a95f00157..eeddb7c0bd 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -48,13 +48,6 @@ struct PostcopyBlocktimeContext; + */ + #define CLEAR_BITMAP_SHIFT_MAX 31 + +-/* +- * The default HDBSS size. The value ranges [0, 9]. +- * Set to 0 to disable the HDBSS feature. +- */ +-#define DEFAULT_HDBSS_BUFFER_SIZE 0 +-#define MAX_HDBSS_BUFFER_SIZE 9 +- + /* This is an abstraction of a "temp huge page" for postcopy's purpose */ + typedef struct { + /* +diff --git a/migration/ram.c b/migration/ram.c +index ee57da62f6..91bec89a6e 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2812,33 +2812,6 @@ static void xbzrle_cleanup(void) + XBZRLE_cache_unlock(); + } + +-#ifdef TARGET_AARCH64 +-static void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) +-{ +- KVMState *s = kvm_state; +- int size, ret; +- +- if (s == NULL || !kvm_check_extension(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK)) { +- return; +- } +- +- size = hdbss_buffer_size; +- if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { +- fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); +- return; +- } +- +- ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK, 0, +- enable ? size : 0); +- if (ret) { +- fprintf(stderr, "Could not %s KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: %d\n", +- enable ? 
"enable" : "disable", ret); +- } +- +- return; +-} +-#endif +- + static void ram_save_cleanup(void *opaque) + { + RAMState **rsp = opaque; +@@ -2856,7 +2829,9 @@ static void ram_save_cleanup(void *opaque) + * memory_global_dirty_log_start/stop used in pairs + */ + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(false, 0); ++ if (kvm_enabled()) { ++ kvm_update_hdbss_cap(false, 0); ++ } + #endif + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } +@@ -3262,7 +3237,9 @@ static void ram_init_bitmaps(RAMState *rs) + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(true, migrate_hdbss_buffer_size()); ++ if (kvm_enabled()) { ++ kvm_update_hdbss_cap(true, migrate_hdbss_buffer_size()); ++ } + #endif + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); +-- +2.33.0 + diff --git a/hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch b/hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch deleted file mode 100644 index c124df5394121fdb0415b3b85d04fc3417a747aa..0000000000000000000000000000000000000000 --- a/hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch +++ /dev/null @@ -1,73 +0,0 @@ -From acc5162f1d1591ee4830f9b67934fc6d8a9ebbc1 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Tue, 8 Sep 2020 22:09:44 +0800 -Subject: [PATCH] hw/arm/virt: Init PMU for hotplugged vCPU - -Factor out PMU init code from fdt_add_pmu_nodes and -do PMU init for hotplugged vCPU. 
- -Signed-off-by: Keqian Zhu ---- - hw/arm/virt.c | 29 +++++++++++++++++++++-------- - 1 file changed, 21 insertions(+), 8 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 7afc6c5e..7506d0ff 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -605,6 +605,23 @@ static void fdt_add_gic_node(VirtMachineState *vms) - g_free(nodename); - } - -+static bool virt_cpu_init_pmu(const VirtMachineState *vms, CPUState *cpu) -+{ -+ ARMCPU *armcpu = ARM_CPU(cpu); -+ -+ if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { -+ return false; -+ } -+ if (kvm_enabled()) { -+ if (kvm_irqchip_in_kernel()) { -+ kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ)); -+ } -+ kvm_arm_pmu_init(cpu); -+ } -+ -+ return true; -+} -+ - static void fdt_add_pmu_nodes(const VirtMachineState *vms) - { - CPUState *cpu; -@@ -612,16 +629,9 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms) - uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI; - - CPU_FOREACH(cpu) { -- armcpu = ARM_CPU(cpu); -- if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { -+ if (!virt_cpu_init_pmu(vms, cpu)) { - return; - } -- if (kvm_enabled()) { -- if (kvm_irqchip_in_kernel()) { -- kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ)); -- } -- kvm_arm_pmu_init(cpu); -- } - } - - if (vms->gic_version == 2) { -@@ -2248,6 +2258,9 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, - agcc->cpu_hotplug_realize(gicv3, ncpu); - connect_gic_cpu_irqs(vms, ncpu); - -+ /* Init PMU part */ -+ virt_cpu_init_pmu(vms, cs); -+ - /* Register CPU reset and trigger it manually */ - cpu_synchronize_state(cs); - cpu_hotplug_register_reset(ncpu); --- -2.23.0 - - diff --git a/hw-arm-virt-Introduce-cpu-topology-support.patch b/hw-arm-virt-Introduce-cpu-topology-support.patch deleted file mode 100644 index 932f467fe274453668edf80bac5d023817d6f95b..0000000000000000000000000000000000000000 --- a/hw-arm-virt-Introduce-cpu-topology-support.patch +++ /dev/null @@ -1,236 +0,0 @@ -From 73fc4af05ebe12d77915e6b3c85c48f5e0c432f3 Mon Sep 17 00:00:00 
2001 -From: Ying Fang -Date: Wed, 22 Apr 2020 19:23:27 +0800 -Subject: [PATCH] hw/arm/virt: Introduce cpu topology support - -Add topology support for guest vcpu by cpu-map in dtb when the guest is booted -with dtb, and by pptt table when the guest is booted with acpi. - -Signed-off-by: Honghao -Signed-off-by: zhanghailiang -(picked-from https://patchwork.ozlabs.org/cover/939301/ which is pushed by -Andrew Jones ) ---- - device_tree.c | 32 ++++++++++++++++++++++ - hw/acpi/aml-build.c | 53 ++++++++++++++++++++++++++++++++++++ - hw/arm/virt-acpi-build.c | 4 +++ - hw/arm/virt.c | 32 +++++++++++++++++++++- - include/hw/acpi/aml-build.h | 2 ++ - include/sysemu/device_tree.h | 1 + - 6 files changed, 123 insertions(+), 1 deletion(-) - -diff --git a/device_tree.c b/device_tree.c -index f8b46b3c..03906a14 100644 ---- a/device_tree.c -+++ b/device_tree.c -@@ -524,6 +524,38 @@ int qemu_fdt_add_subnode(void *fdt, const char *name) - return retval; - } - -+/** -+ * qemu_fdt_add_path -+ * @fdt: Flattened Device Tree -+ * @path: Flattened Device Tree node path -+ * -+ * qemu_fdt_add_path works like qemu_fdt_add_subnode, except it -+ * also recursively adds any missing parent nodes. 
-+ */ -+int qemu_fdt_add_path(void *fdt, const char *path) -+{ -+ char *parent; -+ int offset; -+ -+ offset = fdt_path_offset(fdt, path); -+ if (offset < 0 && offset != -FDT_ERR_NOTFOUND) { -+ error_report("%s Couldn't find node %s: %s", __func__, path, -+ fdt_strerror(offset)); -+ exit(1); -+ } -+ -+ if (offset != -FDT_ERR_NOTFOUND) { -+ return offset; -+ } -+ -+ parent = g_strdup(path); -+ strrchr(parent, '/')[0] = '\0'; -+ qemu_fdt_add_path(fdt, parent); -+ g_free(parent); -+ -+ return qemu_fdt_add_subnode(fdt, path); -+} -+ - void qemu_fdt_dumpdtb(void *fdt, int size) - { - const char *dumpdtb = qemu_opt_get(qemu_get_machine_opts(), "dumpdtb"); -diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 73f97751..f2c8c28f 100644 ---- a/hw/acpi/aml-build.c -+++ b/hw/acpi/aml-build.c -@@ -25,6 +25,7 @@ - #include "qemu/bswap.h" - #include "qemu/bitops.h" - #include "sysemu/numa.h" -+#include "sysemu/cpus.h" - - static GArray *build_alloc_array(void) - { -@@ -51,6 +52,58 @@ static void build_append_array(GArray *array, GArray *val) - g_array_append_vals(array, val->data, val->len); - } - -+/* -+ * ACPI 6.2 Processor Properties Topology Table (PPTT) -+ */ -+static void build_cpu_hierarchy(GArray *tbl, uint32_t flags, -+ uint32_t parent, uint32_t id) -+{ -+ build_append_byte(tbl, 0); /* Type 0 - processor */ -+ build_append_byte(tbl, 20); /* Length, no private resources */ -+ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ -+ build_append_int_noprefix(tbl, flags, 4); -+ build_append_int_noprefix(tbl, parent, 4); -+ build_append_int_noprefix(tbl, id, 4); -+ build_append_int_noprefix(tbl, 0, 4); /* Num private resources */ -+} -+ -+void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) -+{ -+ int pptt_start = table_data->len; -+ int uid = 0, cpus = 0, socket; -+ MachineState *ms = MACHINE(qdev_get_machine()); -+ unsigned int smp_cores = ms->smp.cores; -+ unsigned int smp_threads = ms->smp.threads; -+ -+ acpi_data_push(table_data, 
sizeof(AcpiTableHeader)); -+ -+ for (socket = 0; cpus < possible_cpus; socket++) { -+ uint32_t socket_offset = table_data->len - pptt_start; -+ int core; -+ -+ build_cpu_hierarchy(table_data, 1, 0, socket); -+ -+ for (core = 0; core < smp_cores; core++) { -+ uint32_t core_offset = table_data->len - pptt_start; -+ int thread; -+ -+ if (smp_threads > 1) { -+ build_cpu_hierarchy(table_data, 0, socket_offset, core); -+ for (thread = 0; thread < smp_threads; thread++) { -+ build_cpu_hierarchy(table_data, 2, core_offset, uid++); -+ } -+ } else { -+ build_cpu_hierarchy(table_data, 2, socket_offset, uid++); -+ } -+ } -+ cpus += smp_cores * smp_threads; -+ } -+ -+ build_header(linker, table_data, -+ (void *)(table_data->data + pptt_start), "PPTT", -+ table_data->len - pptt_start, 1, NULL, NULL); -+} -+ - #define ACPI_NAMESEG_LEN 4 - - static void -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index 29494ebd..fe54411f 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -848,6 +848,10 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) - acpi_add_table(table_offsets, tables_blob); - build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); - -+ acpi_add_table(table_offsets, tables_blob); -+ -+ build_pptt(tables_blob, tables->linker, vms->smp_cpus); -+ - acpi_add_table(table_offsets, tables_blob); - build_madt(tables_blob, tables->linker, vms); - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 0fa355ba..272455bc 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -44,6 +44,7 @@ - #include "net/net.h" - #include "sysemu/device_tree.h" - #include "sysemu/numa.h" -+#include "sysemu/cpus.h" - #include "sysemu/sysemu.h" - #include "sysemu/kvm.h" - #include "hw/loader.h" -@@ -312,7 +313,8 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) - int cpu; - int addr_cells = 1; - const MachineState *ms = MACHINE(vms); -- -+ unsigned int smp_cores = ms->smp.cores; -+ unsigned int smp_threads = ms->smp.threads; - /* 
- * From Documentation/devicetree/bindings/arm/cpus.txt - * On ARM v8 64-bit systems value should be set to 2, -@@ -368,8 +370,36 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) - ms->possible_cpus->cpus[cs->cpu_index].props.node_id); - } - -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", -+ qemu_fdt_alloc_phandle(vms->fdt)); -+ - g_free(nodename); - } -+ -+ /* Add vcpu topology by fdt node cpu-map. */ -+ qemu_fdt_add_subnode(vms->fdt, "/cpus/cpu-map"); -+ -+ for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) { -+ char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu); -+ char *map_path; -+ -+ if (smp_threads > 1) { -+ map_path = g_strdup_printf( -+ "/cpus/cpu-map/%s%d/%s%d/%s%d", -+ "cluster", cpu / (smp_cores * smp_threads), -+ "core", (cpu / smp_threads) % smp_cores, -+ "thread", cpu % smp_threads); -+ } else { -+ map_path = g_strdup_printf( -+ "/cpus/cpu-map/%s%d/%s%d", -+ "cluster", cpu / smp_cores, -+ "core", cpu % smp_cores); -+ } -+ qemu_fdt_add_path(vms->fdt, map_path); -+ qemu_fdt_setprop_phandle(vms->fdt, map_path, "cpu", cpu_path); -+ g_free(map_path); -+ g_free(cpu_path); -+ } - } - - static void fdt_add_its_gic_node(VirtMachineState *vms) -diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h -index 375335ab..bfb0b100 100644 ---- a/include/hw/acpi/aml-build.h -+++ b/include/hw/acpi/aml-build.h -@@ -417,6 +417,8 @@ build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s) - void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, - uint64_t len, int node, MemoryAffinityFlags flags); - -+void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus); -+ - void build_slit(GArray *table_data, BIOSLinker *linker); - - void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, -diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h -index c16fd69b..d62fc873 100644 ---- a/include/sysemu/device_tree.h -+++ b/include/sysemu/device_tree.h -@@ -101,6 
+101,7 @@ uint32_t qemu_fdt_get_phandle(void *fdt, const char *path); - uint32_t qemu_fdt_alloc_phandle(void *fdt); - int qemu_fdt_nop_node(void *fdt, const char *node_path); - int qemu_fdt_add_subnode(void *fdt, const char *name); -+int qemu_fdt_add_path(void *fdt, const char *path); - - #define qemu_fdt_setprop_cells(fdt, node_path, property, ...) \ - do { \ --- -2.23.0 diff --git a/hw-arm-virt-Keep-Guest-L1-cache-type-consistent-with.patch b/hw-arm-virt-Keep-Guest-L1-cache-type-consistent-with.patch new file mode 100644 index 0000000000000000000000000000000000000000..592ff9b97046f4a229d389d5b6c7882aa5c9376d --- /dev/null +++ b/hw-arm-virt-Keep-Guest-L1-cache-type-consistent-with.patch @@ -0,0 +1,671 @@ +From 6060f8cad07a3d2a49795fef19d585a9d205ecef Mon Sep 17 00:00:00 2001 +From: Jia Qingtong +Date: Tue, 24 Sep 2024 18:24:33 +0800 +Subject: [PATCH] hw/arm/virt:Keep Guest L1 cache type consistent with KVM + +Linux KVM normalize the cache configuration and expose a +fabricated CLIDR_EL1 value to guest, where L1 cache type +could be unified or separate instruction cache and data +cache. Let's keep guest L1 cache type consistent with +KVM by checking the guest visible CLIDR_EL1, which can +avoid abnormal issue in guest when it's probing cache +info combined CLIDR_EL1 with ACPI PPTT and DT. 
+ +Signed-off-by: Yanan Wang +Signed-off-by: lishusen +--- + hw/acpi/aml-build.c | 165 ++--------------------------------- + hw/arm/virt-acpi-build.c | 167 ++++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 86 +++++++++++++++---- + include/hw/acpi/aml-build.h | 54 ++---------- + include/hw/arm/virt.h | 60 +++++++++++++ + 5 files changed, 306 insertions(+), 226 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index bf9c59f544..0d4994bafe 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -47,7 +47,7 @@ static void build_prepend_byte(GArray *array, uint8_t val) + g_array_prepend_val(array, val); + } + +-static void build_append_byte(GArray *array, uint8_t val) ++void build_append_byte(GArray *array, uint8_t val) + { + g_array_append_val(array, val); + } +@@ -1990,10 +1990,10 @@ void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, + * ACPI spec, Revision 6.3 + * 5.2.29.1 Processor hierarchy node structure (Type 0) + */ +-static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, +- uint32_t parent, uint32_t id, +- uint32_t *priv_rsrc, +- uint32_t priv_num) ++void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, ++ uint32_t parent, uint32_t id, ++ uint32_t *priv_rsrc, ++ uint32_t priv_num) + { + int i; + +@@ -2016,161 +2016,6 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + +-/* +- * ACPI spec, Revision 6.3 +- * 5.2.29.2 Cache Type Structure (Type 1) +- */ +-static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, +- uint32_t cache_type) +-{ +- build_append_byte(tbl, 1); +- build_append_byte(tbl, 24); +- build_append_int_noprefix(tbl, 0, 2); +- build_append_int_noprefix(tbl, 127, 4); +- build_append_int_noprefix(tbl, next_level, 4); +- +- switch (cache_type) { +- case ARM_L1D_CACHE: /* L1 dcache info */ +- build_append_int_noprefix(tbl, ARM_L1DCACHE_SIZE, 4); +- build_append_int_noprefix(tbl, ARM_L1DCACHE_SETS, 4); +- 
build_append_byte(tbl, ARM_L1DCACHE_ASSOCIATIVITY); +- build_append_byte(tbl, ARM_L1DCACHE_ATTRIBUTES); +- build_append_int_noprefix(tbl, ARM_L1DCACHE_LINE_SIZE, 2); +- break; +- case ARM_L1I_CACHE: /* L1 icache info */ +- build_append_int_noprefix(tbl, ARM_L1ICACHE_SIZE, 4); +- build_append_int_noprefix(tbl, ARM_L1ICACHE_SETS, 4); +- build_append_byte(tbl, ARM_L1ICACHE_ASSOCIATIVITY); +- build_append_byte(tbl, ARM_L1ICACHE_ATTRIBUTES); +- build_append_int_noprefix(tbl, ARM_L1ICACHE_LINE_SIZE, 2); +- break; +- case ARM_L2_CACHE: /* L2 cache info */ +- build_append_int_noprefix(tbl, ARM_L2CACHE_SIZE, 4); +- build_append_int_noprefix(tbl, ARM_L2CACHE_SETS, 4); +- build_append_byte(tbl, ARM_L2CACHE_ASSOCIATIVITY); +- build_append_byte(tbl, ARM_L2CACHE_ATTRIBUTES); +- build_append_int_noprefix(tbl, ARM_L2CACHE_LINE_SIZE, 2); +- break; +- case ARM_L3_CACHE: /* L3 cache info */ +- build_append_int_noprefix(tbl, ARM_L3CACHE_SIZE, 4); +- build_append_int_noprefix(tbl, ARM_L3CACHE_SETS, 4); +- build_append_byte(tbl, ARM_L3CACHE_ASSOCIATIVITY); +- build_append_byte(tbl, ARM_L3CACHE_ATTRIBUTES); +- build_append_int_noprefix(tbl, ARM_L3CACHE_LINE_SIZE, 2); +- break; +- default: +- build_append_int_noprefix(tbl, 0, 4); +- build_append_int_noprefix(tbl, 0, 4); +- build_append_byte(tbl, 0); +- build_append_byte(tbl, 0); +- build_append_int_noprefix(tbl, 0, 2); +- } +-} +- +-/* +- * ACPI spec, Revision 6.3 +- * 5.2.29 Processor Properties Topology Table (PPTT) +- */ +-void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, +- const char *oem_id, const char *oem_table_id) +-{ +- MachineClass *mc = MACHINE_GET_CLASS(ms); +- GQueue *list = g_queue_new(); +- guint pptt_start = table_data->len; +- guint parent_offset; +- guint length, i; +- int uid = 0; +- int socket; +- AcpiTable table = { .sig = "PPTT", .rev = 2, +- .oem_id = oem_id, .oem_table_id = oem_table_id }; +- +- acpi_table_begin(&table, table_data); +- +- for (socket = 0; socket < ms->smp.sockets; 
socket++) { +- uint32_t l3_cache_offset = table_data->len - pptt_start; +- build_cache_hierarchy_node(table_data, 0, ARM_L3_CACHE); +- +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- /* +- * Physical package - represents the boundary +- * of a physical package +- */ +- (1 << 0), +- 0, socket, &l3_cache_offset, 1); +- } +- +- if (mc->smp_props.clusters_supported) { +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int cluster; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (cluster = 0; cluster < ms->smp.clusters; cluster++) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, cluster, NULL, 0); +- } +- } +- } +- +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int core; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (core = 0; core < ms->smp.cores; core++) { +- uint32_t priv_rsrc[3] = {}; +- priv_rsrc[0] = table_data->len - pptt_start; /* L2 cache offset */ +- build_cache_hierarchy_node(table_data, 0, ARM_L2_CACHE); +- +- priv_rsrc[1] = table_data->len - pptt_start; /* L1 dcache offset */ +- build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1D_CACHE); +- +- priv_rsrc[2] = table_data->len - pptt_start; /* L1 icache offset */ +- build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1I_CACHE); +- +- if (ms->smp.threads > 1) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, core, priv_rsrc, 3); +- } else { +- build_processor_hierarchy_node( +- table_data, +- (1 << 1) | /* ACPI Processor ID valid */ +- (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, priv_rsrc, 3); +- } +- } +- } +- +- length = 
g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int thread; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (thread = 0; thread < ms->smp.threads; thread++) { +- build_processor_hierarchy_node( +- table_data, +- (1 << 1) | /* ACPI Processor ID valid */ +- (1 << 2) | /* Processor is a Thread */ +- (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, NULL, 0); +- } +- } +- +- g_queue_free(list); +- acpi_table_end(linker, &table); +-} +- + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 179600d4fe..86984b7167 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -63,6 +63,173 @@ + + #define ACPI_BUILD_TABLE_SIZE 0x20000 + ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29.2 Cache Type Structure (Type 1) ++ */ ++static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, ++ uint32_t cache_type) ++{ ++ build_append_byte(tbl, 1); ++ build_append_byte(tbl, 24); ++ build_append_int_noprefix(tbl, 0, 2); ++ build_append_int_noprefix(tbl, 127, 4); ++ build_append_int_noprefix(tbl, next_level, 4); ++ ++ switch (cache_type) { ++ case ARM_L1D_CACHE: /* L1 dcache info */ ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L1DCACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1DCACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L1I_CACHE: /* L1 icache info */ ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L1ICACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1ICACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L1_CACHE: /* L1 cache info */ ++ build_append_int_noprefix(tbl, ARM_L1CACHE_SIZE, 4); ++ 
build_append_int_noprefix(tbl, ARM_L1CACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L1CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1CACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L2_CACHE: /* L2 cache info */ ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L2CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L2CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L2CACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L3_CACHE: /* L3 cache info */ ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L3CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L3CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_LINE_SIZE, 2); ++ break; ++ default: ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_byte(tbl, 0); ++ build_append_byte(tbl, 0); ++ build_append_int_noprefix(tbl, 0, 2); ++ } ++} ++ ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29 Processor Properties Topology Table (PPTT) ++ */ ++static void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++ const char *oem_id, const char *oem_table_id) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ GQueue *list = g_queue_new(); ++ guint pptt_start = table_data->len; ++ guint parent_offset; ++ guint length, i; ++ int uid = 0; ++ int socket; ++ AcpiTable table = { .sig = "PPTT", .rev = 2, ++ .oem_id = oem_id, .oem_table_id = oem_table_id }; ++ bool unified_l1 = cpu_l1_cache_unified(0); ++ ++ acpi_table_begin(&table, table_data); ++ ++ for (socket = 0; socket < ms->smp.sockets; socket++) { ++ uint32_t l3_cache_offset = table_data->len - pptt_start; ++ build_cache_hierarchy_node(table_data, 0, ARM_L3_CACHE); ++ ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ 
build_processor_hierarchy_node( ++ table_data, ++ /* ++ * Physical package - represents the boundary ++ * of a physical package ++ */ ++ (1 << 0), ++ 0, socket, &l3_cache_offset, 1); ++ } ++ ++ if (mc->smp_props.clusters_supported) { ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int cluster; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (cluster = 0; cluster < ms->smp.clusters; cluster++) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a physical package */ ++ parent_offset, cluster, NULL, 0); ++ } ++ } ++ } ++ ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int core; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (core = 0; core < ms->smp.cores; core++) { ++ uint32_t priv_rsrc[3] = {}; ++ priv_rsrc[0] = table_data->len - pptt_start; /* L2 cache offset */ ++ build_cache_hierarchy_node(table_data, 0, ARM_L2_CACHE); ++ ++ if (unified_l1) { ++ priv_rsrc[1] = table_data->len - pptt_start; /* L1 cache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1_CACHE); ++ } else { ++ priv_rsrc[1] = table_data->len - pptt_start; /* L1 dcache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1D_CACHE); ++ priv_rsrc[2] = table_data->len - pptt_start; /* L1 icache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1I_CACHE); ++ } ++ ++ if (ms->smp.threads > 1) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a physical package */ ++ parent_offset, core, priv_rsrc, 3); ++ } else { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, priv_rsrc, 3); ++ } ++ } ++ } ++ ++ length = g_queue_get_length(list); ++ for (i = 0; i < 
length; i++) { ++ int thread; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (thread = 0; thread < ms->smp.threads; thread++) { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 2) | /* Processor is a Thread */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, NULL, 0); ++ } ++ } ++ ++ g_queue_free(list); ++ acpi_table_end(linker, &table); ++} ++ + static void acpi_dsdt_add_psd(Aml *dev, int cpus) + { + Aml *pkg; +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e31c289968..a9efcec85e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -401,6 +401,39 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) + INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags); + } + ++/* ++ * In CLIDR_EL1 exposed to guest by the hypervisor, L1 cache type ++ * maybe unified or seperate ins and data. We need to read the ++ * guest visable CLIDR_EL1 and check L1 cache type. ++ */ ++bool cpu_l1_cache_unified(int cpu) ++{ ++ bool unified = false; ++ uint64_t clidr; ++ ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); ++ CPUState *cs = CPU(armcpu); ++ int ret; ++ ++ if (kvm_enabled()) { ++ struct kvm_one_reg reg = { ++ .id = ARM64_REG_CLIDR_EL1, ++ .addr = (uintptr_t)&clidr ++ }; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); ++ if (ret) { ++ error_setg(&error_fatal, "Get vCPU clidr from KVM failed:%d", ret); ++ return unified; ++ } ++ ++ if (CLIDR_CTYPE(clidr, 1) == CTYPE_UNIFIED) { ++ unified = true; ++ } ++ } ++ ++ return unified; ++} ++ + static void fdt_add_l3cache_nodes(const VirtMachineState *vms) + { + int i; +@@ -415,9 +448,10 @@ static void fdt_add_l3cache_nodes(const VirtMachineState *vms) + qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); + qemu_fdt_setprop_string(ms->fdt, nodename, "cache-unified", "true"); + qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-level", 3); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x2000000); +- 
qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 128); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 2048); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", ARM_L3CACHE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", ++ ARM_L3CACHE_LINE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", ARM_L3CACHE_SETS); + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(ms->fdt)); + g_free(nodename); +@@ -436,10 +470,12 @@ static void fdt_add_l2cache_nodes(const VirtMachineState *vms) + char *nodename = g_strdup_printf("/cpus/l2-cache%d", cpu); + + qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "cache-unified", "true"); + qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x80000); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 64); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 1024); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", ARM_L2CACHE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", ++ ARM_L2CACHE_LINE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", ARM_L2CACHE_SETS); + qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache", + next_path); + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", +@@ -453,18 +489,32 @@ static void fdt_add_l2cache_nodes(const VirtMachineState *vms) + static void fdt_add_l1cache_prop(const VirtMachineState *vms, + char *nodename, int cpu) + { +- const MachineState *ms = MACHINE(vms); +- char *cachename = g_strdup_printf("/cpus/l2-cache%d", cpu); +- +- qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-size", 0x10000); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-line-size", 64); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-sets", 256); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-size", 0x10000); +- qemu_fdt_setprop_cell(ms->fdt, 
nodename, "i-cache-line-size", 64); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-sets", 256); +- qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache", +- cachename); +- g_free(cachename); ++ const MachineState *ms = MACHINE(vms); ++ char *next_path = g_strdup_printf("/cpus/l2-cache%d", cpu); ++ bool unified_l1 = cpu_l1_cache_unified(0); ++ ++ if (unified_l1) { ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", ARM_L1CACHE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", ++ ARM_L1CACHE_LINE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", ARM_L1CACHE_SETS); ++ } else { ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-size", ++ ARM_L1DCACHE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-line-size", ++ ARM_L1DCACHE_LINE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-sets", ++ ARM_L1DCACHE_SETS); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-size", ++ ARM_L1ICACHE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-line-size", ++ ARM_L1ICACHE_LINE_SIZE); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-sets", ++ ARM_L1ICACHE_SETS); ++ } ++ qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache", next_path); ++ ++ g_free(next_path); + } + + static void fdt_add_cpu_nodes(const VirtMachineState *vms) +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 7281c281f6..91f9cbf4f1 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -221,51 +221,6 @@ struct AcpiBuildTables { + BIOSLinker *linker; + } AcpiBuildTables; + +-/* Definitions of the hardcoded cache info*/ +- +-typedef enum { +- ARM_L1D_CACHE, +- ARM_L1I_CACHE, +- ARM_L2_CACHE, +- ARM_L3_CACHE +-} ArmCacheType; +- +-/* L1 data cache: */ +-#define ARM_L1DCACHE_SIZE 65536 +-#define ARM_L1DCACHE_SETS 256 +-#define ARM_L1DCACHE_ASSOCIATIVITY 4 +-#define ARM_L1DCACHE_ATTRIBUTES 2 +-#define ARM_L1DCACHE_LINE_SIZE 64 +- +-/* L1 instruction cache: */ 
+-#define ARM_L1ICACHE_SIZE 65536 +-#define ARM_L1ICACHE_SETS 256 +-#define ARM_L1ICACHE_ASSOCIATIVITY 4 +-#define ARM_L1ICACHE_ATTRIBUTES 4 +-#define ARM_L1ICACHE_LINE_SIZE 64 +- +-/* Level 2 unified cache: */ +-#define ARM_L2CACHE_SIZE 524288 +-#define ARM_L2CACHE_SETS 1024 +-#define ARM_L2CACHE_ASSOCIATIVITY 8 +-#define ARM_L2CACHE_ATTRIBUTES 10 +-#define ARM_L2CACHE_LINE_SIZE 64 +- +-/* Level 3 unified cache: */ +-#define ARM_L3CACHE_SIZE 33554432 +-#define ARM_L3CACHE_SETS 2048 +-#define ARM_L3CACHE_ASSOCIATIVITY 15 +-#define ARM_L3CACHE_ATTRIBUTES 10 +-#define ARM_L3CACHE_LINE_SIZE 128 +- +-struct offset_status { +- uint32_t parent; +- uint32_t l2_offset; +- uint32_t l1d_offset; +- uint32_t l1i_offset; +-}; +- +- + typedef + struct CrsRangeEntry { + uint64_t base; +@@ -460,6 +415,7 @@ Aml *aml_sizeof(Aml *arg); + Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target); + Aml *aml_object_type(Aml *object); + ++void build_append_byte(GArray *array, uint8_t val); + void build_append_int_noprefix(GArray *table, uint64_t value, int size); + + typedef struct AcpiTable { +@@ -537,10 +493,12 @@ void build_srat_memory(GArray *table_data, uint64_t base, + void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id); + +-void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, +- const char *oem_id, const char *oem_table_id); ++void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, ++ uint32_t parent, uint32_t id, ++ uint32_t *priv_rsrc, ++ uint32_t priv_num); + +-void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id); + + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 76a0d3fa5b..4b7dc61c24 100644 +--- a/include/hw/arm/virt.h ++++ 
b/include/hw/arm/virt.h +@@ -47,6 +47,65 @@ + /* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */ + #define PVTIME_SIZE_PER_CPU 64 + ++/* ARM CLIDR_EL1 related definitions */ ++/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ ++#define CTYPE_NONE 0b000 ++#define CTYPE_INS 0b001 ++#define CTYPE_DATA 0b010 ++#define CTYPE_INS_DATA 0b011 ++#define CTYPE_UNIFIED 0b100 ++ ++#define ARM64_REG_CLIDR_EL1 ARM64_SYS_REG(3, 1, 0, 0, 1) ++ ++#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) ++#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) ++#define CLIDR_CTYPE(clidr, level) \ ++ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) ++ ++/* L1 data cache */ ++#define ARM_L1DCACHE_SIZE 65536 ++#define ARM_L1DCACHE_SETS 256 ++#define ARM_L1DCACHE_ASSOCIATIVITY 4 ++#define ARM_L1DCACHE_ATTRIBUTES 2 ++#define ARM_L1DCACHE_LINE_SIZE 64 ++ ++/* L1 instruction cache */ ++#define ARM_L1ICACHE_SIZE 65536 ++#define ARM_L1ICACHE_SETS 256 ++#define ARM_L1ICACHE_ASSOCIATIVITY 4 ++#define ARM_L1ICACHE_ATTRIBUTES 4 ++#define ARM_L1ICACHE_LINE_SIZE 64 ++ ++/* L1 unified cache */ ++#define ARM_L1CACHE_SIZE 131072 ++#define ARM_L1CACHE_SETS 256 ++#define ARM_L1CACHE_ASSOCIATIVITY 4 ++#define ARM_L1CACHE_ATTRIBUTES 10 ++#define ARM_L1CACHE_LINE_SIZE 128 ++ ++/* L2 unified cache */ ++#define ARM_L2CACHE_SIZE 524288 ++#define ARM_L2CACHE_SETS 1024 ++#define ARM_L2CACHE_ASSOCIATIVITY 8 ++#define ARM_L2CACHE_ATTRIBUTES 10 ++#define ARM_L2CACHE_LINE_SIZE 64 ++ ++/* L3 unified cache */ ++#define ARM_L3CACHE_SIZE 33554432 ++#define ARM_L3CACHE_SETS 2048 ++#define ARM_L3CACHE_ASSOCIATIVITY 15 ++#define ARM_L3CACHE_ATTRIBUTES 10 ++#define ARM_L3CACHE_LINE_SIZE 128 ++ ++/* Definitions of the hardcoded cache info */ ++typedef enum { ++ ARM_L1D_CACHE, ++ ARM_L1I_CACHE, ++ ARM_L1_CACHE, ++ ARM_L2_CACHE, ++ ARM_L3_CACHE ++} ArmCacheType; ++ + enum { + VIRT_FLASH, + VIRT_MEM, +@@ -194,6 +253,7 @@ OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, 
VIRT_MACHINE) + + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); ++bool cpu_l1_cache_unified(int cpu); + + /* Return number of redistributors that fit in the specified region */ + static uint32_t virt_redist_capacity(VirtMachineState *vms, int region) +-- +2.41.0.windows.1 + diff --git a/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch b/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec3b72f02c9fe269e68623de66654906734e83f4 --- /dev/null +++ b/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch @@ -0,0 +1,311 @@ +From 8daa90ad502b79e232377f831f67df456a743304 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 26 Aug 2023 01:29:37 +0000 +Subject: [PATCH] hw/arm/virt: Move setting of common CPU properties in a + function + +Factor out CPU properties code common for {hot,cold}-plugged CPUs. This allows +code reuse. + +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 220 ++++++++++++++++++++++++++---------------- + include/hw/arm/virt.h | 4 + + 2 files changed, 140 insertions(+), 84 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 94481d45d4..8f647422d8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2113,16 +2113,130 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + ++static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, ++ Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ Error *local_err = NULL; ++ VirtMachineClass *vmc; ++ ++ vmc = VIRT_MACHINE_GET_CLASS(ms); ++ ++ /* now, set the cpu object property values */ ++ numa_cpu_pre_plug(cpu_slot, DEVICE(cpuobj), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ object_property_set_int(cpuobj, "mp-affinity", cpu_slot->arch_id, NULL); ++ ++ if (!vms->secure) { ++ object_property_set_bool(cpuobj, "has_el3", false, 
NULL); ++ } ++ ++ if (!vms->virt && object_property_find(cpuobj, "has_el2")) { ++ object_property_set_bool(cpuobj, "has_el2", false, NULL); ++ } ++ ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime")) { ++ object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); ++ } ++ ++ if (vmc->no_kvm_steal_time && ++ object_property_find(cpuobj, "kvm-steal-time")) { ++ object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); ++ } ++ ++ if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) { ++ object_property_set_bool(cpuobj, "pmu", false, NULL); ++ } ++ ++ if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { ++ object_property_set_bool(cpuobj, "lpa2", false, NULL); ++ } ++ ++ if (object_property_find(cpuobj, "reset-cbar")) { ++ object_property_set_int(cpuobj, "reset-cbar", ++ vms->memmap[VIRT_CPUPERIPHS].base, ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* link already initialized {secure,tag}-memory regions to this cpu */ ++ object_property_set_link(cpuobj, "memory", OBJECT(vms->sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-memory", ++ OBJECT(vms->secure_sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ if (vms->mte) { ++ if (!object_property_find(cpuobj, "tag-memory")) { ++ error_setg(&local_err, "MTE requested, but not supported " ++ "by the guest CPU"); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ object_property_set_link(cpuobj, "tag-memory", OBJECT(vms->tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-tag-memory", ++ OBJECT(vms->secure_tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ } ++ ++ /* ++ * RFC: Question: this must only be called for the hotplugged cpus. For the ++ * cold booted secondary cpus this is being taken care in arm_load_kernel() ++ * in boot.c. 
Perhaps we should remove that code now? ++ */ ++ if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { ++ object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit, ++ NULL); ++ ++ /* Secondary CPUs start in PSCI powered-down state */ ++ if (CPU(cpuobj)->cpu_index > 0) { ++ object_property_set_bool(cpuobj, "start-powered-off", true, NULL); ++ } ++ } ++ ++out: ++ if (local_err) { ++ error_propagate(errp, local_err); ++ } ++ return; ++} ++ + static void machvirt_init(MachineState *machine) + { + VirtMachineState *vms = VIRT_MACHINE(machine); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; +- MemoryRegion *sysmem = get_system_memory(); ++ MemoryRegion *secure_tag_sysmem = NULL; + MemoryRegion *secure_sysmem = NULL; + MemoryRegion *tag_sysmem = NULL; +- MemoryRegion *secure_tag_sysmem = NULL; ++ MemoryRegion *sysmem; + int n, virt_max_cpus; + bool firmware_loaded; + bool aarch64 = true; +@@ -2166,6 +2280,8 @@ static void machvirt_init(MachineState *machine) + */ + finalize_gic_version(vms); + ++ sysmem = vms->sysmem = get_system_memory(); ++ + if (vms->secure) { + /* + * The Secure view of the world is the same as the NonSecure, +@@ -2173,7 +2289,7 @@ static void machvirt_init(MachineState *machine) + * containing the system memory at low priority; any secure-only + * devices go in at higher priority and take precedence. 
+ */ +- secure_sysmem = g_new(MemoryRegion, 1); ++ secure_sysmem = vms->secure_sysmem = g_new(MemoryRegion, 1); + memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory", + UINT64_MAX); + memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1); +@@ -2246,6 +2362,23 @@ static void machvirt_init(MachineState *machine) + exit(1); + } + ++ if (vms->mte) { ++ /* Create the memory region only once, but link to all cpus later */ ++ tag_sysmem = vms->tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(tag_sysmem, OBJECT(machine), ++ "tag-memory", UINT64_MAX / 32); ++ ++ if (vms->secure) { ++ secure_tag_sysmem = vms->secure_tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(secure_tag_sysmem, OBJECT(machine), ++ "secure-tag-memory", UINT64_MAX / 32); ++ ++ /* As with ram, secure-tag takes precedence over tag. */ ++ memory_region_add_subregion_overlap(secure_tag_sysmem, 0, ++ tag_sysmem, -1); ++ } ++ } ++ + create_fdt(vms); + qemu_log("cpu init start\n"); + +@@ -2259,15 +2392,10 @@ static void machvirt_init(MachineState *machine) + } + + cpuobj = object_new(possible_cpus->cpus[n].type); +- object_property_set_int(cpuobj, "mp-affinity", +- possible_cpus->cpus[n].arch_id, NULL); + + cs = CPU(cpuobj); + cs->cpu_index = n; + +- numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), +- &error_fatal); +- + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", + virt_get_socket_id(machine, n), NULL); +@@ -2278,82 +2406,6 @@ static void machvirt_init(MachineState *machine) + object_property_set_int(cpuobj, "thread-id", + virt_get_thread_id(machine, n), NULL); + +- if (!vms->secure) { +- object_property_set_bool(cpuobj, "has_el3", false, NULL); +- } +- +- if (!vms->virt && object_property_find(cpuobj, "has_el2")) { +- object_property_set_bool(cpuobj, "has_el2", false, NULL); +- } +- +- if (vmc->kvm_no_adjvtime && +- object_property_find(cpuobj, "kvm-no-adjvtime")) { +- 
object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); +- } +- +- if (vmc->no_kvm_steal_time && +- object_property_find(cpuobj, "kvm-steal-time")) { +- object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); +- } +- +- if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) { +- object_property_set_bool(cpuobj, "pmu", false, NULL); +- } +- +- if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { +- object_property_set_bool(cpuobj, "lpa2", false, NULL); +- } +- +- if (object_property_find(cpuobj, "reset-cbar")) { +- object_property_set_int(cpuobj, "reset-cbar", +- vms->memmap[VIRT_CPUPERIPHS].base, +- &error_abort); +- } +- +- object_property_set_link(cpuobj, "memory", OBJECT(sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-memory", +- OBJECT(secure_sysmem), &error_abort); +- } +- +- if (vms->mte) { +- /* Create the memory region only once, but link to all cpus. */ +- if (!tag_sysmem) { +- /* +- * The property exists only if MemTag is supported. +- * If it is, we must allocate the ram to back that up. +- */ +- if (!object_property_find(cpuobj, "tag-memory")) { +- error_report("MTE requested, but not supported " +- "by the guest CPU"); +- exit(1); +- } +- +- tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(tag_sysmem, OBJECT(machine), +- "tag-memory", UINT64_MAX / 32); +- +- if (vms->secure) { +- secure_tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(secure_tag_sysmem, OBJECT(machine), +- "secure-tag-memory", UINT64_MAX / 32); +- +- /* As with ram, secure-tag takes precedence over tag. 
*/ +- memory_region_add_subregion_overlap(secure_tag_sysmem, 0, +- tag_sysmem, -1); +- } +- } +- +- object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-tag-memory", +- OBJECT(secure_tag_sysmem), +- &error_abort); +- } +- } +- + qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); + object_unref(cpuobj); + } +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index e944d434c4..49d1ec8656 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -139,6 +139,10 @@ struct VirtMachineState { + DeviceState *platform_bus_dev; + FWCfgState *fw_cfg; + PFlashCFI01 *flash[2]; ++ MemoryRegion *sysmem; ++ MemoryRegion *secure_sysmem; ++ MemoryRegion *tag_sysmem; ++ MemoryRegion *secure_tag_sysmem; + bool secure; + bool highmem; + bool highmem_compact; +-- +2.27.0 + diff --git a/hw-arm-virt-Move-virt_flash_create-to-machvirt_init.patch b/hw-arm-virt-Move-virt_flash_create-to-machvirt_init.patch new file mode 100644 index 0000000000000000000000000000000000000000..13dab3c8557acde6f5c61b08f018d582e777289b --- /dev/null +++ b/hw-arm-virt-Move-virt_flash_create-to-machvirt_init.patch @@ -0,0 +1,46 @@ +From ddf23b6f58d3c605a083ad3f09388dcb6edf729e Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Fri, 12 Aug 2022 11:53:11 +0100 +Subject: [PATCH] hw/arm/virt: Move virt_flash_create() to machvirt_init() + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b7d6407b658327eb0be8a3014a63f84f58406043 + +For confidential VMs we'll want to skip flash device creation. +Unfortunately, in virt_instance_init() the machine->cgs member has not +yet been initialized, so we cannot check whether confidential guest is +enabled. Move virt_flash_create() to machvirt_init(), where we can +access the machine->cgs member. 
+ +Signed-off-by: Jean-Philippe Brucker +Conflicts: + hw/arm/virt.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/arm/virt.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 95f6acf655..116c3ddbf0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2572,6 +2572,7 @@ static void machvirt_init(MachineState *machine) + } + + finalize_gic_version(vms); ++ virt_flash_create(vms); + + possible_cpus = mc->possible_cpu_arch_ids(machine); + +@@ -4120,8 +4121,6 @@ static void virt_instance_init(Object *obj) + + vms->irqmap = a15irqmap; + +- virt_flash_create(vms); +- + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + +-- +2.33.0 + diff --git a/hw-arm-virt-Reserve-one-bit-of-guest-physical-addres.patch b/hw-arm-virt-Reserve-one-bit-of-guest-physical-addres.patch new file mode 100644 index 0000000000000000000000000000000000000000..342e226d6e73c87946c8ab7dafa56a52addd47e0 --- /dev/null +++ b/hw-arm-virt-Reserve-one-bit-of-guest-physical-addres.patch @@ -0,0 +1,66 @@ +From 726dbebf1dc71cf4ede0f0bf6ea049639d93c00d Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 6 Feb 2023 16:56:39 +0000 +Subject: [PATCH] hw/arm/virt: Reserve one bit of guest-physical address for + RME + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/ebffee632eb86b3423ac08a264ea0edc5cf97ead + +When RME is enabled, the upper GPA bit is used to distinguish protected +from unprotected addresses. Reserve it when setting up the guest memory +map. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/arm/virt.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 66d2d68944..51f7c940f4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3836,14 +3836,24 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + } + int rme_vm_type = kvm_arm_rme_vm_type(ms), type; + int max_vm_pa_size, requested_pa_size; ++ int rme_reserve_bit = 0; + bool fixed_ipa; + +- max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); ++ if (rme_vm_type) { ++ /* ++ * With RME, the upper GPA bit differentiates Realm from NS memory. ++ * Reserve the upper bit to ensure that highmem devices will fit. ++ */ ++ rme_reserve_bit = 1; ++ } ++ ++ max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa) - ++ rme_reserve_bit; + + /* we freeze the memory map to compute the highest gpa */ + virt_set_memmap(vms, max_vm_pa_size); + +- requested_pa_size = 64 - clz64(vms->highest_gpa); ++ requested_pa_size = 64 - clz64(vms->highest_gpa) + rme_reserve_bit; + + /* + * KVM requires the IPA size to be at least 32 bits. 
+@@ -3852,11 +3862,11 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + requested_pa_size = 32; + } + +- if (requested_pa_size > max_vm_pa_size) { ++ if (requested_pa_size > max_vm_pa_size + rme_reserve_bit) { + error_report("-m and ,maxmem option values " + "require an IPA range (%d bits) larger than " + "the one supported by the host (%d bits)", +- requested_pa_size, max_vm_pa_size); ++ requested_pa_size, max_vm_pa_size + rme_reserve_bit); + return -1; + } + /* +-- +2.33.0 + diff --git a/hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch b/hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch deleted file mode 100644 index 262cb508bcb8ba48bf93a3875957f2c9ace7698d..0000000000000000000000000000000000000000 --- a/hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch +++ /dev/null @@ -1,402 +0,0 @@ -From 5d1be90750551f1debf5767d7a6e2b9c50054c05 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Mon, 9 Dec 2019 10:03:06 +0100 -Subject: [PATCH] hw/arm/virt: Simplify by moving the gic in the machine state -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Make the gic a field in the machine state, and instead of filling -an array of qemu_irq and passing it around, directly call -qdev_get_gpio_in() on the gic field. 
- -Signed-off-by: Philippe Mathieu-Daudé -Reviewed-by: Luc Michel -Message-id: 20191209090306.20433-1-philmd@redhat.com -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell ---- - hw/arm/virt.c | 109 +++++++++++++++++++++--------------------- - include/hw/arm/virt.h | 1 + - 2 files changed, 55 insertions(+), 55 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 18321e522b..8638aeedb7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -634,7 +634,7 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms) - } - } - --static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) -+static inline DeviceState *create_acpi_ged(VirtMachineState *vms) - { - DeviceState *dev; - MachineState *ms = MACHINE(vms); -@@ -650,14 +650,14 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) - - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); -- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irq]); -+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); - - qdev_init_nofail(dev); - - return dev; - } - --static void create_its(VirtMachineState *vms, DeviceState *gicdev) -+static void create_its(VirtMachineState *vms) - { - const char *itsclass = its_class_name(); - DeviceState *dev; -@@ -669,7 +669,7 @@ static void create_its(VirtMachineState *vms, DeviceState *gicdev) - - dev = qdev_create(NULL, itsclass); - -- object_property_set_link(OBJECT(dev), OBJECT(gicdev), "parent-gicv3", -+ object_property_set_link(OBJECT(dev), OBJECT(vms->gic), "parent-gicv3", - &error_abort); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_ITS].base); -@@ -677,7 +677,7 @@ static void create_its(VirtMachineState *vms, DeviceState *gicdev) - fdt_add_its_gic_node(vms); - } - --static void create_v2m(VirtMachineState *vms, qemu_irq *pic) -+static void 
create_v2m(VirtMachineState *vms) - { - int i; - int irq = vms->irqmap[VIRT_GIC_V2M]; -@@ -690,17 +690,17 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic) - qdev_init_nofail(dev); - - for (i = 0; i < NUM_GICV2M_SPIS; i++) { -- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]); -+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, -+ qdev_get_gpio_in(vms->gic, irq + i)); - } - - fdt_add_v2m_gic_node(vms); - } - --static void create_gic(VirtMachineState *vms, qemu_irq *pic) -+static void create_gic(VirtMachineState *vms) - { - MachineState *ms = MACHINE(vms); - /* We create a standalone GIC */ -- DeviceState *gicdev; - SysBusDevice *gicbusdev; - const char *gictype; - int type = vms->gic_version, i; -@@ -709,15 +709,15 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) - - gictype = (type == 3) ? gicv3_class_name() : gic_class_name(); - -- gicdev = qdev_create(NULL, gictype); -- qdev_prop_set_uint32(gicdev, "revision", type); -- qdev_prop_set_uint32(gicdev, "num-cpu", smp_cpus); -+ vms->gic = qdev_create(NULL, gictype); -+ qdev_prop_set_uint32(vms->gic, "revision", type); -+ qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); - /* Note that the num-irq property counts both internal and external - * interrupts; there are always 32 of the former (mandated by GIC spec). 
- */ -- qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32); -+ qdev_prop_set_uint32(vms->gic, "num-irq", NUM_IRQS + 32); - if (!kvm_irqchip_in_kernel()) { -- qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure); -+ qdev_prop_set_bit(vms->gic, "has-security-extensions", vms->secure); - } - - if (type == 3) { -@@ -727,25 +727,25 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) - - nb_redist_regions = virt_gicv3_redist_region_count(vms); - -- qdev_prop_set_uint32(gicdev, "len-redist-region-count", -+ qdev_prop_set_uint32(vms->gic, "len-redist-region-count", - nb_redist_regions); -- qdev_prop_set_uint32(gicdev, "redist-region-count[0]", redist0_count); -+ qdev_prop_set_uint32(vms->gic, "redist-region-count[0]", redist0_count); - - if (nb_redist_regions == 2) { - uint32_t redist1_capacity = - vms->memmap[VIRT_HIGH_GIC_REDIST2].size / GICV3_REDIST_SIZE; - -- qdev_prop_set_uint32(gicdev, "redist-region-count[1]", -+ qdev_prop_set_uint32(vms->gic, "redist-region-count[1]", - MIN(smp_cpus - redist0_count, redist1_capacity)); - } - } else { - if (!kvm_irqchip_in_kernel()) { -- qdev_prop_set_bit(gicdev, "has-virtualization-extensions", -+ qdev_prop_set_bit(vms->gic, "has-virtualization-extensions", - vms->virt); - } - } -- qdev_init_nofail(gicdev); -- gicbusdev = SYS_BUS_DEVICE(gicdev); -+ qdev_init_nofail(vms->gic); -+ gicbusdev = SYS_BUS_DEVICE(vms->gic); - sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base); - if (type == 3) { - sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_REDIST].base); -@@ -781,23 +781,23 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) - - for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { - qdev_connect_gpio_out(cpudev, irq, -- qdev_get_gpio_in(gicdev, -+ qdev_get_gpio_in(vms->gic, - ppibase + timer_irq[irq])); - } - - if (type == 3) { -- qemu_irq irq = qdev_get_gpio_in(gicdev, -+ qemu_irq irq = qdev_get_gpio_in(vms->gic, - ppibase + ARCH_GIC_MAINT_IRQ); - 
qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", - 0, irq); - } else if (vms->virt) { -- qemu_irq irq = qdev_get_gpio_in(gicdev, -+ qemu_irq irq = qdev_get_gpio_in(vms->gic, - ppibase + ARCH_GIC_MAINT_IRQ); - sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); - } - - qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, -- qdev_get_gpio_in(gicdev, ppibase -+ qdev_get_gpio_in(vms->gic, ppibase - + VIRTUAL_PMU_IRQ)); - - sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); -@@ -809,20 +809,16 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) - qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); - } - -- for (i = 0; i < NUM_IRQS; i++) { -- pic[i] = qdev_get_gpio_in(gicdev, i); -- } -- - fdt_add_gic_node(vms); - - if (type == 3 && vms->its) { -- create_its(vms, gicdev); -+ create_its(vms); - } else if (type == 2) { -- create_v2m(vms, pic); -+ create_v2m(vms); - } - } - --static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, -+static void create_uart(const VirtMachineState *vms, int uart, - MemoryRegion *mem, Chardev *chr) - { - char *nodename; -@@ -838,7 +834,7 @@ static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, - qdev_init_nofail(dev); - memory_region_add_subregion(mem, base, - sysbus_mmio_get_region(s, 0)); -- sysbus_connect_irq(s, 0, pic[irq]); -+ sysbus_connect_irq(s, 0, qdev_get_gpio_in(vms->gic, irq)); - - nodename = g_strdup_printf("/pl011@%" PRIx64, base); - qemu_fdt_add_subnode(vms->fdt, nodename); -@@ -880,7 +876,7 @@ static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem) - memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0)); - } - --static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) -+static void create_rtc(const VirtMachineState *vms) - { - char *nodename; - hwaddr base = vms->memmap[VIRT_RTC].base; -@@ -888,7 +884,7 @@ static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) - int irq = 
vms->irqmap[VIRT_RTC]; - const char compat[] = "arm,pl031\0arm,primecell"; - -- sysbus_create_simple("pl031", base, pic[irq]); -+ sysbus_create_simple("pl031", base, qdev_get_gpio_in(vms->gic, irq)); - - nodename = g_strdup_printf("/pl031@%" PRIx64, base); - qemu_fdt_add_subnode(vms->fdt, nodename); -@@ -916,7 +912,7 @@ static void virt_powerdown_req(Notifier *n, void *opaque) - } - } - --static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) -+static void create_gpio(const VirtMachineState *vms) - { - char *nodename; - DeviceState *pl061_dev; -@@ -925,7 +921,8 @@ static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) - int irq = vms->irqmap[VIRT_GPIO]; - const char compat[] = "arm,pl061\0arm,primecell"; - -- pl061_dev = sysbus_create_simple("pl061", base, pic[irq]); -+ pl061_dev = sysbus_create_simple("pl061", base, -+ qdev_get_gpio_in(vms->gic, irq)); - - uint32_t phandle = qemu_fdt_alloc_phandle(vms->fdt); - nodename = g_strdup_printf("/pl061@%" PRIx64, base); -@@ -959,7 +956,7 @@ static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) - g_free(nodename); - } - --static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic) -+static void create_virtio_devices(const VirtMachineState *vms) - { - int i; - hwaddr size = vms->memmap[VIRT_MMIO].size; -@@ -995,7 +992,8 @@ static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic) - int irq = vms->irqmap[VIRT_MMIO] + i; - hwaddr base = vms->memmap[VIRT_MMIO].base + i * size; - -- sysbus_create_simple("virtio-mmio", base, pic[irq]); -+ sysbus_create_simple("virtio-mmio", base, -+ qdev_get_gpio_in(vms->gic, irq)); - } - - /* We add dtb nodes in reverse order so that they appear in the finished -@@ -1244,7 +1242,7 @@ static void create_pcie_irq_map(const VirtMachineState *vms, - 0x7 /* PCI irq */); - } - --static void create_smmu(const VirtMachineState *vms, qemu_irq *pic, -+static void create_smmu(const VirtMachineState *vms, - PCIBus *bus) - { - 
char *node; -@@ -1267,7 +1265,8 @@ static void create_smmu(const VirtMachineState *vms, qemu_irq *pic, - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - for (i = 0; i < NUM_SMMU_IRQS; i++) { -- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]); -+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, -+ qdev_get_gpio_in(vms->gic, irq + i)); - } - - node = g_strdup_printf("/smmuv3@%" PRIx64, base); -@@ -1294,7 +1293,7 @@ static void create_smmu(const VirtMachineState *vms, qemu_irq *pic, - g_free(node); - } - --static void create_pcie(VirtMachineState *vms, qemu_irq *pic) -+static void create_pcie(VirtMachineState *vms) - { - hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base; - hwaddr size_mmio = vms->memmap[VIRT_PCIE_MMIO].size; -@@ -1354,7 +1353,8 @@ static void create_pcie(VirtMachineState *vms, qemu_irq *pic) - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, base_pio); - - for (i = 0; i < GPEX_NUM_IRQS; i++) { -- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]); -+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, -+ qdev_get_gpio_in(vms->gic, irq + i)); - gpex_set_irq_num(GPEX_HOST(dev), i, irq + i); - } - -@@ -1414,7 +1414,7 @@ static void create_pcie(VirtMachineState *vms, qemu_irq *pic) - if (vms->iommu) { - vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt); - -- create_smmu(vms, pic, pci->bus); -+ create_smmu(vms, pci->bus); - - qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, 0x10000); -@@ -1423,7 +1423,7 @@ static void create_pcie(VirtMachineState *vms, qemu_irq *pic) - g_free(nodename); - } - --static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic) -+static void create_platform_bus(VirtMachineState *vms) - { - DeviceState *dev; - SysBusDevice *s; -@@ -1439,8 +1439,8 @@ static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic) - - s = SYS_BUS_DEVICE(dev); - for (i = 0; i < PLATFORM_BUS_NUM_IRQS; i++) { -- int irqn = vms->irqmap[VIRT_PLATFORM_BUS] + i; -- 
sysbus_connect_irq(s, i, pic[irqn]); -+ int irq = vms->irqmap[VIRT_PLATFORM_BUS] + i; -+ sysbus_connect_irq(s, i, qdev_get_gpio_in(vms->gic, irq)); - } - - memory_region_add_subregion(sysmem, -@@ -1621,7 +1621,6 @@ static void machvirt_init(MachineState *machine) - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); - MachineClass *mc = MACHINE_GET_CLASS(machine); - const CPUArchIdList *possible_cpus; -- qemu_irq pic[NUM_IRQS]; - MemoryRegion *sysmem = get_system_memory(); - MemoryRegion *secure_sysmem = NULL; - int n, virt_max_cpus; -@@ -1829,29 +1828,29 @@ static void machvirt_init(MachineState *machine) - - virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); - -- create_gic(vms, pic); -+ create_gic(vms); - - fdt_add_pmu_nodes(vms); - -- create_uart(vms, pic, VIRT_UART, sysmem, serial_hd(0)); -+ create_uart(vms, VIRT_UART, sysmem, serial_hd(0)); - - create_cpufreq(vms, sysmem); - - if (vms->secure) { - create_secure_ram(vms, secure_sysmem); -- create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); -+ create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); - } - - vms->highmem_ecam &= vms->highmem && (!firmware_loaded || aarch64); - -- create_rtc(vms, pic); -+ create_rtc(vms); - -- create_pcie(vms, pic); -+ create_pcie(vms); - - if (has_ged && aarch64 && firmware_loaded && acpi_enabled) { -- vms->acpi_dev = create_acpi_ged(vms, pic); -+ vms->acpi_dev = create_acpi_ged(vms); - } else { -- create_gpio(vms, pic); -+ create_gpio(vms); - } - - /* connect powerdown request */ -@@ -1862,12 +1861,12 @@ static void machvirt_init(MachineState *machine) - * (which will be automatically plugged in to the transports). If - * no backend is created the transport will just sit harmlessly idle. 
- */ -- create_virtio_devices(vms, pic); -+ create_virtio_devices(vms); - - vms->fw_cfg = create_fw_cfg(vms, &address_space_memory); - rom_set_fw(vms->fw_cfg); - -- create_platform_bus(vms, pic); -+ create_platform_bus(vms); - - vms->bootinfo.ram_size = machine->ram_size; - vms->bootinfo.kernel_filename = machine->kernel_filename; -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index dcceb9c615..3dfefca93b 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -138,6 +138,7 @@ typedef struct { - uint32_t iommu_phandle; - int psci_conduit; - hwaddr highest_gpa; -+ DeviceState *gic; - DeviceState *acpi_dev; - Notifier powerdown_notifier; - } VirtMachineState; --- -2.19.1 diff --git a/hw-arm-virt-Use-RAM-instead-of-flash-for-confidentia.patch b/hw-arm-virt-Use-RAM-instead-of-flash-for-confidentia.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c6aea8e88ebee48da0ef8c2b704ef2a43fdc7c9 --- /dev/null +++ b/hw-arm-virt-Use-RAM-instead-of-flash-for-confidentia.patch @@ -0,0 +1,110 @@ +From 2e0ea64c8643318f8824040b010f0b2421efbd33 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Fri, 12 Aug 2022 12:08:58 +0100 +Subject: [PATCH] hw/arm/virt: Use RAM instead of flash for confidential guest + firmware + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/a8d4685f97e63dac012647cc3f9b1d830b784f8c + +The flash device that holds firmware code relies on read-only stage-2 +mappings. Read accesses behave as RAM and write accesses as MMIO. Since +the RMM does not support read-only mappings we cannot use the flash +device as-is. + +That isn't a problem because the firmware does not want to disclose any +information to the host, hence will not store its variables in clear +persistent memory. We can therefore replace the flash device with RAM, +and load the firmware there. 
+ +Signed-off-by: Jean-Philippe Brucker +Conflicts: + hw/arm/boot.c + hw/arm/virt.c + include/hw/arm/boot.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/arm/virt.c | 20 +++++++++++++++++++- + include/hw/arm/boot.h | 5 +++++ + 2 files changed, 24 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 116c3ddbf0..8423912c89 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1407,6 +1407,10 @@ static PFlashCFI01 *virt_flash_create1(VirtMachineState *vms, + + static void virt_flash_create(VirtMachineState *vms) + { ++ if (virt_machine_is_confidential(vms)) { ++ return; ++ } ++ + vms->flash[0] = virt_flash_create1(vms, "virt.flash0", "pflash0"); + vms->flash[1] = virt_flash_create1(vms, "virt.flash1", "pflash1"); + } +@@ -1445,6 +1449,10 @@ static void virt_flash_map(VirtMachineState *vms, + hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2; + hwaddr flashbase = vms->memmap[VIRT_FLASH].base; + ++ if (virt_machine_is_confidential(vms)) { ++ return; ++ } ++ + virt_flash_map1(vms->flash[0], flashbase, flashsize, + secure_sysmem); + virt_flash_map1(vms->flash[1], flashbase + flashsize, flashsize, +@@ -1460,7 +1468,7 @@ static void virt_flash_fdt(VirtMachineState *vms, + MachineState *ms = MACHINE(vms); + char *nodename; + +- if (virtcca_cvm_enabled()) { ++ if (virtcca_cvm_enabled() || virt_machine_is_confidential(vms)) { + return; + } + +@@ -1524,6 +1532,15 @@ static bool virt_firmware_init(VirtMachineState *vms, + const char *bios_name; + BlockBackend *pflash_blk0; + ++ /* ++ * For a confidential VM, the firmware image and any boot information, ++ * including EFI variables, are stored in RAM in order to be measurable and ++ * private. Create a RAM region and load the firmware image there. 
++ */ ++ if (virt_machine_is_confidential(vms)) { ++ return virt_confidential_firmware_init(vms, sysmem); ++ } ++ + /* Map legacy -drive if=pflash to machine properties */ + for (i = 0; i < ARRAY_SIZE(vms->flash); i++) { + pflash_cfi01_legacy_drive(vms->flash[i], +@@ -2893,6 +2910,7 @@ static void machvirt_init(MachineState *machine) + vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size; + vms->bootinfo.confidential = virtcca_cvm_enabled(); + vms->bootinfo.psci_conduit = vms->psci_conduit; ++ vms->bootinfo.confidential = virt_machine_is_confidential(vms); + arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo); + + vms->machine_done.notify = virt_machine_done; +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 06ca1d90b2..0cbae4685b 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -133,8 +133,13 @@ struct arm_boot_info { + bool secure_board_setup; + + arm_endianness endianness; ++ ++ /* Used when loading firmware into RAM */ + hwaddr firmware_base; + hwaddr firmware_max_size; ++ /* ++ * Confidential guest boot loads everything into RAM so it can be measured. ++ */ + bool confidential; + }; + +-- +2.33.0 + diff --git a/hw-arm-virt-acpi-build-Add-IORT-RMR-regions-to-handl.patch b/hw-arm-virt-acpi-build-Add-IORT-RMR-regions-to-handl.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c0d8de2c6075c2941bb86ed8da47a0ebfc6874e --- /dev/null +++ b/hw-arm-virt-acpi-build-Add-IORT-RMR-regions-to-handl.patch @@ -0,0 +1,187 @@ +From 1746ba1aee671b9552540e36a629988b00846a82 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 5 Oct 2021 10:53:13 +0200 +Subject: [PATCH] hw/arm/virt-acpi-build: Add IORT RMR regions to handle MSI + nested binding + +To handle SMMUv3 nested stage support it is practical to +expose the guest with reserved memory regions (RMRs) +covering the IOVAs used by the host kernel to map +physical MSI doorbells. 
+ +Those IOVAs belong to [0x8000000, 0x8100000] matching +MSI_IOVA_BASE and MSI_IOVA_LENGTH definitions in kernel +arm-smmu-v3 driver. This is the window used to allocate +IOVAs matching physical MSI doorbells. + +With those RMRs, the guest is forced to use a flat mapping +for this range. Hence the assigned device is programmed +with one IOVA from this range. Stage 1, owned by the guest +has a flat mapping for this IOVA. Stage2, owned by the VMM +then enforces a mapping from this IOVA to the physical +MSI doorbell. + +The creation of those RMR nodes only is relevant if nested +stage SMMU is in use, along with VFIO. As VFIO devices can be +hotplugged, all RMRs need to be created in advance. Hence +the patch introduces a new arm virt "nested-smmuv3" iommu type. + +ARM DEN 0049E.b IORT specification also mandates that when +RMRs are present, the OS must preserve PCIe configuration +performed by the boot FW. So along with the RMR IORT nodes, +a _DSM function #5, as defined by PCI FIRMWARE SPECIFICATION +REVISION 3.3, chapter 4.6.5 is added to PCIe host bridge +and PCIe expander bridge objects. 
+ +Signed-off-by: Eric Auger +Suggested-by: Jean-Philippe Brucker +Signed-off-by: Nicolin Chen +Signed-off-by: Shameer Kolothum +--- + hw/arm/virt-acpi-build.c | 71 +++++++++++++++++++++++++++++++++++----- + 1 file changed, 63 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 1d7839e4a0..ad0f79e03d 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -417,6 +417,14 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, + .bus = vms->bus, + }; + ++ /* ++ * Nested SMMU requires RMRs for MSI 1-1 mapping, which ++ * require _DSM for PreservingPCI Boot Configurations ++ */ ++ if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) { ++ cfg.preserve_config = true; ++ } ++ + if (vms->highmem_mmio) { + cfg.mmio64 = memmap[VIRT_HIGH_PCIE_MMIO]; + } +@@ -495,7 +503,7 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms) + #define IORT_NODE_OFFSET 48 + + static void build_iort_id_mapping(GArray *table_data, uint32_t input_base, +- uint32_t id_count, uint32_t out_ref) ++ uint32_t id_count, uint32_t out_ref, uint32_t flags) + { + /* Table 4 ID mapping format */ + build_append_int_noprefix(table_data, input_base, 4); /* Input base */ +@@ -503,7 +511,7 @@ static void build_iort_id_mapping(GArray *table_data, uint32_t input_base, + build_append_int_noprefix(table_data, input_base, 4); /* Output base */ + build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */ + /* Flags */ +- build_append_int_noprefix(table_data, 0 /* Single mapping (disabled) */, 4); ++ build_append_int_noprefix(table_data, flags, 4); /* Flags */ + } + + struct AcpiIortIdMapping { +@@ -545,6 +553,50 @@ static int iort_idmap_compare(gconstpointer a, gconstpointer b) + return idmap_a->input_base - idmap_b->input_base; + } + ++static void ++build_iort_rmr_nodes(GArray *table_data, GArray *smmu_idmaps, ++ size_t *smmu_offset, uint32_t *id) ++{ ++ AcpiIortIdMapping *range; ++ int i; ++ ++ for (i = 0; i < 
smmu_idmaps->len; i++) { ++ range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); ++ int bdf = range->input_base; ++ ++ /* Table 18 Reserved Memory Range Node */ ++ ++ build_append_int_noprefix(table_data, 6 /* RMR */, 1); /* Type */ ++ /* Length */ ++ build_append_int_noprefix(table_data, 28 + ID_MAPPING_ENTRY_SIZE + 20, 2); ++ build_append_int_noprefix(table_data, 3, 1); /* Revision */ ++ build_append_int_noprefix(table_data, *id, 4); /* Identifier */ ++ /* Number of ID mappings */ ++ build_append_int_noprefix(table_data, 1, 4); ++ /* Reference to ID Array */ ++ build_append_int_noprefix(table_data, 28, 4); ++ ++ /* RMR specific data */ ++ ++ /* Flags */ ++ build_append_int_noprefix(table_data, 0 /* Disallow remapping */, 4); ++ /* Number of Memory Range Descriptors */ ++ build_append_int_noprefix(table_data, 1 , 4); ++ /* Reference to Memory Range Descriptors */ ++ build_append_int_noprefix(table_data, 28 + ID_MAPPING_ENTRY_SIZE, 4); ++ build_iort_id_mapping(table_data, bdf, range->id_count, smmu_offset[i], 1); ++ ++ /* Table 19 Memory Range Descriptor */ ++ ++ /* Physical Range offset */ ++ build_append_int_noprefix(table_data, 0x8000000, 8); ++ /* Physical Range length */ ++ build_append_int_noprefix(table_data, 0x100000, 8); ++ build_append_int_noprefix(table_data, 0, 4); /* Reserved */ ++ *id += 1; ++ } ++} ++ + /* + * Input Output Remapping Table (IORT) + * Conforms to "IO Remapping Table System Software on ARM Platforms", +@@ -554,7 +606,6 @@ static void + build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + int i, nb_nodes, rc_mapping_count; +- const uint32_t iort_node_offset = IORT_NODE_OFFSET; + size_t node_size, *smmu_offset; + AcpiIortIdMapping *idmap; + hwaddr base; +@@ -563,7 +614,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + +- 
AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id, ++ AcpiTable table = { .sig = "IORT", .rev = 5, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + /* Table 2 The IORT */ + acpi_table_begin(&table, table_data); +@@ -668,7 +719,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, 0, 4); + + /* output IORT node is the ITS group node (the first node) */ +- build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET); ++ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET, 0); + } + + /* Table 17 Root Complex Node */ +@@ -709,7 +760,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + /* output IORT node is the smmuv3 node */ + build_iort_id_mapping(table_data, range->input_base, +- range->id_count, smmu_offset[i]); ++ range->id_count, smmu_offset[i], 0); + } + + /* bypassed RIDs connect to ITS group node directly: RC -> ITS */ +@@ -717,11 +768,15 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + range = &g_array_index(its_idmaps, AcpiIortIdMapping, i); + /* output IORT node is the ITS group node (the first node) */ + build_iort_id_mapping(table_data, range->input_base, +- range->id_count, iort_node_offset); ++ range->id_count, IORT_NODE_OFFSET, 0); + } + } else { + /* output IORT node is the ITS group node (the first node) */ +- build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); ++ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET, 0); ++ } ++ ++ if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) { ++ build_iort_rmr_nodes(table_data, smmu_idmaps, smmu_offset, &id); + } + + acpi_table_end(linker, &table); +-- +2.41.0.windows.1 + diff --git a/hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch b/hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch deleted file mode 100644 index 
0602ab8d4d0d7af63f034c9b66c984e5aed627a7..0000000000000000000000000000000000000000 --- a/hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 8d287871fd4e1b4654fe9e5011b80614cb44f6d8 Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:28 +0100 -Subject: [PATCH] hw/arm/virt-acpi-build: Add PC-DIMM in SRAT - -Generate Memory Affinity Structures for PC-DIMM ranges. - -Also, Linux and Windows need ACPI SRAT table to make memory hotplug -work properly, however currently QEMU doesn't create SRAT table if -numa options aren't present on CLI. Hence add support(>=4.2) to -create numa node automatically (auto_enable_numa_with_memhp) when -QEMU is started with memory hotplug enabled but without '-numa' -options on CLI. - -Signed-off-by: Shameer Kolothum -Signed-off-by: Eric Auger -Reviewed-by: Igor Mammedov -Message-Id: <20190918130633.4872-7-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin ---- - hw/arm/virt-acpi-build.c | 9 +++++++++ - hw/arm/virt.c | 2 ++ - 2 files changed, 11 insertions(+) - -diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index fca53ae01f..9622994e50 100644 ---- a/hw/arm/virt-acpi-build.c -+++ b/hw/arm/virt-acpi-build.c -@@ -592,6 +592,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - int i, srat_start; - uint64_t mem_base; - MachineClass *mc = MACHINE_GET_CLASS(vms); -+ MachineState *ms = MACHINE(vms); - const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(MACHINE(vms)); - - srat_start = table_data->len; -@@ -617,6 +618,14 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) - } - } - -+ if (ms->device_memory) { -+ numamem = acpi_data_push(table_data, sizeof *numamem); -+ build_srat_memory(numamem, ms->device_memory->base, -+ memory_region_size(&ms->device_memory->mr), -+ nb_numa_nodes - 1, -+ MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); -+ } -+ - build_header(linker, table_data, (void *)(table_data->data + srat_start), - "SRAT", table_data->len - srat_start, 3, NULL, NULL); - } -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 8ccabd5159..ab33cce4b3 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2173,6 +2173,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - hc->plug = virt_machine_device_plug_cb; - hc->unplug_request = virt_machine_device_unplug_request_cb; - mc->numa_mem_supported = true; -+ mc->auto_enable_numa_with_memhp = true; - } - - static void virt_instance_init(Object *obj) -@@ -2278,6 +2279,7 @@ static void virt_machine_4_0_options(MachineClass *mc) - virt_machine_4_1_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); - vmc->no_ged = true; -+ mc->auto_enable_numa_with_memhp = false; - } - DEFINE_VIRT_MACHINE(4, 0) - --- -2.19.1 diff --git a/hw-arm-virt-acpi-build-Build-IORT-with-multiple-SMMU.patch b/hw-arm-virt-acpi-build-Build-IORT-with-multiple-SMMU.patch new 
file mode 100644 index 0000000000000000000000000000000000000000..3451d6d5a700e010f714da317e431347599141c5 --- /dev/null +++ b/hw-arm-virt-acpi-build-Build-IORT-with-multiple-SMMU.patch @@ -0,0 +1,155 @@ +From a7ffb5856940a1515ef84a4d4644b7c7c07afb8f Mon Sep 17 00:00:00 2001 +From: Nicolin Chen +Date: Wed, 6 Nov 2024 19:22:13 +0000 +Subject: [PATCH] hw/arm/virt-acpi-build: Build IORT with multiple SMMU nodes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Now that we can have multiple user-creatable smmuv3-nested +devices, each associated with different pci buses, update +IORT ID mappings accordingly. + +Signed-off-by: Nicolin Chen +Signed-off-by: Shameer Kolothum +--- + hw/arm/virt-acpi-build.c | 43 ++++++++++++++++++++++++++++------------ + include/hw/arm/virt.h | 6 ++++++ + 2 files changed, 36 insertions(+), 13 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 076781423b..1d7839e4a0 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -555,8 +555,10 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + int i, nb_nodes, rc_mapping_count; + const uint32_t iort_node_offset = IORT_NODE_OFFSET; +- size_t node_size, smmu_offset = 0; ++ size_t node_size, *smmu_offset; + AcpiIortIdMapping *idmap; ++ hwaddr base; ++ int irq, num_smmus = 0; + uint32_t id = 0; + GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); +@@ -566,7 +568,21 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + /* Table 2 The IORT */ + acpi_table_begin(&table, table_data); + +- if (vms->iommu == VIRT_IOMMU_SMMUV3) { ++ if (vms->smmu_nested_count) { ++ irq = vms->irqmap[VIRT_SMMU_NESTED] + ARM_SPI_BASE; ++ base = vms->memmap[VIRT_SMMU_NESTED].base; ++ num_smmus = vms->smmu_nested_count; ++ } else if (virt_has_smmuv3(vms)) { ++ irq = 
vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE; ++ base = vms->memmap[VIRT_SMMU].base; ++ num_smmus = 1; ++ } ++ ++ smmu_offset = g_new0(size_t, num_smmus); ++ nb_nodes = 2; /* RC, ITS */ ++ nb_nodes += num_smmus; /* SMMU nodes */ ++ ++ if (virt_has_smmuv3(vms)) { + AcpiIortIdMapping next_range = {0}; + + object_child_foreach_recursive(object_get_root(), +@@ -588,18 +604,19 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + } + + next_range.input_base = idmap->input_base + idmap->id_count; ++ if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) { ++ nb_nodes++; /* RMR node per SMMU */ ++ } + } + + /* Append the last RC -> ITS ID mapping */ +- if (next_range.input_base < 0xFFFF) { +- next_range.id_count = 0xFFFF - next_range.input_base; ++ if (next_range.input_base < 0x10000) { ++ next_range.id_count = 0x10000 - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + +- nb_nodes = 3; /* RC, ITS, SMMUv3 */ + rc_mapping_count = smmu_idmaps->len + its_idmaps->len; + } else { +- nb_nodes = 2; /* RC, ITS */ + rc_mapping_count = 1; + } + /* Number of IORT Nodes */ +@@ -621,10 +638,9 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + /* GIC ITS Identifier Array */ + build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4); + +- if (vms->iommu == VIRT_IOMMU_SMMUV3) { +- int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE; ++ for (i = 0; i < num_smmus; i++) { ++ smmu_offset[i] = table_data->len - table.table_offset; + +- smmu_offset = table_data->len - table.table_offset; + /* Table 9 SMMUv3 Format */ + build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */ + node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE; +@@ -635,7 +651,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + /* Reference to ID Array */ + build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4); + /* Base address */ +- build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8); ++ 
build_append_int_noprefix(table_data, base + (i * SMMU_IO_LEN), 8); + /* Flags */ + build_append_int_noprefix(table_data, 1 /* COHACC Override */, 4); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ +@@ -646,12 +662,13 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, irq + 1, 4); /* PRI */ + build_append_int_noprefix(table_data, irq + 3, 4); /* GERR */ + build_append_int_noprefix(table_data, irq + 2, 4); /* Sync */ ++ irq += NUM_SMMU_IRQS; + build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */ + /* DeviceID mapping index (ignored since interrupts are GSIV based) */ + build_append_int_noprefix(table_data, 0, 4); + + /* output IORT node is the ITS group node (the first node) */ +- build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); ++ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET); + } + + /* Table 17 Root Complex Node */ +@@ -684,7 +701,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + + /* Output Reference */ +- if (vms->iommu == VIRT_IOMMU_SMMUV3) { ++ if (virt_has_smmuv3(vms)) { + AcpiIortIdMapping *range; + + /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */ +@@ -692,7 +709,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + /* output IORT node is the smmuv3 node */ + build_iort_id_mapping(table_data, range->input_base, +- range->id_count, smmu_offset); ++ range->id_count, smmu_offset[i]); + } + + /* bypassed RIDs connect to ITS group node directly: RC -> ITS */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index cd41e28202..bc3c8b70da 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -295,4 +295,10 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + vms->highmem_redists) ? 
2 : 1; + } + ++static inline bool virt_has_smmuv3(const VirtMachineState *vms) ++{ ++ return vms->iommu == VIRT_IOMMU_SMMUV3 || ++ vms->iommu == VIRT_IOMMU_SMMUV3_NESTED; ++} ++ + #endif /* QEMU_ARM_VIRT_H */ +-- +2.41.0.windows.1 + diff --git a/hw-arm-virt-acpi-build.c-Migrate-SPCR-creation-to-co.patch b/hw-arm-virt-acpi-build.c-Migrate-SPCR-creation-to-co.patch new file mode 100644 index 0000000000000000000000000000000000000000..3a6851ede8c527ef3ff589c1eadd2c25d4b26dd8 --- /dev/null +++ b/hw-arm-virt-acpi-build.c-Migrate-SPCR-creation-to-co.patch @@ -0,0 +1,237 @@ +From f6b4a18ba78b1daa2a69fccfb768ec2bdcafb1d4 Mon Sep 17 00:00:00 2001 +From: Sia Jee Heng +Date: Sun, 28 Jan 2024 18:14:39 -0800 +Subject: [PATCH] hw/arm/virt-acpi-build.c: Migrate SPCR creation to common + location + +RISC-V should also generate the SPCR in a manner similar to ARM. +Therefore, instead of replicating the code, relocate this function +to the common AML build. + +Signed-off-by: Sia Jee Heng +Reviewed-by: Alistair Francis +Message-ID: <20240129021440.17640-2-jeeheng.sia@starfivetech.com> +[ Changes by AF: + - Add missing Language SPCR entry +] +Signed-off-by: Alistair Francis +Signed-off-by: Xianglai Li +--- + hw/acpi/aml-build.c | 53 +++++++++++++++++++++++++++++ + hw/arm/virt-acpi-build.c | 68 +++++++++++++++---------------------- + include/hw/acpi/acpi-defs.h | 33 ++++++++++++++++++ + include/hw/acpi/aml-build.h | 4 +++ + 4 files changed, 117 insertions(+), 41 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 0d4994baf..3fb996c03 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -2016,6 +2016,59 @@ void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + ++void build_spcr(GArray *table_data, BIOSLinker *linker, ++ const AcpiSpcrData *f, const uint8_t rev, ++ const char *oem_id, const char *oem_table_id) ++{ ++ AcpiTable table = { .sig = "SPCR", .rev = rev, .oem_id = oem_id, ++ .oem_table_id = oem_table_id }; ++ ++ 
acpi_table_begin(&table, table_data); ++ /* Interface type */ ++ build_append_int_noprefix(table_data, f->interface_type, 1); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 3); ++ /* Base Address */ ++ build_append_gas(table_data, f->base_addr.id, f->base_addr.width, ++ f->base_addr.offset, f->base_addr.size, ++ f->base_addr.addr); ++ /* Interrupt type */ ++ build_append_int_noprefix(table_data, f->interrupt_type, 1); ++ /* IRQ */ ++ build_append_int_noprefix(table_data, f->pc_interrupt, 1); ++ /* Global System Interrupt */ ++ build_append_int_noprefix(table_data, f->interrupt, 4); ++ /* Baud Rate */ ++ build_append_int_noprefix(table_data, f->baud_rate, 1); ++ /* Parity */ ++ build_append_int_noprefix(table_data, f->parity, 1); ++ /* Stop Bits */ ++ build_append_int_noprefix(table_data, f->stop_bits, 1); ++ /* Flow Control */ ++ build_append_int_noprefix(table_data, f->flow_control, 1); ++ /* Language */ ++ build_append_int_noprefix(table_data, f->language, 1); ++ /* Terminal Type */ ++ build_append_int_noprefix(table_data, f->terminal_type, 1); ++ /* PCI Device ID */ ++ build_append_int_noprefix(table_data, f->pci_device_id, 2); ++ /* PCI Vendor ID */ ++ build_append_int_noprefix(table_data, f->pci_vendor_id, 2); ++ /* PCI Bus Number */ ++ build_append_int_noprefix(table_data, f->pci_bus, 1); ++ /* PCI Device Number */ ++ build_append_int_noprefix(table_data, f->pci_device, 1); ++ /* PCI Function Number */ ++ build_append_int_noprefix(table_data, f->pci_function, 1); ++ /* PCI Flags */ ++ build_append_int_noprefix(table_data, f->pci_flags, 4); ++ /* PCI Segment */ ++ build_append_int_noprefix(table_data, f->pci_segment, 1); ++ /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 4); ++ ++ acpi_table_end(linker, &table); ++} + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 86984b716..076781423 100644 +--- a/hw/arm/virt-acpi-build.c 
++++ b/hw/arm/virt-acpi-build.c +@@ -717,48 +717,34 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * Rev: 1.07 + */ + static void +-build_spcr(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) ++spcr_setup(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { +- AcpiTable table = { .sig = "SPCR", .rev = 2, .oem_id = vms->oem_id, +- .oem_table_id = vms->oem_table_id }; +- +- acpi_table_begin(&table, table_data); +- +- /* Interface Type */ +- build_append_int_noprefix(table_data, 3, 1); /* ARM PL011 UART */ +- build_append_int_noprefix(table_data, 0, 3); /* Reserved */ +- /* Base Address */ +- build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 32, 0, 3, +- vms->memmap[VIRT_UART].base); +- /* Interrupt Type */ +- build_append_int_noprefix(table_data, +- (1 << 3) /* Bit[3] ARMH GIC interrupt */, 1); +- build_append_int_noprefix(table_data, 0, 1); /* IRQ */ +- /* Global System Interrupt */ +- build_append_int_noprefix(table_data, +- vms->irqmap[VIRT_UART] + ARM_SPI_BASE, 4); +- build_append_int_noprefix(table_data, 3 /* 9600 */, 1); /* Baud Rate */ +- build_append_int_noprefix(table_data, 0 /* No Parity */, 1); /* Parity */ +- /* Stop Bits */ +- build_append_int_noprefix(table_data, 1 /* 1 Stop bit */, 1); +- /* Flow Control */ +- build_append_int_noprefix(table_data, +- (1 << 1) /* RTS/CTS hardware flow control */, 1); +- /* Terminal Type */ +- build_append_int_noprefix(table_data, 0 /* VT100 */, 1); +- build_append_int_noprefix(table_data, 0, 1); /* Language */ +- /* PCI Device ID */ +- build_append_int_noprefix(table_data, 0xffff /* not a PCI device*/, 2); +- /* PCI Vendor ID */ +- build_append_int_noprefix(table_data, 0xffff /* not a PCI device*/, 2); +- build_append_int_noprefix(table_data, 0, 1); /* PCI Bus Number */ +- build_append_int_noprefix(table_data, 0, 1); /* PCI Device Number */ +- build_append_int_noprefix(table_data, 0, 1); /* PCI Function Number */ +- build_append_int_noprefix(table_data, 0, 4); 
/* PCI Flags */ +- build_append_int_noprefix(table_data, 0, 1); /* PCI Segment */ +- build_append_int_noprefix(table_data, 0, 4); /* Reserved */ ++ AcpiSpcrData serial = { ++ .interface_type = 3, /* ARM PL011 UART */ ++ .base_addr.id = AML_AS_SYSTEM_MEMORY, ++ .base_addr.width = 32, ++ .base_addr.offset = 0, ++ .base_addr.size = 3, ++ .base_addr.addr = vms->memmap[VIRT_UART].base, ++ .interrupt_type = (1 << 3),/* Bit[3] ARMH GIC interrupt*/ ++ .pc_interrupt = 0, /* IRQ */ ++ .interrupt = (vms->irqmap[VIRT_UART] + ARM_SPI_BASE), ++ .baud_rate = 3, /* 9600 */ ++ .parity = 0, /* No Parity */ ++ .stop_bits = 1, /* 1 Stop bit */ ++ .flow_control = 1 << 1, /* RTS/CTS hardware flow control */ ++ .terminal_type = 0, /* VT100 */ ++ .language = 0, /* Language */ ++ .pci_device_id = 0xffff, /* not a PCI device*/ ++ .pci_vendor_id = 0xffff, /* not a PCI device*/ ++ .pci_bus = 0, ++ .pci_device = 0, ++ .pci_function = 0, ++ .pci_flags = 0, ++ .pci_segment = 0, ++ }; + +- acpi_table_end(linker, &table); ++ build_spcr(table_data, linker, &serial, 2, vms->oem_id, vms->oem_table_id); + } + + /* +@@ -1316,7 +1302,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) + } + + acpi_add_table(table_offsets, tables_blob); +- build_spcr(tables_blob, tables->linker, vms); ++ spcr_setup(tables_blob, tables->linker, vms); + + acpi_add_table(table_offsets, tables_blob); + build_dbg2(tables_blob, tables->linker, vms); +diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h +index b1f389fb4..7a8b708cd 100644 +--- a/include/hw/acpi/acpi-defs.h ++++ b/include/hw/acpi/acpi-defs.h +@@ -90,6 +90,39 @@ typedef struct AcpiFadtData { + unsigned *xdsdt_tbl_offset; + } AcpiFadtData; + ++typedef struct AcpiGas { ++ uint8_t id; /* Address space ID */ ++ uint8_t width; /* Register bit width */ ++ uint8_t offset; /* Register bit offset */ ++ uint8_t size; /* Access size */ ++ uint64_t addr; /* Address */ ++} AcpiGas; ++ ++/* SPCR (Serial Port Console Redirection table) */ 
++typedef struct AcpiSpcrData { ++ uint8_t interface_type; ++ uint8_t reserved[3]; ++ struct AcpiGas base_addr; ++ uint8_t interrupt_type; ++ uint8_t pc_interrupt; ++ uint32_t interrupt; /* Global system interrupt */ ++ uint8_t baud_rate; ++ uint8_t parity; ++ uint8_t stop_bits; ++ uint8_t flow_control; ++ uint8_t terminal_type; ++ uint8_t language; ++ uint8_t reserved1; ++ uint16_t pci_device_id; /* Must be 0xffff if not PCI device */ ++ uint16_t pci_vendor_id; /* Must be 0xffff if not PCI device */ ++ uint8_t pci_bus; ++ uint8_t pci_device; ++ uint8_t pci_function; ++ uint32_t pci_flags; ++ uint8_t pci_segment; ++ uint32_t reserved2; ++} AcpiSpcrData; ++ + #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) + #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) + +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 91f9cbf4f..381ad4a8a 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -506,4 +506,8 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + + void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, + const char *oem_id, const char *oem_table_id); ++ ++void build_spcr(GArray *table_data, BIOSLinker *linker, ++ const AcpiSpcrData *f, const uint8_t rev, ++ const char *oem_id, const char *oem_table_id); + #endif +-- +2.43.0 + diff --git a/hw-arm-virt-acpi-build.c-Migrate-fw_cfg-creation-to-.patch b/hw-arm-virt-acpi-build.c-Migrate-fw_cfg-creation-to-.patch new file mode 100644 index 0000000000000000000000000000000000000000..10b2a1dae0819112a6915758cd22d99ec8cdf2fb --- /dev/null +++ b/hw-arm-virt-acpi-build.c-Migrate-fw_cfg-creation-to-.patch @@ -0,0 +1,180 @@ +From 948c605badb09d87eb439a711940d932d07cdd1e Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:24 +0800 +Subject: [PATCH 01/18] hw/arm/virt-acpi-build.c: Migrate fw_cfg creation to + common location + +commit 4c7f4f4f0516ad1bad45b011235202f5be6899de upstream + +RISC-V also needs to use the same code 
to create fw_cfg in DSDT. So, +avoid code duplication by moving the code in arm and riscv to a device +specific file. + +Suggested-by: Igor Mammedov +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Alistair Francis +Reviewed-by: Andrew Jones +Acked-by: Michael S. Tsirkin +Message-ID: <20231218150247.466427-2-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/arm/virt-acpi-build.c | 19 ++----------------- + hw/nvram/fw_cfg-acpi.c | 23 +++++++++++++++++++++++ + hw/nvram/meson.build | 1 + + hw/riscv/virt-acpi-build.c | 19 ++----------------- + include/hw/nvram/fw_cfg_acpi.h | 15 +++++++++++++++ + 5 files changed, 43 insertions(+), 34 deletions(-) + create mode 100644 hw/nvram/fw_cfg-acpi.c + create mode 100644 include/hw/nvram/fw_cfg_acpi.h + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 5e949671a1..81ca26c052 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -35,7 +35,7 @@ + #include "target/arm/cpu.h" + #include "hw/acpi/acpi-defs.h" + #include "hw/acpi/acpi.h" +-#include "hw/nvram/fw_cfg.h" ++#include "hw/nvram/fw_cfg_acpi.h" + #include "hw/acpi/bios-linker-loader.h" + #include "hw/acpi/aml-build.h" + #include "hw/acpi/utils.h" +@@ -341,21 +341,6 @@ static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, + aml_append(scope, dev); + } + +-static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap) +-{ +- Aml *dev = aml_device("FWCF"); +- aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002"))); +- /* device present, functioning, decoding, not shown in UI */ +- aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); +- aml_append(dev, aml_name_decl("_CCA", aml_int(1))); +- +- Aml *crs = aml_resource_template(); +- aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base, +- fw_cfg_memmap->size, AML_READ_WRITE)); +- aml_append(dev, aml_name_decl("_CRS", crs)); +- aml_append(scope, dev); +-} +- + static void acpi_dsdt_add_flash(Aml 
*scope, const MemMapEntry *flash_memmap) + { + Aml *dev, *crs; +@@ -1318,7 +1303,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + if (vmc->acpi_expose_flash) { + acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); + } +- acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]); ++ fw_cfg_acpi_dsdt_add(scope, &memmap[VIRT_FW_CFG]); + acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO], + (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS); + acpi_dsdt_add_pci(scope, memmap, irqmap[VIRT_PCIE] + ARM_SPI_BASE, vms); +diff --git a/hw/nvram/fw_cfg-acpi.c b/hw/nvram/fw_cfg-acpi.c +new file mode 100644 +index 0000000000..4e48baeaa0 +--- /dev/null ++++ b/hw/nvram/fw_cfg-acpi.c +@@ -0,0 +1,23 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* ++ * Add fw_cfg device in DSDT ++ * ++ */ ++ ++#include "hw/nvram/fw_cfg_acpi.h" ++#include "hw/acpi/aml-build.h" ++ ++void fw_cfg_acpi_dsdt_add(Aml *scope, const MemMapEntry *fw_cfg_memmap) ++{ ++ Aml *dev = aml_device("FWCF"); ++ aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002"))); ++ /* device present, functioning, decoding, not shown in UI */ ++ aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); ++ aml_append(dev, aml_name_decl("_CCA", aml_int(1))); ++ ++ Aml *crs = aml_resource_template(); ++ aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base, ++ fw_cfg_memmap->size, AML_READ_WRITE)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ aml_append(scope, dev); ++} +diff --git a/hw/nvram/meson.build b/hw/nvram/meson.build +index 75e415b1a0..4996c72456 100644 +--- a/hw/nvram/meson.build ++++ b/hw/nvram/meson.build +@@ -17,3 +17,4 @@ system_ss.add(when: 'CONFIG_XLNX_EFUSE_ZYNQMP', if_true: files( + system_ss.add(when: 'CONFIG_XLNX_BBRAM', if_true: files('xlnx-bbram.c')) + + specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_nvram.c')) ++specific_ss.add(when: 'CONFIG_ACPI', if_true: files('fw_cfg-acpi.c')) +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 
7331248f59..d8772c2821 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -28,6 +28,7 @@ + #include "hw/acpi/acpi.h" + #include "hw/acpi/aml-build.h" + #include "hw/acpi/utils.h" ++#include "hw/nvram/fw_cfg_acpi.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/reset.h" +@@ -97,22 +98,6 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) + } + } + +-static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap) +-{ +- Aml *dev = aml_device("FWCF"); +- aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002"))); +- +- /* device present, functioning, decoding, not shown in UI */ +- aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); +- aml_append(dev, aml_name_decl("_CCA", aml_int(1))); +- +- Aml *crs = aml_resource_template(); +- aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base, +- fw_cfg_memmap->size, AML_READ_WRITE)); +- aml_append(dev, aml_name_decl("_CRS", crs)); +- aml_append(scope, dev); +-} +- + /* RHCT Node[N] starts at offset 56 */ + #define RHCT_NODE_ARRAY_OFFSET 56 + +@@ -226,7 +211,7 @@ static void build_dsdt(GArray *table_data, + scope = aml_scope("\\_SB"); + acpi_dsdt_add_cpus(scope, s); + +- acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]); ++ fw_cfg_acpi_dsdt_add(scope, &memmap[VIRT_FW_CFG]); + + aml_append(dsdt, scope); + +diff --git a/include/hw/nvram/fw_cfg_acpi.h b/include/hw/nvram/fw_cfg_acpi.h +new file mode 100644 +index 0000000000..b6553d86fc +--- /dev/null ++++ b/include/hw/nvram/fw_cfg_acpi.h +@@ -0,0 +1,15 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ ++/* ++ * ACPI support for fw_cfg ++ * ++ */ ++ ++#ifndef FW_CFG_ACPI_H ++#define FW_CFG_ACPI_H ++ ++#include "qemu/osdep.h" ++#include "exec/hwaddr.h" ++ ++void fw_cfg_acpi_dsdt_add(Aml *scope, const MemMapEntry *fw_cfg_memmap); ++ ++#endif +-- +2.33.0 + diff --git a/hw-arm-virt-acpi-build.c-Migrate-virtio-creation-to-.patch b/hw-arm-virt-acpi-build.c-Migrate-virtio-creation-to-.patch new file 
mode 100644 index 0000000000000000000000000000000000000000..da6031d80a1c638e9177c880b02ebec1dc9a9dad --- /dev/null +++ b/hw-arm-virt-acpi-build.c-Migrate-virtio-creation-to-.patch @@ -0,0 +1,158 @@ +From 4339104293871dba77a50502357ed96962edae2c Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:25 +0800 +Subject: [PATCH 02/18] hw/arm/virt-acpi-build.c: Migrate virtio creation to + common location + +commit 57ba8436282940b59d9a069cc01a601bbc8036c5 upstream + +RISC-V also needs to create the virtio in DSDT in the same way as ARM. +So, instead of duplicating the code, move this function to the device +specific file which is common across architectures. + +Suggested-by: Igor Mammedov +Signed-off-by: Sunil V L +Reviewed-by: Alistair Francis +Reviewed-by: Andrew Jones +Acked-by: Michael S. Tsirkin +Message-ID: <20231218150247.466427-3-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/arm/virt-acpi-build.c | 32 ++++---------------------------- + hw/virtio/meson.build | 1 + + hw/virtio/virtio-acpi.c | 33 +++++++++++++++++++++++++++++++++ + include/hw/virtio/virtio-acpi.h | 16 ++++++++++++++++ + 4 files changed, 54 insertions(+), 28 deletions(-) + create mode 100644 hw/virtio/virtio-acpi.c + create mode 100644 include/hw/virtio/virtio-acpi.h + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 81ca26c052..b389ef7622 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -59,6 +59,7 @@ + #include "hw/acpi/ghes.h" + #include "hw/acpi/viot.h" + #include "kvm_arm.h" ++#include "hw/virtio/virtio-acpi.h" + + #define ARM_SPI_BASE 32 + +@@ -365,32 +366,6 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap) + aml_append(scope, dev); + } + +-static void acpi_dsdt_add_virtio(Aml *scope, +- const MemMapEntry *virtio_mmio_memmap, +- uint32_t mmio_irq, int num) +-{ +- hwaddr base = virtio_mmio_memmap->base; +- hwaddr size = virtio_mmio_memmap->size; +- int i; +- +- for (i = 0; i < 
num; i++) { +- uint32_t irq = mmio_irq + i; +- Aml *dev = aml_device("VR%02u", i); +- aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005"))); +- aml_append(dev, aml_name_decl("_UID", aml_int(i))); +- aml_append(dev, aml_name_decl("_CCA", aml_int(1))); +- +- Aml *crs = aml_resource_template(); +- aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE)); +- aml_append(crs, +- aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH, +- AML_EXCLUSIVE, &irq, 1)); +- aml_append(dev, aml_name_decl("_CRS", crs)); +- aml_append(scope, dev); +- base += size; +- } +-} +- + static void acpi_dsdt_add_hisi_sec(Aml *scope, + const MemMapEntry *virtio_mmio_memmap, + int dev_id) +@@ -1304,8 +1279,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); + } + fw_cfg_acpi_dsdt_add(scope, &memmap[VIRT_FW_CFG]); +- acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO], +- (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS); ++ virtio_acpi_dsdt_add(scope, memmap[VIRT_MMIO].base, memmap[VIRT_MMIO].size, ++ (irqmap[VIRT_MMIO] + ARM_SPI_BASE), ++ 0, NUM_VIRTIO_TRANSPORTS); + acpi_dsdt_add_pci(scope, memmap, irqmap[VIRT_PCIE] + ARM_SPI_BASE, vms); + + if (virtcca_cvm_enabled()) { +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index 67291563d3..7f29622099 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -79,3 +79,4 @@ system_ss.add(when: 'CONFIG_ALL', if_true: files('virtio-stub.c')) + system_ss.add(files('virtio-hmp-cmds.c')) + + specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: specific_virtio_ss) ++system_ss.add(when: 'CONFIG_ACPI', if_true: files('virtio-acpi.c')) +diff --git a/hw/virtio/virtio-acpi.c b/hw/virtio/virtio-acpi.c +new file mode 100644 +index 0000000000..e18cb38bdb +--- /dev/null ++++ b/hw/virtio/virtio-acpi.c +@@ -0,0 +1,33 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* ++ * virtio ACPI Support ++ * ++ */ ++ ++#include "hw/virtio/virtio-acpi.h" 
++#include "hw/acpi/aml-build.h" ++ ++void virtio_acpi_dsdt_add(Aml *scope, const hwaddr base, const hwaddr size, ++ uint32_t mmio_irq, long int start_index, int num) ++{ ++ hwaddr virtio_base = base; ++ uint32_t irq = mmio_irq; ++ long int i; ++ ++ for (i = start_index; i < start_index + num; i++) { ++ Aml *dev = aml_device("VR%02u", (unsigned)i); ++ aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005"))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(i))); ++ aml_append(dev, aml_name_decl("_CCA", aml_int(1))); ++ ++ Aml *crs = aml_resource_template(); ++ aml_append(crs, aml_memory32_fixed(virtio_base, size, AML_READ_WRITE)); ++ aml_append(crs, ++ aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH, ++ AML_EXCLUSIVE, &irq, 1)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ aml_append(scope, dev); ++ virtio_base += size; ++ irq++; ++ } ++} +diff --git a/include/hw/virtio/virtio-acpi.h b/include/hw/virtio/virtio-acpi.h +new file mode 100644 +index 0000000000..844e102569 +--- /dev/null ++++ b/include/hw/virtio/virtio-acpi.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0+ */ ++/* ++ * ACPI support for virtio ++ */ ++ ++#ifndef VIRTIO_ACPI_H ++#define VIRTIO_ACPI_H ++ ++#include "qemu/osdep.h" ++#include "exec/hwaddr.h" ++ ++void virtio_acpi_dsdt_add(Aml *scope, const hwaddr virtio_mmio_base, ++ const hwaddr virtio_mmio_size, uint32_t mmio_irq, ++ long int start_index, int num); ++ ++#endif +-- +2.33.0 + diff --git a/hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch b/hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch deleted file mode 100644 index 3d711678a6bbd365da89b3039509259f9ffe3c2e..0000000000000000000000000000000000000000 --- a/hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch +++ /dev/null @@ -1,25 +0,0 @@ -From fbcb4ffa8648d0aa5be01c11816423a483f245ae Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Tue, 26 May 2020 22:39:23 +0800 -Subject: [PATCH] hw/arm/virt: add missing compat for kvm-no-adjvtime - -Machine 
compatibility for kvm-no-adjvtime is missed, -let's add it for virt machine 4.0 - -Signed-off-by: Ying Fang - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 4c727939..133d36a4 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2492,6 +2492,7 @@ static void virt_machine_4_0_options(MachineClass *mc) - compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); - vmc->no_ged = true; - mc->auto_enable_numa_with_memhp = false; -+ vmc->kvm_no_adjvtime = true; - } - DEFINE_VIRT_MACHINE(4, 0) - --- -2.23.0 - diff --git a/hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch b/hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f525755130938ba86ac09f32cc83344a81a9d06 --- /dev/null +++ b/hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch @@ -0,0 +1,56 @@ +From d13e44fe048159d48891887169f756ac974d07fb Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 16:49:00 +0800 +Subject: [PATCH 1/4] hw/arm/virt: decouple migrate_hdbss_buffer_size() with + kvm_update_hdbss_cap() + +So that we can move kvm_update_hdbss_cap() to accel/kvm/kvm-all.c + +Signed-of-by: Jason Zeng +--- + migration/ram.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index a8308eb005..ee57da62f6 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2813,7 +2813,7 @@ static void xbzrle_cleanup(void) + } + + #ifdef TARGET_AARCH64 +-static void kvm_update_hdbss_cap(bool enable) ++static void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) + { + KVMState *s = kvm_state; + int size, ret; +@@ -2822,7 +2822,7 @@ static void kvm_update_hdbss_cap(bool enable) + return; + } + +- size = migrate_hdbss_buffer_size(); ++ size = hdbss_buffer_size; + if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { + fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); + return; +@@ -2856,7 +2856,7 @@ static void ram_save_cleanup(void 
*opaque) + * memory_global_dirty_log_start/stop used in pairs + */ + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(false); ++ kvm_update_hdbss_cap(false, 0); + #endif + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } +@@ -3262,7 +3262,7 @@ static void ram_init_bitmaps(RAMState *rs) + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(true); ++ kvm_update_hdbss_cap(true, migrate_hdbss_buffer_size()); + #endif + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); +-- +2.33.0 + diff --git a/hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch b/hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a9572dc681fd68af15749c2f7ffb17bb6ee1286 --- /dev/null +++ b/hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch @@ -0,0 +1,54 @@ +From e8587f657fd33f223227a167e94ed69db729e2ac Mon Sep 17 00:00:00 2001 +From: eillon +Date: Sun, 25 May 2025 22:22:58 +0800 +Subject: [PATCH] hw/arm/virt: only support the HDBSS feature in aarch64 + +Only support the HDBSS feature in aarch64 architecture as it +depends on the kvm. 
+--- + migration/ram.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 6acf518a34..a8308eb005 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2812,6 +2812,7 @@ static void xbzrle_cleanup(void) + XBZRLE_cache_unlock(); + } + ++#ifdef TARGET_AARCH64 + static void kvm_update_hdbss_cap(bool enable) + { + KVMState *s = kvm_state; +@@ -2836,6 +2837,7 @@ static void kvm_update_hdbss_cap(bool enable) + + return; + } ++#endif + + static void ram_save_cleanup(void *opaque) + { +@@ -2853,7 +2855,9 @@ static void ram_save_cleanup(void *opaque) + * memory_global_dirty_log_stop will assert that + * memory_global_dirty_log_start/stop used in pairs + */ ++#ifdef TARGET_AARCH64 + kvm_update_hdbss_cap(false); ++#endif + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } + } +@@ -3257,7 +3261,9 @@ static void ram_init_bitmaps(RAMState *rs) + ram_list_init_bitmaps(); + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { ++#ifdef TARGET_AARCH64 + kvm_update_hdbss_cap(true); ++#endif + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); + } +-- +2.33.0 + diff --git a/hw-arm-virt-support-the-HDBSS-feature.patch b/hw-arm-virt-support-the-HDBSS-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..874f05e41d49bac6be89a39533e33ba902b2d3f0 --- /dev/null +++ b/hw-arm-virt-support-the-HDBSS-feature.patch @@ -0,0 +1,285 @@ +From e549f32b1a88cb9ffdc4fc88fa818854a918498e Mon Sep 17 00:00:00 2001 +From: eillon +Date: Mon, 14 Apr 2025 22:33:21 +0800 +Subject: [PATCH] hw/arm/virt: support the HDBSS feature + +We use QEMU to enable or disable the HDBSS feature during live +migration. 
We can use the migration-parameter to control the size +of the HDBSS buffer, such as: + migrate_set_parameter hdbss-buffer-size 3 + info migrate_parameters + +Signed-off-by: eillon +--- + linux-headers/linux/kvm.h | 2 ++ + migration/migration-hmp-cmds.c | 9 +++++++++ + migration/migration.h | 7 +++++++ + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + migration/ram.c | 28 ++++++++++++++++++++++++++++ + qapi/migration.json | 17 ++++++++++++++--- + 7 files changed, 82 insertions(+), 3 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index b94c5fd90f..57d6e12744 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1212,6 +1212,8 @@ struct kvm_ppc_resize_hpt { + /* support request to inject secret to CSV3 guest */ + #define KVM_CAP_HYGON_COCO_EXT_CSV3_INJ_SECRET (1 << 2) + ++#define KVM_CAP_ARM_HW_DIRTY_STATE_TRACK 502 ++ + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 + + #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index aac5e7a73a..9857e2c97f 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -409,6 +409,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_SEV_AMD_CERT), + params->sev_amd_cert); ++ ++ assert(params->has_hdbss_buffer_size); ++ monitor_printf(mon, "%s: %u\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_HDBSS_BUFFER_SIZE), ++ params->hdbss_buffer_size); + } + + qapi_free_MigrationParameters(params); +@@ -725,6 +730,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->sev_amd_cert->type = QTYPE_QSTRING; + visit_type_str(v, param, &p->sev_amd_cert->u.s, &err); + break; ++ case MIGRATION_PARAMETER_HDBSS_BUFFER_SIZE: ++ p->has_hdbss_buffer_size = true; ++ visit_type_uint8(v, param, &p->hdbss_buffer_size, &err); ++ 
break; + default: + assert(0); + } +diff --git a/migration/migration.h b/migration/migration.h +index eeddb7c0bd..4a95f00157 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -48,6 +48,13 @@ struct PostcopyBlocktimeContext; + */ + #define CLEAR_BITMAP_SHIFT_MAX 31 + ++/* ++ * The default HDBSS size. The value ranges [0, 9]. ++ * Set to 0 to disable the HDBSS feature. ++ */ ++#define DEFAULT_HDBSS_BUFFER_SIZE 0 ++#define MAX_HDBSS_BUFFER_SIZE 9 ++ + /* This is an abstraction of a "temp huge page" for postcopy's purpose */ + typedef struct { + /* +diff --git a/migration/options.c b/migration/options.c +index 71e71ea801..71645c8721 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -186,6 +186,9 @@ Property migration_properties[] = { + DEFINE_PROP_STRING("sev-pdh", MigrationState, parameters.sev_pdh), + DEFINE_PROP_STRING("sev-plat-cert", MigrationState, parameters.sev_plat_cert), + DEFINE_PROP_STRING("sev-amd-cert", MigrationState, parameters.sev_amd_cert), ++ DEFINE_PROP_UINT8("hdbss-buffer-size", MigrationState, ++ parameters.hdbss_buffer_size, ++ DEFAULT_HDBSS_BUFFER_SIZE), + + /* Migration capabilities */ + DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), +@@ -853,6 +856,13 @@ MigMode migrate_mode(void) + return s->parameters.mode; + } + ++int migrate_hdbss_buffer_size(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->parameters.hdbss_buffer_size; ++} ++ + int migrate_multifd_channels(void) + { + MigrationState *s = migrate_get_current(); +@@ -1032,6 +1042,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; + params->has_mode = true; + params->mode = s->parameters.mode; ++ params->has_hdbss_buffer_size = true; ++ params->hdbss_buffer_size = s->parameters.hdbss_buffer_size; + + return params; + } +@@ -1069,6 +1081,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_x_vcpu_dirty_limit_period = true; + 
params->has_vcpu_dirty_limit = true; + params->has_mode = true; ++ params->has_hdbss_buffer_size = true; + + params->sev_pdh = g_strdup(""); + params->sev_plat_cert = g_strdup(""); +@@ -1415,6 +1428,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + assert(params->sev_amd_cert->type == QTYPE_QSTRING); + dest->sev_amd_cert = params->sev_amd_cert->u.s; + } ++ ++ if (params->has_hdbss_buffer_size) { ++ dest->hdbss_buffer_size = params->hdbss_buffer_size; ++ } + } + + static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +@@ -1579,6 +1596,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + assert(params->sev_amd_cert->type == QTYPE_QSTRING); + s->parameters.sev_amd_cert = g_strdup(params->sev_amd_cert->u.s); + } ++ ++ if (params->has_hdbss_buffer_size) { ++ s->parameters.hdbss_buffer_size = params->hdbss_buffer_size; ++ } + } + + void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) +diff --git a/migration/options.h b/migration/options.h +index 9aca5e41ad..987fc81a18 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -85,6 +85,7 @@ uint64_t migrate_max_bandwidth(void); + uint64_t migrate_avail_switchover_bandwidth(void); + uint64_t migrate_max_postcopy_bandwidth(void); + MigMode migrate_mode(void); ++int migrate_hdbss_buffer_size(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +diff --git a/migration/ram.c b/migration/ram.c +index 1f9348fd06..f1ff38cf39 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -39,6 +39,7 @@ + #include "migration-stats.h" + #include "migration/register.h" + #include "migration/misc.h" ++#include "migration/options.h" + #include "qemu-file.h" + #include "postcopy-ram.h" + #include "page_cache.h" +@@ -2790,6 +2791,31 @@ static void xbzrle_cleanup(void) + XBZRLE_cache_unlock(); + } + ++static void kvm_update_hdbss_cap(bool enable) 
++{ ++ KVMState *s = kvm_state; ++ int size, ret; ++ ++ if (s == NULL || !kvm_check_extension(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK)) { ++ return; ++ } ++ ++ size = migrate_hdbss_buffer_size(); ++ if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { ++ fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); ++ return; ++ } ++ ++ ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK, 0, ++ enable ? size : 0); ++ if (ret) { ++ fprintf(stderr, "Could not %s KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: %d\n", ++ enable ? "enable" : "disable", ret); ++ } ++ ++ return; ++} ++ + static void ram_save_cleanup(void *opaque) + { + RAMState **rsp = opaque; +@@ -2806,6 +2832,7 @@ static void ram_save_cleanup(void *opaque) + * memory_global_dirty_log_stop will assert that + * memory_global_dirty_log_start/stop used in pairs + */ ++ kvm_update_hdbss_cap(false); + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } + } +@@ -3209,6 +3236,7 @@ static void ram_init_bitmaps(RAMState *rs) + ram_list_init_bitmaps(); + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { ++ kvm_update_hdbss_cap(true); + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); + } +diff --git a/qapi/migration.json b/qapi/migration.json +index 3aed216c3b..f672da5c0d 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -902,6 +902,9 @@ + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in + # base64, or vendor cert filename for hygon (Since 4.2) + # ++# @hdbss-buffer-size: Size of the HDBSS(Hardware Dirty state tracking Structure). ++# Defaults to 0. (Since 8.6) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. 
Use +@@ -937,7 +940,7 @@ + { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, + 'vcpu-dirty-limit', + 'mode', +- 'sev-pdh', 'sev-plat-cert', 'sev-amd-cert'] } ++ 'sev-pdh', 'sev-plat-cert', 'sev-amd-cert', 'hdbss-buffer-size'] } + + ## + # @MigrateSetParameters: +@@ -1106,6 +1109,9 @@ + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in + # base64, or vendor cert filename for hygon (Since 4.2) + # ++# @hdbss-buffer-size: Size of the HDBSS(Hardware Dirty state tracking Structure). ++# Defaults to 0. (Since 8.6) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. Use +@@ -1165,7 +1171,8 @@ + '*mode': 'MigMode', + '*sev-pdh': 'StrOrNull', + '*sev-plat-cert': 'StrOrNull', +- '*sev-amd-cert' : 'StrOrNull' } } ++ '*sev-amd-cert' : 'StrOrNull', ++ '*hdbss-buffer-size': 'uint8'} } + + + ## +@@ -1355,6 +1362,9 @@ + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in + # base64, or vendor cert filename for hygon (Since 4.2) + # ++# @hdbss-buffer-size: Size of the HDBSS(Hardware Dirty state tracking Structure). ++# Defaults to 0. (Since 8.6) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. Use +@@ -1410,7 +1420,8 @@ + '*mode': 'MigMode', + '*sev-pdh': 'str', + '*sev-plat-cert': 'str', +- '*sev-amd-cert' : 'str'} } ++ '*sev-amd-cert' : 'str', ++ '*hdbss-buffer-size': 'uint8'} } + + ## + # @query-migrate-parameters: +-- +2.33.0 + diff --git a/hw-arm-virt-vTPM-support.patch b/hw-arm-virt-vTPM-support.patch deleted file mode 100644 index cbdc68e62f88981b10ebd459ce101caaf10dba6d..0000000000000000000000000000000000000000 --- a/hw-arm-virt-vTPM-support.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 443ebab9c299b04f020a6873454facb078723141 Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Thu, 13 Aug 2020 20:01:10 +0800 -Subject: [PATCH 15/19] hw/arm/virt: vTPM support - -Let the TPM TIS SYSBUS device be dynamically instantiable -in ARM virt. 
A device tree node is dynamically created -(TPM via MMIO). - -The TPM Physical Presence interface (PPI) is not supported. - -To run with the swtmp TPM emulator, the qemu command line must -be augmented with: - - -chardev socket,id=chrtpm,path=swtpm-sock - -tpmdev emulator,id=tpm0,chardev=chrtpm - -device tpm-tis-device,tpmdev=tpm0 - -swtpm/libtpms command line example: - -swtpm socket --tpm2 -t -d --tpmstate dir=/tmp/tpm ---ctrl type=unixio,path=swtpm-sock - -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Tested-by: Ard Biesheuvel -Acked-by: Ard Biesheuvel -Message-id: 20200305165149.618-7-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - hw/arm/Kconfig | 1 + - hw/arm/sysbus-fdt.c | 33 +++++++++++++++++++++++++++++++++ - hw/arm/virt.c | 7 +++++++ - 3 files changed, 41 insertions(+) - -diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig -index 15e18b0a..06e49f26 100644 ---- a/hw/arm/Kconfig -+++ b/hw/arm/Kconfig -@@ -5,6 +5,7 @@ config ARM_VIRT - imply VFIO_AMD_XGBE - imply VFIO_PLATFORM - imply VFIO_XGMAC -+ imply TPM_TIS_SYSBUS - select A15MPCORE - select ACPI - select ARM_SMMUV3 -diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c -index 57f94e65..c725d325 100644 ---- a/hw/arm/sysbus-fdt.c -+++ b/hw/arm/sysbus-fdt.c -@@ -30,6 +30,7 @@ - #include "hw/arm/sysbus-fdt.h" - #include "qemu/error-report.h" - #include "sysemu/device_tree.h" -+#include "sysemu/tpm.h" - #include "hw/platform-bus.h" - #include "sysemu/sysemu.h" - #include "hw/vfio/vfio-platform.h" -@@ -437,6 +438,37 @@ static bool vfio_platform_match(SysBusDevice *sbdev, - - #endif /* CONFIG_LINUX */ - -+/* -+ * add_tpm_tis_fdt_node: Create a DT node for TPM TIS -+ * -+ * See kernel documentation: -+ * Documentation/devicetree/bindings/security/tpm/tpm_tis_mmio.txt -+ * Optional interrupt for command completion is not exposed -+ */ -+static int add_tpm_tis_fdt_node(SysBusDevice *sbdev, void *opaque) -+{ -+ PlatformBusFDTData *data = opaque; -+ PlatformBusDevice 
*pbus = data->pbus; -+ void *fdt = data->fdt; -+ const char *parent_node = data->pbus_node_name; -+ char *nodename; -+ uint32_t reg_attr[2]; -+ uint64_t mmio_base; -+ -+ mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0); -+ nodename = g_strdup_printf("%s/tpm_tis@%" PRIx64, parent_node, mmio_base); -+ qemu_fdt_add_subnode(fdt, nodename); -+ -+ qemu_fdt_setprop_string(fdt, nodename, "compatible", "tcg,tpm-tis-mmio"); -+ -+ reg_attr[0] = cpu_to_be32(mmio_base); -+ reg_attr[1] = cpu_to_be32(0x5000); -+ qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, 2 * sizeof(uint32_t)); -+ -+ g_free(nodename); -+ return 0; -+} -+ - static int no_fdt_node(SysBusDevice *sbdev, void *opaque) - { - return 0; -@@ -457,6 +489,7 @@ static const BindingEntry bindings[] = { - TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node), - VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node), - #endif -+ TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node), - TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node), - TYPE_BINDING("", NULL), /* last element */ - }; -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 133d36a4..7afc6c5e 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -47,6 +47,7 @@ - #include "sysemu/numa.h" - #include "sysemu/cpus.h" - #include "sysemu/sysemu.h" -+#include "sysemu/tpm.h" - #include "sysemu/kvm.h" - #include "sysemu/cpus.h" - #include "sysemu/hw_accel.h" -@@ -2368,6 +2369,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM); -+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); - mc->block_default_type = IF_VIRTIO; - mc->no_cdrom = 1; - mc->pci_allow_0_address = true; -@@ -2481,6 +2483,11 @@ type_init(machvirt_machine_init); - - static void virt_machine_4_1_options(MachineClass *mc) - { -+ static GlobalProperty compat[] = { -+ 
{ TYPE_TPM_TIS_SYSBUS, "ppi", "false" }, -+ }; -+ -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - } - DEFINE_VIRT_MACHINE_AS_LATEST(4, 1) - --- -2.23.0 - diff --git a/hw-arm64-add-vcpu-cache-info-support.patch b/hw-arm64-add-vcpu-cache-info-support.patch index 79e1dede39def063dc9d8a4f4b87339bcd39c435..1e09d4fa537e3ed220bfb67e95685c60c64ee016 100644 --- a/hw-arm64-add-vcpu-cache-info-support.patch +++ b/hw-arm64-add-vcpu-cache-info-support.patch @@ -1,63 +1,68 @@ -From 5a0ed254f99ca37498bd81994b906b6984b5ffa9 Mon Sep 17 00:00:00 2001 +From 7d3d37d3af4278aee627952d6a81b63dec6ac62b Mon Sep 17 00:00:00 2001 From: Ying Fang -Date: Wed, 22 Apr 2020 19:25:00 +0800 +Date: Sun, 17 Mar 2024 18:56:09 +0800 Subject: [PATCH] hw/arm64: add vcpu cache info support Support VCPU Cache info by dtb and PPTT table, including L1, L2 and L3 Cache. Signed-off-by: zhanghailiang Signed-off-by: Honghao +Signed-off-by: Ying Fang +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang --- - hw/acpi/aml-build.c | 126 ++++++++++++++++++++++++++++++++++++ - hw/arm/virt.c | 80 ++++++++++++++++++++++- - include/hw/acpi/aml-build.h | 46 +++++++++++++ - 3 files changed, 251 insertions(+), 1 deletion(-) + hw/acpi/aml-build.c | 158 ++++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 72 ++++++++++++++++ + include/hw/acpi/aml-build.h | 47 +++++++++++ + 3 files changed, 277 insertions(+) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index f2c8c28f..74e95005 100644 +index af66bde0f5..2968df5562 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c -@@ -55,6 +55,131 @@ static void build_append_array(GArray *array, GArray *val) - /* - * ACPI 6.2 Processor Properties Topology Table (PPTT) - */ +@@ -1994,6 +1994,163 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + +#ifdef __aarch64__ -+static void build_cache_head(GArray *tbl, uint32_t next_level) ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29.2 Cache Type Structure (Type 1) ++ */ 
++static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, ++ uint32_t cache_type) +{ + build_append_byte(tbl, 1); + build_append_byte(tbl, 24); + build_append_int_noprefix(tbl, 0, 2); + build_append_int_noprefix(tbl, 127, 4); + build_append_int_noprefix(tbl, next_level, 4); -+} + -+static void build_cache_tail(GArray *tbl, uint32_t cache_type) -+{ + switch (cache_type) { -+ case ARM_L1D_CACHE: /* L1 dcache info*/ ++ case ARM_L1D_CACHE: /* L1 dcache info */ + build_append_int_noprefix(tbl, ARM_L1DCACHE_SIZE, 4); -+ build_append_int_noprefix(tbl, ARM_L1DCACHE_SET, 4); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SETS, 4); + build_append_byte(tbl, ARM_L1DCACHE_ASSOCIATIVITY); + build_append_byte(tbl, ARM_L1DCACHE_ATTRIBUTES); + build_append_int_noprefix(tbl, ARM_L1DCACHE_LINE_SIZE, 2); + break; -+ case ARM_L1I_CACHE: /* L1 icache info*/ ++ case ARM_L1I_CACHE: /* L1 icache info */ + build_append_int_noprefix(tbl, ARM_L1ICACHE_SIZE, 4); -+ build_append_int_noprefix(tbl, ARM_L1ICACHE_SET, 4); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SETS, 4); + build_append_byte(tbl, ARM_L1ICACHE_ASSOCIATIVITY); + build_append_byte(tbl, ARM_L1ICACHE_ATTRIBUTES); + build_append_int_noprefix(tbl, ARM_L1ICACHE_LINE_SIZE, 2); + break; -+ case ARM_L2_CACHE: /* L2 cache info*/ ++ case ARM_L2_CACHE: /* L2 cache info */ + build_append_int_noprefix(tbl, ARM_L2CACHE_SIZE, 4); -+ build_append_int_noprefix(tbl, ARM_L2CACHE_SET, 4); ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SETS, 4); + build_append_byte(tbl, ARM_L2CACHE_ASSOCIATIVITY); + build_append_byte(tbl, ARM_L2CACHE_ATTRIBUTES); + build_append_int_noprefix(tbl, ARM_L2CACHE_LINE_SIZE, 2); + break; -+ case ARM_L3_CACHE: /* L3 cache info*/ ++ case ARM_L3_CACHE: /* L3 cache info */ + build_append_int_noprefix(tbl, ARM_L3CACHE_SIZE, 4); -+ build_append_int_noprefix(tbl, ARM_L3CACHE_SET, 4); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SETS, 4); + build_append_byte(tbl, ARM_L3CACHE_ASSOCIATIVITY); + 
build_append_byte(tbl, ARM_L3CACHE_ATTRIBUTES); + build_append_int_noprefix(tbl, ARM_L3CACHE_LINE_SIZE, 2); @@ -68,208 +73,230 @@ index f2c8c28f..74e95005 100644 + build_append_byte(tbl, 0); + build_append_byte(tbl, 0); + build_append_int_noprefix(tbl, 0, 2); -+ break; + } +} + -+static void build_cache_hierarchy(GArray *tbl, -+ uint32_t next_level, uint32_t cache_type) ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29 Processor Properties Topology Table (PPTT) ++ */ ++void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++ const char *oem_id, const char *oem_table_id) +{ -+ build_cache_head(tbl, next_level); -+ build_cache_tail(tbl, cache_type); -+} ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ GQueue *list = g_queue_new(); ++ guint pptt_start = table_data->len; ++ guint parent_offset; ++ guint length, i; ++ int uid = 0; ++ int socket; ++ AcpiTable table = { .sig = "PPTT", .rev = 2, ++ .oem_id = oem_id, .oem_table_id = oem_table_id }; + -+static void build_arm_socket_hierarchy(GArray *tbl, -+ uint32_t offset, uint32_t id) -+{ -+ build_append_byte(tbl, 0); /* Type 0 - processor */ -+ build_append_byte(tbl, 24); /* Length, add private resources */ -+ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ -+ build_append_int_noprefix(tbl, 1, 4); /* Processor boundary and id invalid*/ -+ build_append_int_noprefix(tbl, 0, 4); -+ build_append_int_noprefix(tbl, id, 4); -+ build_append_int_noprefix(tbl, 1, 4); /* Num private resources */ -+ build_append_int_noprefix(tbl, offset, 4); -+} ++ acpi_table_begin(&table, table_data); + -+static void build_arm_cpu_hierarchy(GArray *tbl, -+ struct offset_status *offset, uint32_t id) -+{ -+ if (!offset) { -+ return; ++ for (socket = 0; socket < ms->smp.sockets; socket++) { ++ uint32_t l3_cache_offset = table_data->len - pptt_start; ++ build_cache_hierarchy_node(table_data, 0, ARM_L3_CACHE); ++ ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ 
table_data, ++ /* ++ * Physical package - represents the boundary ++ * of a physical package ++ */ ++ (1 << 0), ++ 0, socket, &l3_cache_offset, 1); + } -+ build_append_byte(tbl, 0); /* Type 0 - processor */ -+ build_append_byte(tbl, 32); /* Length, add private resources */ -+ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ -+ build_append_int_noprefix(tbl, 2, 4); /* Valid id*/ -+ build_append_int_noprefix(tbl, offset->parent, 4); -+ build_append_int_noprefix(tbl, id, 4); -+ build_append_int_noprefix(tbl, 3, 4); /* Num private resources */ -+ build_append_int_noprefix(tbl, offset->l1d_offset, 4); -+ build_append_int_noprefix(tbl, offset->l1i_offset, 4); -+ build_append_int_noprefix(tbl, offset->l2_offset, 4); -+} + -+void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) -+{ -+ int pptt_start = table_data->len; -+ int uid = 0, cpus = 0, socket; -+ struct offset_status offset; -+ const MachineState *ms = MACHINE(qdev_get_machine()); -+ unsigned int smp_cores = ms->smp.cores; ++ if (mc->smp_props.clusters_supported) { ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int cluster; + -+ acpi_data_push(table_data, sizeof(AcpiTableHeader)); ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (cluster = 0; cluster < ms->smp.clusters; cluster++) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a physical package */ ++ parent_offset, cluster, NULL, 0); ++ } ++ } ++ } + -+ for (socket = 0; cpus < possible_cpus; socket++) { ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { + int core; -+ uint32_t l3_offset = table_data->len - pptt_start; -+ build_cache_hierarchy(table_data, 0, ARM_L3_CACHE); -+ -+ offset.parent = table_data->len - pptt_start; -+ build_arm_socket_hierarchy(table_data, l3_offset, socket); -+ -+ for (core = 0; core < smp_cores; core++) { -+ offset.l2_offset = table_data->len - 
pptt_start; -+ build_cache_hierarchy(table_data, 0, ARM_L2_CACHE); -+ offset.l1d_offset = table_data->len - pptt_start; -+ build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1D_CACHE); -+ offset.l1i_offset = table_data->len - pptt_start; -+ build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1I_CACHE); -+ build_arm_cpu_hierarchy(table_data, &offset, uid++); -+ cpus++; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (core = 0; core < ms->smp.cores; core++) { ++ uint32_t priv_rsrc[3] = {}; ++ priv_rsrc[0] = table_data->len - pptt_start; /* L2 cache offset */ ++ build_cache_hierarchy_node(table_data, 0, ARM_L2_CACHE); ++ ++ priv_rsrc[1] = table_data->len - pptt_start; /* L1 dcache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1D_CACHE); ++ ++ priv_rsrc[2] = table_data->len - pptt_start; /* L1 icache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1I_CACHE); ++ ++ if (ms->smp.threads > 1) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a physical package */ ++ parent_offset, core, priv_rsrc, 3); ++ } else { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, priv_rsrc, 3); ++ } ++ } ++ } ++ ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int thread; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (thread = 0; thread < ms->smp.threads; thread++) { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 2) | /* Processor is a Thread */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, NULL, 0); + } + } + -+ build_header(linker, table_data, -+ (void *)(table_data->data + pptt_start), "PPTT", -+ table_data->len - pptt_start, 1, NULL, NULL); ++ g_queue_free(list); ++ acpi_table_end(linker, 
&table); +} + +#else - static void build_cpu_hierarchy(GArray *tbl, uint32_t flags, - uint32_t parent, uint32_t id) - { -@@ -103,6 +228,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) - (void *)(table_data->data + pptt_start), "PPTT", - table_data->len - pptt_start, 1, NULL, NULL); + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +@@ -2069,6 +2226,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + + acpi_table_end(linker, &table); } +#endif - - #define ACPI_NAMESEG_LEN 4 - + + /* build rev1/rev3/rev5.1/rev6.0 FADT */ + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 272455bc..9669c70b 100644 +index 500a15aa5b..b82bd1b8c8 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -308,6 +308,81 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags); +@@ -379,6 +379,72 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) + INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags); } - + +static void fdt_add_l3cache_nodes(const VirtMachineState *vms) +{ + int i; -+ const MachineState *ms = MACHINE(qdev_get_machine()); -+ unsigned int smp_cores = ms->smp.cores; -+ unsigned int sockets = vms->smp_cpus / smp_cores; -+ -+ /* If current is not equal to max */ -+ if (vms->smp_cpus % smp_cores) -+ sockets++; ++ const MachineState *ms = MACHINE(vms); ++ int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads; ++ int sockets = (ms->smp.cpus + cpus_per_socket - 1) / cpus_per_socket; + + for (i = 0; i < sockets; i++) { + char *nodename = g_strdup_printf("/cpus/l3-cache%d", i); -+ qemu_fdt_add_subnode(vms->fdt, nodename); -+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cache"); -+ qemu_fdt_setprop_string(vms->fdt, nodename, "cache-unified", "true"); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-level", 3); -+ 
qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-size", 0x2000000); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-line-size", 128); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-sets", 2048); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", -+ qemu_fdt_alloc_phandle(vms->fdt)); ++ ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "cache-unified", "true"); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-level", 3); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x2000000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 128); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 2048); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(ms->fdt)); + g_free(nodename); + } +} + -+ +static void fdt_add_l2cache_nodes(const VirtMachineState *vms) +{ -+ int i, j; -+ const MachineState *ms = MACHINE(qdev_get_machine()); -+ unsigned int smp_cores = ms->smp.cores; -+ signed int sockets = vms->smp_cpus / smp_cores; ++ const MachineState *ms = MACHINE(vms); ++ int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads; ++ int cpu; + -+ /* If current is not equal to max */ -+ if (vms->smp_cpus % smp_cores) -+ sockets++; ++ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { ++ char *next_path = g_strdup_printf("/cpus/l3-cache%d", ++ cpu / cpus_per_socket); ++ char *nodename = g_strdup_printf("/cpus/l2-cache%d", cpu); ++ ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x80000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 1024); ++ qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache", ++ next_path); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", ++ 
qemu_fdt_alloc_phandle(ms->fdt)); + -+ for (i = 0; i < sockets; i++) { -+ char *next_path = g_strdup_printf("/cpus/l3-cache%d", i); -+ for (j = 0; j < smp_cores; j++) { -+ char *nodename = g_strdup_printf("/cpus/l2-cache%d", -+ i * smp_cores + j); -+ qemu_fdt_add_subnode(vms->fdt, nodename); -+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cache"); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-size", 0x80000); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-line-size", 64); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-sets", 1024); -+ qemu_fdt_setprop_phandle(vms->fdt, nodename, -+ "next-level-cache", next_path); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", -+ qemu_fdt_alloc_phandle(vms->fdt)); -+ g_free(nodename); -+ } + g_free(next_path); ++ g_free(nodename); + } +} + +static void fdt_add_l1cache_prop(const VirtMachineState *vms, -+ char *nodename, int cpu) ++ char *nodename, int cpu) +{ ++ const MachineState *ms = MACHINE(vms); + char *cachename = g_strdup_printf("/cpus/l2-cache%d", cpu); + -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "d-cache-size", 0x10000); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "d-cache-line-size", 64); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "d-cache-sets", 256); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "i-cache-size", 0x10000); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "i-cache-line-size", 64); -+ qemu_fdt_setprop_cell(vms->fdt, nodename, "i-cache-sets", 256); -+ qemu_fdt_setprop_phandle(vms->fdt, nodename, -+ "next-level-cache", cachename); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-sets", 256); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-sets", 256); ++ qemu_fdt_setprop_phandle(ms->fdt, 
nodename, "next-level-cache", ++ cachename); + g_free(cachename); +} -+ + static void fdt_add_cpu_nodes(const VirtMachineState *vms) { int cpu; -@@ -341,6 +416,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) - qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells); - qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0); - -+ fdt_add_l3cache_nodes(vms); -+ fdt_add_l2cache_nodes(vms); +@@ -413,6 +479,11 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", addr_cells); + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0); + ++ if (!vmc->no_cpu_topology) { ++ fdt_add_l3cache_nodes(vms); ++ fdt_add_l2cache_nodes(vms); ++ } + - for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) { + for (cpu = smp_cpus - 1; cpu >= 0; cpu--) { char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu); ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); -@@ -369,7 +447,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) - qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", - ms->possible_cpus->cpus[cs->cpu_index].props.node_id); +@@ -442,6 +513,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + } + + if (!vmc->no_cpu_topology) { ++ fdt_add_l1cache_prop(vms, nodename, cpu); + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(ms->fdt)); } -- -+ fdt_add_l1cache_prop(vms, nodename, cpu); - qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", - qemu_fdt_alloc_phandle(vms->fdt)); - diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h -index bfb0b100..0be3453a 100644 +index ff2a310270..84ded2ecd3 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h -@@ -223,6 +223,52 @@ struct AcpiBuildTables { +@@ -221,6 +221,53 @@ struct AcpiBuildTables { BIOSLinker *linker; } AcpiBuildTables; - + +#ifdef __aarch64__ +/* Definitions of the hardcoded cache info*/ + @@ -282,28 +309,28 @@ index bfb0b100..0be3453a 
100644 + +/* L1 data cache: */ +#define ARM_L1DCACHE_SIZE 65536 -+#define ARM_L1DCACHE_SET 256 ++#define ARM_L1DCACHE_SETS 256 +#define ARM_L1DCACHE_ASSOCIATIVITY 4 +#define ARM_L1DCACHE_ATTRIBUTES 2 +#define ARM_L1DCACHE_LINE_SIZE 64 + +/* L1 instruction cache: */ +#define ARM_L1ICACHE_SIZE 65536 -+#define ARM_L1ICACHE_SET 256 ++#define ARM_L1ICACHE_SETS 256 +#define ARM_L1ICACHE_ASSOCIATIVITY 4 +#define ARM_L1ICACHE_ATTRIBUTES 4 +#define ARM_L1ICACHE_LINE_SIZE 64 + +/* Level 2 unified cache: */ +#define ARM_L2CACHE_SIZE 524288 -+#define ARM_L2CACHE_SET 1024 ++#define ARM_L2CACHE_SETS 1024 +#define ARM_L2CACHE_ASSOCIATIVITY 8 +#define ARM_L2CACHE_ATTRIBUTES 10 +#define ARM_L2CACHE_LINE_SIZE 64 + +/* Level 3 unified cache: */ +#define ARM_L3CACHE_SIZE 33554432 -+#define ARM_L3CACHE_SET 2048 ++#define ARM_L3CACHE_SETS 2048 +#define ARM_L3CACHE_ASSOCIATIVITY 15 +#define ARM_L3CACHE_ATTRIBUTES 10 +#define ARM_L3CACHE_LINE_SIZE 128 @@ -316,8 +343,10 @@ index bfb0b100..0be3453a 100644 +}; + +#endif - /** - * init_aml_allocator: - * --- -2.23.0 ++ + typedef + struct CrsRangeEntry { + uint64_t base; +-- +2.27.0 + diff --git a/hw-audio-cs4231a-fix-assertion-error-in-isa_bus_get_.patch b/hw-audio-cs4231a-fix-assertion-error-in-isa_bus_get_.patch new file mode 100644 index 0000000000000000000000000000000000000000..3bb243fa0424755ccd70ab2ace9e4bd907e93363 --- /dev/null +++ b/hw-audio-cs4231a-fix-assertion-error-in-isa_bus_get_.patch @@ -0,0 +1,37 @@ +From 3e4513fcbbb00aff1d8147cee3b93c2bbf3a68fb Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 14 Jun 2025 17:09:25 +0800 +Subject: [PATCH] hw/audio/cs4231a: fix assertion error in isa_bus_get_irq + + This patch fixes an assertion error in isa_bus_get_irq() in + /hw/isa/isa-bus.c by adding a constraint to the irq property. + Patch v1 misused ISA_NUM_IRQS, pls ignore that. 
+ + Signed-off-by: Zheng Huang + Link: https://lore.kernel.org/r/6d228069-e38f-4c46-813f-edcccc5c47e4@gmail.com + Signed-off-by: Paolo Bonzini + +Signed-off-by: dinglimin +--- + hw/audio/cs4231a.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c +index 3aa105748d..88dfd0bb7f 100644 +--- a/hw/audio/cs4231a.c ++++ b/hw/audio/cs4231a.c +@@ -682,6 +682,11 @@ static void cs4231a_realizefn (DeviceState *dev, Error **errp) + return; + } + ++ if (s->irq >= ISA_NUM_IRQS) { ++ error_setg(errp, "Invalid IRQ %d (max %d)", s->irq, ISA_NUM_IRQS - 1); ++ return; ++ } ++ + s->pic = isa_bus_get_irq(bus, s->irq); + k = ISADMA_GET_CLASS(s->isa_dma); + k->register_channel(s->isa_dma, s->dma, cs_dma_read, s); +-- +2.33.0 + diff --git a/hw-audio-hda-fix-memory-leak-on-audio-setup.patch b/hw-audio-hda-fix-memory-leak-on-audio-setup.patch new file mode 100644 index 0000000000000000000000000000000000000000..de45fe979ee4038291664320ade45278ba45c2d9 --- /dev/null +++ b/hw-audio-hda-fix-memory-leak-on-audio-setup.patch @@ -0,0 +1,85 @@ +From ecca2052693cc2a91459ac418bface2f1e635c88 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 14 Nov 2024 13:53:18 +0100 +Subject: [PATCH] hw/audio/hda: fix memory leak on audio setup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When SET_STREAM_FORMAT is called, the st->buft timer is overwritten, thus +causing a memory leak. This was originally fixed in commit 816139ae6a5 +("hw/audio/hda: fix memory leak on audio setup", 2024-11-14) but that +caused the audio to break in SPICE. + +Fortunately, a simpler fix is possible. The timer only needs to be +reset, because the callback is always the same (st->output is set at +realize time in hda_audio_init); call to timer_new_ns overkill. Replace +it with timer_del and only initialize the timer once; for simplicity, +do it even if use_timer is false. 
+ +An even simpler fix would be to free the old time in hda_audio_setup(). +However, it seems better to place the initialization of the timer close +to that of st->ouput. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Reviewed-by: Michael Tokarev +Message-ID: <20241114125318.1707590-3-pbonzini@redhat.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 626b39006d2f9b1378a04cb88a2187bb852cb055) +Signed-off-by: zhujun2 +--- + hw/audio/hda-codec.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/hw/audio/hda-codec.c b/hw/audio/hda-codec.c +index 19f401cabe..ac908e56c6 100644 +--- a/hw/audio/hda-codec.c ++++ b/hw/audio/hda-codec.c +@@ -487,8 +487,7 @@ static void hda_audio_setup(HDAAudioStream *st) + if (st->output) { + if (use_timer) { + cb = hda_audio_output_cb; +- st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, +- hda_audio_output_timer, st); ++ timer_del(st->buft); + } else { + cb = hda_audio_compat_output_cb; + } +@@ -497,8 +496,7 @@ static void hda_audio_setup(HDAAudioStream *st) + } else { + if (use_timer) { + cb = hda_audio_input_cb; +- st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, +- hda_audio_input_timer, st); ++ timer_del(st->buft); + } else { + cb = hda_audio_compat_input_cb; + } +@@ -726,8 +724,12 @@ static void hda_audio_init(HDACodecDevice *hda, + st->gain_right = QEMU_HDA_AMP_STEPS; + st->compat_bpos = sizeof(st->compat_buf); + st->output = true; ++ st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, ++ hda_audio_output_timer, st); + } else { + st->output = false; ++ st->buft = timer_new_ns(QEMU_CLOCK_VIRTUAL, ++ hda_audio_input_timer, st); + } + st->format = AC_FMT_TYPE_PCM | AC_FMT_BITS_16 | + (1 << AC_FMT_CHAN_SHIFT); +@@ -750,9 +752,7 @@ static void hda_audio_exit(HDACodecDevice *hda) + if (st->node == NULL) { + continue; + } +- if (a->use_timer) { +- timer_free(st->buft); +- } ++ timer_free(st->buft); + if (st->output) { + AUD_close_out(&a->card, st->voice.out); + } else { +-- 
+2.41.0.windows.1 + diff --git a/hw-audio-hda-free-timer-on-exit.patch b/hw-audio-hda-free-timer-on-exit.patch new file mode 100644 index 0000000000000000000000000000000000000000..e08e69c00039af896880e40513ff25a790cbbbae --- /dev/null +++ b/hw-audio-hda-free-timer-on-exit.patch @@ -0,0 +1,36 @@ +From 28bf94c86d3914b8b517dae483d1d69b3afabacc Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Tue, 5 Nov 2024 07:03:48 -0500 +Subject: [PATCH] hw/audio/hda: free timer on exit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from f27206ceedbe2efae37c8d143c5eb2db05251508 + +Fixes: 280c1e1cd ("audio/hda: create millisecond timers that handle IO") + +Signed-off-by: Marc-André Lureau +Reviewed-by: Akihiko Odaki +Message-ID: <20241008125028.1177932-2-marcandre.lureau@redhat.com> +Signed-off-by: qihao_yewu +--- + hw/audio/hda-codec.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/audio/hda-codec.c b/hw/audio/hda-codec.c +index 0bc20d49f6..19f401cabe 100644 +--- a/hw/audio/hda-codec.c ++++ b/hw/audio/hda-codec.c +@@ -751,7 +751,7 @@ static void hda_audio_exit(HDACodecDevice *hda) + continue; + } + if (a->use_timer) { +- timer_del(st->buft); ++ timer_free(st->buft); + } + if (st->output) { + AUD_close_out(&a->card, st->voice.out); + } else { +-- +2.41.0.windows.1 + diff --git a/hw-audio-virtio-snd-Always-use-little-endian-audio-f.patch b/hw-audio-virtio-snd-Always-use-little-endian-audio-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..0932b2df97c33f3501abdf3a248d7b16d2c9ea52 --- /dev/null +++ b/hw-audio-virtio-snd-Always-use-little-endian-audio-f.patch @@ -0,0 +1,42 @@ +From 482808a35957c10d9eb4264492a8e11a2ba749c1 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Fri, 22 Nov 2024 17:49:38 +0800 +Subject: [PATCH] hw/audio/virtio-snd: Always use little endian audio format +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from
a276ec8e2632c9015d0f9b4e47194e4e91dfa8bb + +The VIRTIO Sound Device conforms with the Virtio spec v1.2, +thus only use little endianness. + +Remove the suspicious target_words_bigendian() noticed during +code review. + +Cc: qemu-stable@nongnu.org +Fixes: eb9ad377bb ("virtio-sound: handle control messages and streams") +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Message-Id: <20240422211830.25606-1-philmd@linaro.org> +Signed-off-by: gubin +--- + hw/audio/virtio-snd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c +index 817fdcd910..9f7a69e408 100644 +--- a/hw/audio/virtio-snd.c ++++ b/hw/audio/virtio-snd.c +@@ -377,7 +377,7 @@ static void virtio_snd_get_qemu_audsettings(audsettings *as, + as->nchannels = MIN(AUDIO_MAX_CHANNELS, params->channels); + as->fmt = virtio_snd_get_qemu_format(params->format); + as->freq = virtio_snd_get_qemu_freq(params->rate); +- as->endianness = target_words_bigendian() ? 1 : 0; ++ as->endianness = 0; /* Conforming to VIRTIO 1.0: always little endian. */ + } + + /* +-- +2.41.0.windows.1 + diff --git a/hw-audio-virtio-sound-fix-heap-buffer-overflow.patch b/hw-audio-virtio-sound-fix-heap-buffer-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..a5ccb91b2f64c91043395aac99dc2647b7cf5d86 --- /dev/null +++ b/hw-audio-virtio-sound-fix-heap-buffer-overflow.patch @@ -0,0 +1,86 @@ +From 0981edabf57b5728211deeca459fb15927e7cc36 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Volker=20R=C3=BCmelin?= +Date: Sun, 1 Sep 2024 15:01:12 +0200 +Subject: [PATCH] hw/audio/virtio-sound: fix heap buffer overflow +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently, the guest may write to the device configuration space, +whereas the virtio sound device specification in chapter 5.14.4 +clearly states that the fields in the device configuration space +are driver-read-only. 
+ +Remove the set_config function from the virtio_snd class. + +This also prevents a heap buffer overflow. See QEMU issue #2296. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2296 +Signed-off-by: Volker Rümelin +Message-Id: <20240901130112.8242-1-vr_qemu@t-online.de> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 7fc6611cad3e9627b23ce83e550b668abba6c886) +Signed-off-by: zhujun2 +--- + hw/audio/trace-events | 1 - + hw/audio/virtio-snd.c | 24 ------------------------ + 2 files changed, 25 deletions(-) + +diff --git a/hw/audio/trace-events b/hw/audio/trace-events +index b1870ff224..b8ef572767 100644 +--- a/hw/audio/trace-events ++++ b/hw/audio/trace-events +@@ -41,7 +41,6 @@ asc_update_irq(int irq, int a, int b) "set IRQ to %d (A: 0x%x B: 0x%x)" + + #virtio-snd.c + virtio_snd_get_config(void *vdev, uint32_t jacks, uint32_t streams, uint32_t chmaps) "snd %p: get_config jacks=%"PRIu32" streams=%"PRIu32" chmaps=%"PRIu32"" +-virtio_snd_set_config(void *vdev, uint32_t jacks, uint32_t new_jacks, uint32_t streams, uint32_t new_streams, uint32_t chmaps, uint32_t new_chmaps) "snd %p: set_config jacks from %"PRIu32"->%"PRIu32", streams from %"PRIu32"->%"PRIu32", chmaps from %"PRIu32"->%"PRIu32 + virtio_snd_get_features(void *vdev, uint64_t features) "snd %p: get_features 0x%"PRIx64 + virtio_snd_vm_state_running(void) "vm state running" + virtio_snd_vm_state_stopped(void) "vm state stopped" +diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c +index 137fa77a01..cb7049abb3 100644 +--- a/hw/audio/virtio-snd.c ++++ b/hw/audio/virtio-snd.c +@@ -107,29 +107,6 @@ virtio_snd_get_config(VirtIODevice *vdev, uint8_t *config) + + } + +-static void +-virtio_snd_set_config(VirtIODevice *vdev, const uint8_t *config) +-{ +- VirtIOSound *s = VIRTIO_SND(vdev); +- const virtio_snd_config *sndconfig = +- (const virtio_snd_config *)config; +- +- +- trace_virtio_snd_set_config(vdev, +- s->snd_conf.jacks, +- sndconfig->jacks, +- 
s->snd_conf.streams, +- sndconfig->streams, +- s->snd_conf.chmaps, +- sndconfig->chmaps); +- +- memcpy(&s->snd_conf, sndconfig, sizeof(virtio_snd_config)); +- le32_to_cpus(&s->snd_conf.jacks); +- le32_to_cpus(&s->snd_conf.streams); +- le32_to_cpus(&s->snd_conf.chmaps); +- +-} +- + static void + virtio_snd_pcm_buffer_free(VirtIOSoundPCMBuffer *buffer) + { +@@ -1399,7 +1376,6 @@ static void virtio_snd_class_init(ObjectClass *klass, void *data) + vdc->realize = virtio_snd_realize; + vdc->unrealize = virtio_snd_unrealize; + vdc->get_config = virtio_snd_get_config; +- vdc->set_config = virtio_snd_set_config; + vdc->get_features = get_features; + vdc->reset = virtio_snd_reset; + vdc->legacy_features = 0; +-- +2.41.0.windows.1 + diff --git a/hw-block-fix-uint32-overflow.patch b/hw-block-fix-uint32-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..09fa41f84f817faa2bd6075bc5666d29e8e1e75f --- /dev/null +++ b/hw-block-fix-uint32-overflow.patch @@ -0,0 +1,36 @@ +From fc5b9cb39257527568911f65c64d80e23f9f6ae3 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Wed, 18 Sep 2024 10:32:42 -0400 +Subject: [PATCH] hw/block: fix uint32 overflow + +cherry-pick from 89cd6254b80784a1b3f574407192493ef92fe65f + +The product bs->bl.zone_size * (bs->bl.nr_zones - 1) may overflow +uint32. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Signed-off-by: Dmitry Frolov +Message-id: 20240917080356.270576-2-frolov@swemel.ru +Signed-off-by: Stefan Hajnoczi +Signed-off-by: qihao_yewu +--- + hw/block/virtio-blk.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 2eb096a6dc..beedc0cf5f 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -860,7 +860,7 @@ static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op) + } else { + if (bs->bl.zone_size > capacity - offset) { + /* The zoned device allows the last smaller zone.
*/ +- len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1); ++ len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1ull); + } else { + len = bs->bl.zone_size; + } +-- +2.41.0.windows.1 + diff --git a/hw-block-nvme-fix-pci-doorbell-size-calculation.patch b/hw-block-nvme-fix-pci-doorbell-size-calculation.patch deleted file mode 100644 index f0aa09670e471a344c220ae38b8f5ba43b263eaf..0000000000000000000000000000000000000000 --- a/hw-block-nvme-fix-pci-doorbell-size-calculation.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 1aa42c9269c762ad1b7efa41e92f734b093dce1c Mon Sep 17 00:00:00 2001 -From: Klaus Jensen -Date: Tue, 9 Jun 2020 21:03:12 +0200 -Subject: [PATCH 10/11] hw/block/nvme: fix pci doorbell size calculation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The size of the BAR is 0x1000 (main registers) + 8 bytes for each -queue. Currently, the size of the BAR is calculated like so: - - n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); - -Since the 'num_queues' parameter already accounts for the admin queue, -this should in any case not need to be incremented by one. Also, the -size should be initialized to (0x1000). - - n->reg_size = pow2ceil(0x1000 + 2 * n->num_queues * 4); - -This, with the default value of num_queues (64), we will set aside room -for 1 admin queue and 63 I/O queues (4 bytes per doorbell, 2 doorbells -per queue). 
- -Signed-off-by: Klaus Jensen -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Maxim Levitsky -Reviewed-by: Keith Busch -Message-Id: <20200609190333.59390-2-its@irrelevant.dk> -Signed-off-by: Kevin Wolf -Signed-off-by: BiaoXiang Ye ---- - hw/block/nvme.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/block/nvme.c b/hw/block/nvme.c -index 417068d8..edac2f1d 100644 ---- a/hw/block/nvme.c -+++ b/hw/block/nvme.c -@@ -42,6 +42,9 @@ - #include "trace.h" - #include "nvme.h" - -+#define NVME_REG_SIZE 0x1000 -+#define NVME_DB_SIZE 4 -+ - #define NVME_GUEST_ERR(trace, fmt, ...) \ - do { \ - (trace_##trace)(__VA_ARGS__); \ -@@ -1348,7 +1351,9 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) - pcie_endpoint_cap_init(pci_dev, 0x80); - - n->num_namespaces = 1; -- n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); -+ -+ /* num_queues is really number of pairs, so each has two doorbells */ -+ n->reg_size = pow2ceil(NVME_REG_SIZE + 2 * n->num_queues * NVME_DB_SIZE); - n->ns_size = bs_size / (uint64_t)n->num_namespaces; - - n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); --- -2.27.0.dirty - diff --git a/hw-block-nvme-fix-pin-based-interrupt-behavior.patch b/hw-block-nvme-fix-pin-based-interrupt-behavior.patch deleted file mode 100644 index 1fe1213d998869c0f87eabd5d75fc62c3750f06b..0000000000000000000000000000000000000000 --- a/hw-block-nvme-fix-pin-based-interrupt-behavior.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 74ef18c90684f0ae18aef071b9e11a5e8796177b Mon Sep 17 00:00:00 2001 -From: alexchen -Date: Tue, 8 Sep 2020 11:17:20 +0000 -Subject: [PATCH] hw/block/nvme: fix pin-based interrupt behavior - -First, since the device only supports MSI-X or pin-based interrupt, if -MSI-X is not enabled, it should not accept interrupt vectors different -from 0 when creating completion queues. - -Secondly, the irq_status NvmeCtrl member is meant to be compared to the -INTMS register, so it should only be 32 bits wide. 
And it is really only -useful when used with multi-message MSI. - -Third, since we do not force a 1-to-1 correspondence between cqid and -interrupt vector, the irq_status register should not have bits set -according to cqid, but according to the associated interrupt vector. - -Fix these issues, but keep irq_status available so we can easily support -multi-message MSI down the line. - -Fixes: 5e9aa92eb1a5 ("hw/block: Fix pin-based interrupt behaviour of NVMe") -Cc: "Michael S. Tsirkin" -Cc: Marcel Apfelbaum -Signed-off-by: Klaus Jensen -Reviewed-by: Keith Busch -Message-Id: <20200609190333.59390-8-its@irrelevant.dk> -Signed-off-by: Kevin Wolf -Signed-off-by: BiaoXiang Ye -Signed-off-by: Zhenyu Ye ---- - hw/block/nvme.c | 12 ++++++++---- - hw/block/nvme.h | 2 +- - 2 files changed, 9 insertions(+), 5 deletions(-) - -diff --git a/hw/block/nvme.c b/hw/block/nvme.c -index 36d6a8bb..e35c2e10 100644 ---- a/hw/block/nvme.c -+++ b/hw/block/nvme.c -@@ -115,8 +115,8 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) - msix_notify(&(n->parent_obj), cq->vector); - } else { - trace_nvme_irq_pin(); -- assert(cq->cqid < 64); -- n->irq_status |= 1 << cq->cqid; -+ assert(cq->vector < 32); -+ n->irq_status |= 1 << cq->vector; - nvme_irq_check(n); - } - } else { -@@ -130,8 +130,8 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) - if (msix_enabled(&(n->parent_obj))) { - return; - } else { -- assert(cq->cqid < 64); -- n->irq_status &= ~(1 << cq->cqid); -+ assert(cq->vector < 32); -+ n->irq_status &= ~(1 << cq->vector); - nvme_irq_check(n); - } - } -@@ -630,6 +630,10 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) - trace_nvme_err_invalid_create_cq_addr(prp1); - return NVME_INVALID_FIELD | NVME_DNR; - } -+ if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { -+ trace_nvme_err_invalid_create_cq_vector(vector); -+ return NVME_INVALID_IRQ_VECTOR | NVME_DNR; -+ } - if (unlikely(vector > n->num_queues)) { - trace_nvme_err_invalid_create_cq_vector(vector); 
- return NVME_INVALID_IRQ_VECTOR | NVME_DNR; -diff --git a/hw/block/nvme.h b/hw/block/nvme.h -index 557194ee..f4c1ff91 100644 ---- a/hw/block/nvme.h -+++ b/hw/block/nvme.h -@@ -78,7 +78,7 @@ typedef struct NvmeCtrl { - uint32_t cmbsz; - uint32_t cmbloc; - uint8_t *cmbuf; -- uint64_t irq_status; -+ uint32_t irq_status; - uint64_t host_timestamp; /* Timestamp sent by the host */ - uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - --- -2.23.0 - diff --git a/hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch b/hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch new file mode 100644 index 0000000000000000000000000000000000000000..2008350d4590acdec21769fdb53102371500287a --- /dev/null +++ b/hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch @@ -0,0 +1,42 @@ +From fa62831c301fa2a1d4226e0fefdeb6b7a280fca6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:35 +0200 +Subject: [PATCH] hw/char/virtio-serial-bus: Protect from DMA re-entrancy + bugs(CVE-2024-3446) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Replace qemu_bh_new_guarded() by virtio_bh_new_guarded() +so the bus and device use the same guard. Otherwise the +DMA-reentrancy protection can be bypassed. + +Fixes: CVE-2024-3446 +Cc: qemu-stable@nongnu.org +Suggested-by: Alexander Bulekov +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. 
Tsirkin +Message-Id: <20240409105537.18308-4-philmd@linaro.org> +--- + hw/char/virtio-serial-bus.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index 44906057be..096214b11b 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -990,8 +990,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + return; + } + +- port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, +- &dev->mem_reentrancy_guard); ++ port->bh = virtio_bh_new_guarded(dev, flush_queued_data_bh, port); + port->elem = NULL; + } + +-- +2.27.0 + diff --git a/hw-core-loader-Add-ROM-loader-notifier.patch b/hw-core-loader-Add-ROM-loader-notifier.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4ce70d03e7010df9b3631c8bc6fa4c10f9d7d2d --- /dev/null +++ b/hw-core-loader-Add-ROM-loader-notifier.patch @@ -0,0 +1,96 @@ +From 9964f1260d5e67c2bc54031136629b10a4d81a2c Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Tue, 13 Jun 2023 18:01:50 +0100 +Subject: [PATCH] hw/core/loader: Add ROM loader notifier + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/4575987ee573474185f8ad8c715dffa9a40494ed + +Add a function to register a notifier, that is invoked after a ROM gets +loaded into guest memory. + +It will be used by Arm confidential guest support, in order to register +all blobs loaded into memory with KVM, so that their content is moved +into Realm state and measured into the initial VM state. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/core/loader.c | 14 ++++++++++++++ + include/hw/loader.h | 15 +++++++++++++++ + 2 files changed, 29 insertions(+) + +diff --git a/hw/core/loader.c b/hw/core/loader.c +index e7a9b3775b..1627ef1976 100644 +--- a/hw/core/loader.c ++++ b/hw/core/loader.c +@@ -67,6 +67,8 @@ + #include + + static int roms_loaded; ++static NotifierList rom_loader_notifier = ++ NOTIFIER_LIST_INITIALIZER(rom_loader_notifier); + + /* return the size or -1 if error */ + int64_t get_image_size(const char *filename) +@@ -1209,6 +1211,11 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, + return mr; + } + ++void rom_add_load_notifier(Notifier *notifier) ++{ ++ notifier_list_add(&rom_loader_notifier, notifier); ++} ++ + /* This function is specific for elf program because we don't need to allocate + * all the rom. We just allocate the first part and the rest is just zeros. This + * is why romsize and datasize are different. 
Also, this function takes its own +@@ -1250,6 +1257,7 @@ ssize_t rom_add_option(const char *file, int32_t bootindex) + static void rom_reset(void *unused) + { + Rom *rom; ++ RomLoaderNotifyData notify; + + QTAILQ_FOREACH(rom, &roms, next) { + if (rom->fw_file) { +@@ -1298,6 +1306,12 @@ static void rom_reset(void *unused) + cpu_flush_icache_range(rom->addr, rom->datasize); + + trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom); ++ ++ notify = (RomLoaderNotifyData) { ++ .addr = rom->addr, ++ .len = rom->datasize, ++ }; ++ notifier_list_notify(&rom_loader_notifier, &notify); + } + } + +diff --git a/include/hw/loader.h b/include/hw/loader.h +index 8685e27334..5df632c5bd 100644 +--- a/include/hw/loader.h ++++ b/include/hw/loader.h +@@ -356,6 +356,21 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); + ssize_t rom_add_vga(const char *file); + ssize_t rom_add_option(const char *file, int32_t bootindex); + ++typedef struct RomLoaderNotifyData { ++ /* Address of the blob in guest memory */ ++ hwaddr addr; ++ /* Length of the blob */ ++ size_t len; ++} RomLoaderNotifyData; ++ ++/** ++ * rom_add_load_notifier - Add a notifier for loaded images ++ * ++ * Add a notifier that will be invoked with a RomLoaderNotifyData structure for ++ * each blob loaded into guest memory, after the blob is loaded. ++ */ ++void rom_add_load_notifier(Notifier *notifier); ++ + /* This is the usual maximum in uboot, so if a uImage overflows this, it would + * overflow on real hardware too.
*/ + #define UBOOT_MAX_GUNZIP_BYTES (64 << 20) +-- +2.33.0 + diff --git a/hw-core-loader-Add-fields-to-RomLoaderNotify.patch b/hw-core-loader-Add-fields-to-RomLoaderNotify.patch new file mode 100644 index 0000000000000000000000000000000000000000..437ecce2af331befa49aa7fd68ef131c9896fd73 --- /dev/null +++ b/hw-core-loader-Add-fields-to-RomLoaderNotify.patch @@ -0,0 +1,48 @@ +From b398484a5425336c57256dde48b1ee6630be1552 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Thu, 7 Nov 2024 14:03:34 +0000 +Subject: [PATCH] hw/core/loader: Add fields to RomLoaderNotify + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/3bf3a64142d22868078d191d5ff0e6a3ddf0644c + +In order to write an event log, the ROM load notification handler needs +two more fields. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/core/loader.c | 2 ++ + include/hw/loader.h | 4 ++++ + 2 files changed, 6 insertions(+) + +diff --git a/hw/core/loader.c b/hw/core/loader.c +index 1627ef1976..7990147ade 100644 +--- a/hw/core/loader.c ++++ b/hw/core/loader.c +@@ -1308,6 +1308,8 @@ static void rom_reset(void *unused) + trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom); + + notify = (RomLoaderNotifyData) { ++ .name = rom->name, ++ .blob_ptr = rom->data, + .addr = rom->addr, + .len = rom->datasize, + }; +diff --git a/include/hw/loader.h b/include/hw/loader.h +index 5df632c5bd..3a5212b897 100644 +--- a/include/hw/loader.h ++++ b/include/hw/loader.h +@@ -357,6 +357,10 @@ ssize_t rom_add_vga(const char *file); + ssize_t rom_add_option(const char *file, int32_t bootindex); + + typedef struct RomLoaderNotifyData { ++ /* Description of the loaded ROM */ ++ const char *name; ++ /* Blob */ ++ void *blob_ptr; + /* Address of the blob in guest memory */ + hwaddr addr; + /* Length of the blob */ +-- +2.33.0 + diff --git a/hw-core-loader-Fix-possible-crash-in-rom_copy.patch b/hw-core-loader-Fix-possible-crash-in-rom_copy.patch deleted file mode 100644 
index 770f12b1acf9dfc3c4289e9a9bea7d5936df1968..0000000000000000000000000000000000000000 --- a/hw-core-loader-Fix-possible-crash-in-rom_copy.patch +++ /dev/null @@ -1,45 +0,0 @@ -From aae0faa5d3bee91c66dc4c1543190f55a242771e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 25 Sep 2019 14:16:43 +0200 -Subject: [PATCH] hw/core/loader: Fix possible crash in rom_copy() - -Both, "rom->addr" and "addr" are derived from the binary image -that can be loaded with the "-kernel" paramer. The code in -rom_copy() then calculates: - - d = dest + (rom->addr - addr); - -and uses "d" as destination in a memcpy() some lines later. Now with -bad kernel images, it is possible that rom->addr is smaller than addr, -thus "rom->addr - addr" gets negative and the memcpy() then tries to -copy contents from the image to a bad memory location. This could -maybe be used to inject code from a kernel image into the QEMU binary, -so we better fix it with an additional sanity check here. - -Cc: qemu-stable@nongnu.org -Reported-by: Guangming Liu -Buglink: https://bugs.launchpad.net/qemu/+bug/1844635 -Message-Id: <20190925130331.27825-1-thuth@redhat.com> -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Thomas Huth -(cherry picked from commit e423455c4f23a1a828901c78fe6d03b7dde79319) -Signed-off-by: Michael Roth ---- - hw/core/loader.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/core/loader.c b/hw/core/loader.c -index 425bf69a99..838a34174a 100644 ---- a/hw/core/loader.c -+++ b/hw/core/loader.c -@@ -1242,7 +1242,7 @@ int rom_copy(uint8_t *dest, hwaddr addr, size_t size) - if (rom->addr + rom->romsize < addr) { - continue; - } -- if (rom->addr > end) { -+ if (rom->addr > end || rom->addr < addr) { - break; - } - --- -2.23.0 diff --git a/hw-core-ptimer-fix-timer-zero-period-condition-for-f.patch b/hw-core-ptimer-fix-timer-zero-period-condition-for-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..20d112f0d532747ade8bd7db01cbfd377a7724b3 --- /dev/null +++ b/hw-core-ptimer-fix-timer-zero-period-condition-for-f.patch @@ -0,0 +1,101 @@ +From fcd3ff011e62739b824c2e465e01b98c47e364f5 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Fri, 16 Aug 2024 17:01:07 +0800 +Subject: [PATCH] hw/core/ptimer: fix timer zero period condition for freq > + 1GHz + +cheery-pick from 446e5e8b4515e9a7be69ef6a29852975289bb6f0 + +The real period is zero when both period and period_frac are zero. +Check the method ptimer_set_freq, if freq is larger than 1000 MHz, +the period is zero, but the period_frac is not, in this case, the +ptimer will work but the current code incorrectly recognizes that +the ptimer is disabled. 
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2306 +Signed-off-by: JianZhou Yue +Message-id: 3DA024AEA8B57545AF1B3CAA37077D0FB75E82C8@SHASXM03.verisilicon.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +Signed-off-by: qihao_yewu +--- + hw/core/ptimer.c | 4 ++-- + tests/unit/ptimer-test.c | 33 +++++++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c +index e03165febf..7177ecfab0 100644 +--- a/hw/core/ptimer.c ++++ b/hw/core/ptimer.c +@@ -83,7 +83,7 @@ static void ptimer_reload(ptimer_state *s, int delta_adjust) + delta = s->delta = s->limit; + } + +- if (s->period == 0) { ++ if (s->period == 0 && s->period_frac == 0) { + if (!qtest_enabled()) { + fprintf(stderr, "Timer with period zero, disabling\n"); + } +@@ -309,7 +309,7 @@ void ptimer_run(ptimer_state *s, int oneshot) + + assert(s->in_transaction); + +- if (was_disabled && s->period == 0) { ++ if (was_disabled && s->period == 0 && s->period_frac == 0) { + if (!qtest_enabled()) { + fprintf(stderr, "Timer with period zero, disabling\n"); + } +diff --git a/tests/unit/ptimer-test.c b/tests/unit/ptimer-test.c +index 04b5f4e3d0..08240594bb 100644 +--- a/tests/unit/ptimer-test.c ++++ b/tests/unit/ptimer-test.c +@@ -763,6 +763,33 @@ static void check_oneshot_with_load_0(gconstpointer arg) + ptimer_free(ptimer); + } + ++static void check_freq_more_than_1000M(gconstpointer arg) ++{ ++ const uint8_t *policy = arg; ++ ptimer_state *ptimer = ptimer_init(ptimer_trigger, NULL, *policy); ++ bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); ++ ++ triggered = false; ++ ++ ptimer_transaction_begin(ptimer); ++ ptimer_set_freq(ptimer, 2000000000); ++ ptimer_set_limit(ptimer, 8, 1); ++ ptimer_run(ptimer, 1); ++ ptimer_transaction_commit(ptimer); ++ ++ qemu_clock_step(3); ++ ++ g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 
3 : 2); ++ g_assert_false(triggered); ++ ++ qemu_clock_step(1); ++ ++ g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); ++ g_assert_true(triggered); ++ ++ ptimer_free(ptimer); ++} ++ + static void add_ptimer_tests(uint8_t policy) + { + char policy_name[256] = ""; +@@ -857,6 +884,12 @@ static void add_ptimer_tests(uint8_t policy) + policy_name), + g_memdup2(&policy, 1), check_oneshot_with_load_0, g_free); + g_free(tmp); ++ ++ g_test_add_data_func_full( ++ tmp = g_strdup_printf("/ptimer/freq_more_than_1000M policy=%s", ++ policy_name), ++ g_memdup2(&policy, 1), check_freq_more_than_1000M, g_free); ++ g_free(tmp); + } + + static void add_all_ptimer_policies_comb_tests(void) +-- +2.41.0.windows.1 + diff --git a/hw-cxl-Ensure-there-is-enough-data-for-the-header-in.patch b/hw-cxl-Ensure-there-is-enough-data-for-the-header-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a7e3a27e92c3349109b64380e580804457e7af6 --- /dev/null +++ b/hw-cxl-Ensure-there-is-enough-data-for-the-header-in.patch @@ -0,0 +1,37 @@ +From 830009038a73e496598c26679b7e30d7e931a1cf Mon Sep 17 00:00:00 2001 +From: Jonathan Cameron +Date: Fri, 1 Nov 2024 13:39:16 +0000 +Subject: [PATCH] hw/cxl: Ensure there is enough data for the header in + cmd_ccls_set_lsa() + +The properties of the requested set command cannot be established if +len_in is less than the size of the header. + +Reported-by: Esifiel +Signed-off-by: Jonathan Cameron +Message-Id: <20241101133917.27634-10-Jonathan.Cameron@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Zhongrui Tang +--- + hw/cxl/cxl-mailbox-utils.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +index 6eff56fb1b..9f2304389b 100644 +--- a/hw/cxl/cxl-mailbox-utils.c ++++ b/hw/cxl/cxl-mailbox-utils.c +@@ -897,8 +897,8 @@ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd, + const size_t hdr_len = offsetof(struct set_lsa_pl, data); + + *len_out = 0; +- if (!len_in) { +- return CXL_MBOX_SUCCESS; ++ if (len_in < hdr_len) { ++ return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + if (set_lsa_payload->offset + len_in > cvc->get_lsa_size(ct3d) + hdr_len) { +-- +2.41.0.windows.1 + diff --git a/hw-cxl-Ensure-there-is-enough-data-to-read-the-input.patch b/hw-cxl-Ensure-there-is-enough-data-to-read-the-input.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d59b7d9b363d116580dea6840838e8901b46cc1 --- /dev/null +++ b/hw-cxl-Ensure-there-is-enough-data-to-read-the-input.patch @@ -0,0 +1,37 @@ +From d96c34e132df55ca7be458095f23d81dfc14e0d5 Mon Sep 17 00:00:00 2001 +From: Jonathan Cameron +Date: Fri, 1 Nov 2024 13:39:17 +0000 +Subject: [PATCH] hw/cxl: Ensure there is enough data to read the input header + in cmd_get_physical_port_state() + +If len_in is smaller than the header length then the accessing the +number of ports will result in an out of bounds access. +Add a check to avoid this. + +Reported-by: Esifiel +Signed-off-by: Jonathan Cameron +Message-Id: <20241101133917.27634-11-Jonathan.Cameron@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Zhongrui Tang +--- + hw/cxl/cxl-mailbox-utils.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c +index 6eff56fb1b..11a26525a2 100644 +--- a/hw/cxl/cxl-mailbox-utils.c ++++ b/hw/cxl/cxl-mailbox-utils.c +@@ -505,6 +505,9 @@ static CXLRetCode cmd_get_physical_port_state(const struct cxl_cmd *cmd, + in = (struct cxl_fmapi_get_phys_port_state_req_pl *)payload_in; + out = (struct cxl_fmapi_get_phys_port_state_resp_pl *)payload_out; + ++ if (len_in < sizeof(*in)) { ++ return CXL_MBOX_INVALID_PAYLOAD_LENGTH; ++ } + /* Check if what was requested can fit */ + if (sizeof(*out) + sizeof(*out->ports) * in->num_ports > cci->payload_max) { + return CXL_MBOX_INVALID_INPUT; +-- +2.41.0.windows.1 + diff --git a/hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch b/hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch new file mode 100644 index 0000000000000000000000000000000000000000..ceb933b88035c1b6fca5c3128fc46223e139dc6b --- /dev/null +++ b/hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch @@ -0,0 +1,79 @@ +From 66d91f8cb6c9668744cf0acda4402f75c5e533e0 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 19 Mar 2024 14:36:46 +0800 +Subject: [PATCH] hw/cxl/cxl-host: Fix missing ERRP_GUARD() in + cxl_fixed_memory_window_config() + +cheery-pick from 2a0e0a35002db7ac64f4e82ea2a4ad2fb6d934b0 + +As the comment in qapi/error, dereferencing @errp requires +ERRP_GUARD(): + +* = Why, when and how to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +* - It must not be dereferenced, because it may be null. +... +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function. +* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. +* +* Using it when it's not needed is safe, but please avoid cluttering +* the source with useless code. 
+ +But in cxl_fixed_memory_window_config(), @errp is dereferenced in 2 +places without ERRP_GUARD(): + +fw->enc_int_ways = cxl_interleave_ways_enc(fw->num_targets, errp); +if (*errp) { + return; +} + +and + +fw->enc_int_gran = + cxl_interleave_granularity_enc(object->interleave_granularity, + errp); +if (*errp) { + return; +} + +For the above 2 places, we check "*errp", because neither function +returns a suitable error code. And since machine_set_cfmw() - the caller +of cxl_fixed_memory_window_config() - doesn't get the NULL @errp +parameter as the "set" method of object property, +cxl_fixed_memory_window_config() hasn't triggered the bug that +dereferencing the NULL @errp. + +To follow the requirement of @errp, add missing ERRP_GUARD() in +cxl_fixed_memory_window_config(). + +Suggested-by: Markus Armbruster +Signed-off-by: Zhao Liu +Reviewed-by: Markus Armbruster +Message-Id: <20240223085653.1255438-2-zhao1.liu@linux.intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jonathan Cameron +Signed-off-by: qihao_yewu +--- + hw/cxl/cxl-host.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c +index 2aa776c79c..c5f5fcfd64 100644 +--- a/hw/cxl/cxl-host.c ++++ b/hw/cxl/cxl-host.c +@@ -26,6 +26,7 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state, + CXLFixedMemoryWindowOptions *object, + Error **errp) + { ++ ERRP_GUARD(); + g_autofree CXLFixedWindow *fw = g_malloc0(sizeof(*fw)); + strList *target; + int i; +-- +2.27.0 + diff --git a/hw-display-bcm2835_fb-fix-fb_use_offsets-condition.patch b/hw-display-bcm2835_fb-fix-fb_use_offsets-condition.patch new file mode 100644 index 0000000000000000000000000000000000000000..0c2397611cb300b3c9eb942f84b431fcba0024eb --- /dev/null +++ b/hw-display-bcm2835_fb-fix-fb_use_offsets-condition.patch @@ -0,0 +1,51 @@ +From 1d3ea28fd7da9a23e278be70c7e028fbd2b69bf3 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 25 Jul 2024 10:29:20 +0800 +Subject: [PATCH] hw/display/bcm2835_fb: fix fb_use_offsets condition +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 345acc443905eda8008a1d328dd89b73c4a3f89e + +It is common practice when implementing double-buffering on VideoCore +to do so by multiplying the height of the virtual buffer by the +number of virtual screens desired (i.e., two - in the case of +double-bufferring). + +At present, this won't work in QEMU because the logic in +fb_use_offsets require that both the virtual width and height exceed +their physical counterparts. + +This appears to be unintentional/a typo and indeed the comment +states; "Experimentally, the hardware seems to do this only if the +viewport size is larger than the physical screen". The +viewport/virtual size would be larger than the physical size if +either virtual dimension were larger than their physical counterparts +and not necessarily both. 
+ +Signed-off-by: SamJakob +Message-id: 20240713160353.62410-1-me@samjakob.com +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Peter Maydell +Signed-off-by: qihao_yewu +--- + hw/display/bcm2835_fb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c +index a05277674f..c45da149d9 100644 +--- a/hw/display/bcm2835_fb.c ++++ b/hw/display/bcm2835_fb.c +@@ -145,7 +145,7 @@ static bool fb_use_offsets(BCM2835FBConfig *config) + * viewport size is larger than the physical screen. (It doesn't + * prevent the guest setting this silly viewport setting, though...) + */ +- return config->xres_virtual > config->xres && ++ return config->xres_virtual > config->xres || + config->yres_virtual > config->yres; + } + +-- +2.41.0.windows.1 + diff --git a/hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch b/hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch deleted file mode 100644 index 98e3c3bed9a221c978c8f733e5d587dc2803180b..0000000000000000000000000000000000000000 --- a/hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch +++ /dev/null @@ -1,46 +0,0 @@ -From b47d7ad29bc7f30d4ea3fdb0ef86942468416b79 Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Mon, 2 Nov 2020 16:52:17 +0000 -Subject: [PATCH] hw/display/exynos4210_fimd: Fix potential NULL pointer - dereference -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In exynos4210_fimd_update(), the pointer s is dereferinced before -being check if it is valid, which may lead to NULL pointer dereference. -So move the assignment to global_width after checking that the s is valid. 
- -Reported-by: Euler Robot -Signed-off-by: Alex Chen -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 5F9F8D88.9030102@huawei.com -Signed-off-by: Peter Maydell -(cherry-picked from commit 18520fa465) ---- - hw/display/exynos4210_fimd.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c -index 61f7408b1c..85b0ebf23a 100644 ---- a/hw/display/exynos4210_fimd.c -+++ b/hw/display/exynos4210_fimd.c -@@ -1271,12 +1271,14 @@ static void exynos4210_fimd_update(void *opaque) - bool blend = false; - uint8_t *host_fb_addr; - bool is_dirty = false; -- const int global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1; -+ int global_width; - - if (!s || !s->console || !s->enabled || - surface_bits_per_pixel(qemu_console_surface(s->console)) == 0) { - return; - } -+ -+ global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1; - exynos4210_update_resolution(s); - surface = qemu_console_surface(s->console); - --- -2.27.0 - diff --git a/hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch b/hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba043b631bb37466c47d20e091ce68c900cdafb3 --- /dev/null +++ b/hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch @@ -0,0 +1,68 @@ +From c9ee283913cc9df8998a21544a68ac1d2f86aa49 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 19 Mar 2024 15:07:51 +0800 +Subject: [PATCH] hw/display/macfb: Fix missing ERRP_GUARD() in + macfb_nubus_realize() + +cheery-pick from 5aa4a6417b0f7acbfd7f4c21dca26293bc3d9348 + +As the comment in qapi/error, dereferencing @errp requires +ERRP_GUARD(): + +* = Why, when and how to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +* - It must not be dereferenced, because it may be null. +... +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function. 
+* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. +* +* Using it when it's not needed is safe, but please avoid cluttering +* the source with useless code. + +But in macfb_nubus_realize(), @errp is dereferenced without +ERRP_GUARD(): + +ndc->parent_realize(dev, errp); +if (*errp) { + return; +} + +Here we check *errp, because the ndc->parent_realize(), as a +DeviceClass.realize() callback, returns void. And since +macfb_nubus_realize(), also as a DeviceClass.realize(), doesn't get the +NULL @errp parameter, it hasn't triggered the bug that dereferencing the +NULL @errp. + +To follow the requirement of @errp, add missing ERRP_GUARD() in +macfb_nubus_realize(). + +Suggested-by: Markus Armbruster +Signed-off-by: Zhao Liu +Reviewed-by: Markus Armbruster +Message-Id: <20240223085653.1255438-3-zhao1.liu@linux.intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: qihao_yewu +--- + hw/display/macfb.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/display/macfb.c b/hw/display/macfb.c +index d61541ccb5..170da35757 100644 +--- a/hw/display/macfb.c ++++ b/hw/display/macfb.c +@@ -714,6 +714,7 @@ static void macfb_nubus_set_irq(void *opaque, int n, int level) + + static void macfb_nubus_realize(DeviceState *dev, Error **errp) + { ++ ERRP_GUARD(); + NubusDevice *nd = NUBUS_DEVICE(dev); + MacfbNubusState *s = NUBUS_MACFB(dev); + MacfbNubusDeviceClass *ndc = NUBUS_MACFB_GET_CLASS(dev); +-- +2.27.0 + diff --git a/hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch b/hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch deleted file mode 100644 index 9f11b2d8bbc047a93dd11cf9c6a16eb757676f86..0000000000000000000000000000000000000000 --- a/hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 38697076a98034a078c2411234b8979cf3cec6da Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Mon, 2 Nov 2020 16:52:17 +0000 -Subject: [PATCH] 
hw/display/omap_lcdc: Fix potential NULL pointer dereference - -In omap_lcd_interrupts(), the pointer omap_lcd is dereferinced before -being check if it is valid, which may lead to NULL pointer dereference. -So move the assignment to surface after checking that the omap_lcd is valid -and move surface_bits_per_pixel(surface) to after the surface assignment. - -Reported-by: Euler Robot -Signed-off-by: AlexChen -Message-id: 5F9CDB8A.9000001@huawei.com -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell -(cherry-picked from commit 0080edc45e) ---- - hw/display/omap_lcdc.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/hw/display/omap_lcdc.c b/hw/display/omap_lcdc.c -index 07a5effe04..13ab73ec61 100644 ---- a/hw/display/omap_lcdc.c -+++ b/hw/display/omap_lcdc.c -@@ -77,14 +77,18 @@ static void omap_lcd_interrupts(struct omap_lcd_panel_s *s) - static void omap_update_display(void *opaque) - { - struct omap_lcd_panel_s *omap_lcd = (struct omap_lcd_panel_s *) opaque; -- DisplaySurface *surface = qemu_console_surface(omap_lcd->con); -+ DisplaySurface *surface; - draw_line_func draw_line; - int size, height, first, last; - int width, linesize, step, bpp, frame_offset; - hwaddr frame_base; - -- if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable || -- !surface_bits_per_pixel(surface)) { -+ if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable) { -+ return; -+ } -+ -+ surface = qemu_console_surface(omap_lcd->con); -+ if (!surface_bits_per_pixel(surface)) { - return; - } - --- -2.27.0 - diff --git a/hw-display-vhost-user-gpu.c-fix-vhost_user_gpu_chr_r.patch b/hw-display-vhost-user-gpu.c-fix-vhost_user_gpu_chr_r.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b4ac413455e11b93cc72ce1275c1a45f8097352 --- /dev/null +++ b/hw-display-vhost-user-gpu.c-fix-vhost_user_gpu_chr_r.patch @@ -0,0 +1,39 @@ +From f2efa9729b4cb4ec98f93c1eafe38459fd82e7ae Mon Sep 17 00:00:00 2001 +From: qihao +Date: Mon, 26 Aug 2024 
09:34:05 +0800 +Subject: [PATCH] hw/display/vhost-user-gpu.c: fix vhost_user_gpu_chr_read() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from d6192f3f7593536a4285e8ab6c6cf3f34973ce62 + +fix vhost_user_gpu_chr_read() where `size` was incorrectly passed to `msg->flags`. + +Fixes: 267f664658 ("hw/display: add vhost-user-vga & gpu-pci") +Signed-off-by: Haoran Zhang +Reviewed-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +Signed-off-by: qihao_yewu +--- + hw/display/vhost-user-gpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c +index 709c8a02a1..373f04a7b4 100644 +--- a/hw/display/vhost-user-gpu.c ++++ b/hw/display/vhost-user-gpu.c +@@ -385,7 +385,7 @@ vhost_user_gpu_chr_read(void *opaque) + } + + msg->request = request; +- msg->flags = size; ++ msg->flags = flags; + msg->size = size; + + if (request == VHOST_USER_GPU_CURSOR_UPDATE || +-- +2.41.0.windows.1 + diff --git a/hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch b/hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ba7daca6cc386215a7180e045d4b9f5a67b0510 --- /dev/null +++ b/hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch @@ -0,0 +1,142 @@ +From e72177cc2b3a4425c4be5ca8cc12bc99e63e2788 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:27 +0200 +Subject: [PATCH] hw/display/virtio-gpu: Protect from DMA re-entrancy + bugs(CVE-2024-3446) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Replace qemu_bh_new_guarded() by virtio_bh_new_guarded() +so the bus and device use the same guard. 
Otherwise the +DMA-reentrancy protection can be bypassed: + + $ cat << EOF | qemu-system-i386 -display none -nodefaults \ + -machine q35,accel=qtest \ + -m 512M \ + -device virtio-gpu \ + -qtest stdio + outl 0xcf8 0x80000820 + outl 0xcfc 0xe0004000 + outl 0xcf8 0x80000804 + outw 0xcfc 0x06 + write 0xe0004030 0x4 0x024000e0 + write 0xe0004028 0x1 0xff + write 0xe0004020 0x4 0x00009300 + write 0xe000401c 0x1 0x01 + write 0x101 0x1 0x04 + write 0x103 0x1 0x1c + write 0x9301c8 0x1 0x18 + write 0x105 0x1 0x1c + write 0x107 0x1 0x1c + write 0x109 0x1 0x1c + write 0x10b 0x1 0x00 + write 0x10d 0x1 0x00 + write 0x10f 0x1 0x00 + write 0x111 0x1 0x00 + write 0x113 0x1 0x00 + write 0x115 0x1 0x00 + write 0x117 0x1 0x00 + write 0x119 0x1 0x00 + write 0x11b 0x1 0x00 + write 0x11d 0x1 0x00 + write 0x11f 0x1 0x00 + write 0x121 0x1 0x00 + write 0x123 0x1 0x00 + write 0x125 0x1 0x00 + write 0x127 0x1 0x00 + write 0x129 0x1 0x00 + write 0x12b 0x1 0x00 + write 0x12d 0x1 0x00 + write 0x12f 0x1 0x00 + write 0x131 0x1 0x00 + write 0x133 0x1 0x00 + write 0x135 0x1 0x00 + write 0x137 0x1 0x00 + write 0x139 0x1 0x00 + write 0xe0007003 0x1 0x00 + EOF + ... 
+ ================================================================= + ==276099==ERROR: AddressSanitizer: heap-use-after-free on address 0x60d000011178 + at pc 0x562cc3b736c7 bp 0x7ffed49dee60 sp 0x7ffed49dee58 + READ of size 8 at 0x60d000011178 thread T0 + #0 0x562cc3b736c6 in virtio_gpu_ctrl_response hw/display/virtio-gpu.c:180:42 + #1 0x562cc3b7c40b in virtio_gpu_ctrl_response_nodata hw/display/virtio-gpu.c:192:5 + #2 0x562cc3b7c40b in virtio_gpu_simple_process_cmd hw/display/virtio-gpu.c:1015:13 + #3 0x562cc3b82873 in virtio_gpu_process_cmdq hw/display/virtio-gpu.c:1050:9 + #4 0x562cc4a85514 in aio_bh_call util/async.c:169:5 + #5 0x562cc4a85c52 in aio_bh_poll util/async.c:216:13 + #6 0x562cc4a1a79b in aio_dispatch util/aio-posix.c:423:5 + #7 0x562cc4a8a2da in aio_ctx_dispatch util/async.c:358:5 + #8 0x7f36840547a8 in g_main_context_dispatch (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x547a8) + #9 0x562cc4a8b753 in glib_pollfds_poll util/main-loop.c:290:9 + #10 0x562cc4a8b753 in os_host_main_loop_wait util/main-loop.c:313:5 + #11 0x562cc4a8b753 in main_loop_wait util/main-loop.c:592:11 + #12 0x562cc3938186 in qemu_main_loop system/runstate.c:782:9 + #13 0x562cc43b7af5 in qemu_default_main system/main.c:37:14 + #14 0x7f3683a6c189 in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16 + #15 0x7f3683a6c244 in __libc_start_main csu/../csu/libc-start.c:381:3 + #16 0x562cc2a58ac0 in _start (qemu-system-i386+0x231bac0) + + 0x60d000011178 is located 56 bytes inside of 136-byte region [0x60d000011140,0x60d0000111c8) + freed by thread T0 here: + #0 0x562cc2adb662 in __interceptor_free (qemu-system-i386+0x239e662) + #1 0x562cc3b86b21 in virtio_gpu_reset hw/display/virtio-gpu.c:1524:9 + #2 0x562cc416e20e in virtio_reset hw/virtio/virtio.c:2145:9 + #3 0x562cc37c5644 in virtio_pci_reset hw/virtio/virtio-pci.c:2249:5 + #4 0x562cc4233758 in memory_region_write_accessor system/memory.c:497:5 + #5 0x562cc4232eea in access_with_adjusted_size system/memory.c:573:18 
+ + previously allocated by thread T0 here: + #0 0x562cc2adb90e in malloc (qemu-system-i386+0x239e90e) + #1 0x7f368405a678 in g_malloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x5a678) + #2 0x562cc4163ffc in virtqueue_split_pop hw/virtio/virtio.c:1612:12 + #3 0x562cc4163ffc in virtqueue_pop hw/virtio/virtio.c:1783:16 + #4 0x562cc3b91a95 in virtio_gpu_handle_ctrl hw/display/virtio-gpu.c:1112:15 + #5 0x562cc4a85514 in aio_bh_call util/async.c:169:5 + #6 0x562cc4a85c52 in aio_bh_poll util/async.c:216:13 + #7 0x562cc4a1a79b in aio_dispatch util/aio-posix.c:423:5 + + SUMMARY: AddressSanitizer: heap-use-after-free hw/display/virtio-gpu.c:180:42 in virtio_gpu_ctrl_response + +With this change, the same reproducer triggers: + + qemu-system-i386: warning: Blocked re-entrant IO on MemoryRegion: virtio-pci-common-virtio-gpu at addr: 0x6 + +Fixes: CVE-2024-3446 +Cc: qemu-stable@nongnu.org +Reported-by: Alexander Bulekov +Reported-by: Yongkang Jia +Reported-by: Xiao Lei +Reported-by: Yiming Tao +Buglink: https://bugs.launchpad.net/qemu/+bug/1888606 +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. 
Tsirkin +Message-Id: <20240409105537.18308-3-philmd@linaro.org> +--- + hw/display/virtio-gpu.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index b02d1e3a4c..a714638822 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1456,10 +1456,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + + g->ctrl_vq = virtio_get_queue(vdev, 0); + g->cursor_vq = virtio_get_queue(vdev, 1); +- g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, +- &qdev->mem_reentrancy_guard); +- g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, +- &qdev->mem_reentrancy_guard); ++ g->ctrl_bh = virtio_bh_new_guarded(qdev, virtio_gpu_ctrl_bh, g); ++ g->cursor_bh = virtio_bh_new_guarded(qdev, virtio_gpu_cursor_bh, g); + g->reset_bh = qemu_bh_new(virtio_gpu_reset_bh, g); + qemu_cond_init(&g->reset_cond); + QTAILQ_INIT(&g->reslist); +-- +2.27.0 + diff --git a/hw-ehci-check-return-value-of-usb_packet_map.patch b/hw-ehci-check-return-value-of-usb_packet_map.patch deleted file mode 100644 index 2c05a2e61f8deef9a36ca8500cfabcb9736d14bc..0000000000000000000000000000000000000000 --- a/hw-ehci-check-return-value-of-usb_packet_map.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 02d63f9fd9655f1899dabbccaf0568bfaa3e97df Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Wed, 12 Aug 2020 09:17:27 -0700 -Subject: [PATCH] hw: ehci: check return value of 'usb_packet_map' - -If 'usb_packet_map' fails, we should stop to process the usb -request. 
- -Signed-off-by: Li Qiang -Message-Id: <20200812161727.29412-1-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann -(cherry-picked from 2fdb42d8) -Fix CVE-2020-25723 -Signed-off-by: Alex Chen ---- - hw/usb/hcd-ehci.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c -index 5f089f3005..433e6a4fc0 100644 ---- a/hw/usb/hcd-ehci.c -+++ b/hw/usb/hcd-ehci.c -@@ -1370,7 +1370,10 @@ static int ehci_execute(EHCIPacket *p, const char *action) - spd = (p->pid == USB_TOKEN_IN && NLPTR_TBIT(p->qtd.altnext) == 0); - usb_packet_setup(&p->packet, p->pid, ep, 0, p->qtdaddr, spd, - (p->qtd.token & QTD_TOKEN_IOC) != 0); -- usb_packet_map(&p->packet, &p->sgl); -+ if (usb_packet_map(&p->packet, &p->sgl)) { -+ qemu_sglist_destroy(&p->sgl); -+ return -1; -+ } - p->async = EHCI_ASYNC_INITIALIZED; - } - -@@ -1449,7 +1452,10 @@ static int ehci_process_itd(EHCIState *ehci, - if (ep && ep->type == USB_ENDPOINT_XFER_ISOC) { - usb_packet_setup(&ehci->ipacket, pid, ep, 0, addr, false, - (itd->transact[i] & ITD_XACT_IOC) != 0); -- usb_packet_map(&ehci->ipacket, &ehci->isgl); -+ if (usb_packet_map(&ehci->ipacket, &ehci->isgl)) { -+ qemu_sglist_destroy(&ehci->isgl); -+ return -1; -+ } - usb_handle_packet(dev, &ehci->ipacket); - usb_packet_unmap(&ehci->ipacket, &ehci->isgl); - } else { --- -2.27.0 - diff --git a/hw-gpio-aspeed_gpio-Avoid-shift-into-sign-bit.patch b/hw-gpio-aspeed_gpio-Avoid-shift-into-sign-bit.patch new file mode 100644 index 0000000000000000000000000000000000000000..9edcfec67400716ef2fc69a7f2857044190b387b --- /dev/null +++ b/hw-gpio-aspeed_gpio-Avoid-shift-into-sign-bit.patch @@ -0,0 +1,36 @@ +From 10794f7a9bc3c88c8a26f094e5d3ef42e9fd290f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E5=BC=A0=E6=A5=9A=E5=90=9B?= + +Date: Fri, 11 Oct 2024 14:04:54 +0800 +Subject: [PATCH] hw/gpio/aspeed_gpio: Avoid shift into sign bit + +In aspeed_gpio_update() we calculate "mask = 1 << gpio", where gpio can be between 0 and 31. 
+Coverity complains about this because 1 << 31 won't fit in a signed integer. + +For QEMU this isn't an error because we enable -fwrapv, but we can keep Coverity happy +by doing the shift on unsigned numbers. + +Resolves: Coverity CID 1547742 +Signed-off-by: Peter Maydell +Reviewed-by: Cedric Le Goater +Signed-off-by: zhangchujun +--- + hw/gpio/aspeed_gpio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c +index 1e267dd482..0fc3d4c05f 100644 +--- a/hw/gpio/aspeed_gpio.c ++++ b/hw/gpio/aspeed_gpio.c +@@ -281,7 +281,7 @@ static void aspeed_gpio_update(AspeedGPIOState *s, GPIOSets *regs, + diff &= mode_mask; + if (diff) { + for (gpio = 0; gpio < ASPEED_GPIOS_PER_SET; gpio++) { +- uint32_t mask = 1 << gpio; ++ uint32_t mask = 1U << gpio; + + /* If the gpio needs to be updated... */ + if (!(diff & mask)) { +-- +2.41.0.windows.1 + diff --git a/hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch b/hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch new file mode 100644 index 0000000000000000000000000000000000000000..aa63deb3fd944ed52bfc5e662c862ca45868ddd7 --- /dev/null +++ b/hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch @@ -0,0 +1,49 @@ +From f8ed9dd954fbd558d549c7c2e2ab7322107218a1 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 17:40:21 +0800 +Subject: [PATCH] hw/i2c/smbus_slave: Add object path on error prints +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from fcc8299e29816c9b6f8d9766254fce6e8a50ee52 + +The current logging doesn't tell us which specific smbus device is an +error state. 
+ +Signed-off-by: Joe Komlodi +Reviewed-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240202204847.2062798-3-komlodi@google.com> +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: dinglimin +--- + hw/i2c/smbus_slave.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/i2c/smbus_slave.c b/hw/i2c/smbus_slave.c +index 2ef2c7c5f6..b35516a404 100644 +--- a/hw/i2c/smbus_slave.c ++++ b/hw/i2c/smbus_slave.c +@@ -25,11 +25,15 @@ + #define DPRINTF(fmt, ...) \ + do { printf("smbus(%02x): " fmt , dev->i2c.address, ## __VA_ARGS__); } while (0) + #define BADF(fmt, ...) \ +-do { fprintf(stderr, "smbus: error: " fmt , ## __VA_ARGS__); exit(1);} while (0) ++do { g_autofree char *qom_path = object_get_canonical_path(OBJECT(dev)); \ ++ fprintf(stderr, "%s: smbus: error: " fmt , qom_path, ## __VA_ARGS__); \ ++ exit(1); } while (0) + #else + #define DPRINTF(fmt, ...) do {} while(0) + #define BADF(fmt, ...) \ +-do { fprintf(stderr, "smbus: error: " fmt , ## __VA_ARGS__);} while (0) ++do { g_autofree char *qom_path = object_get_canonical_path(OBJECT(dev)); \ ++ fprintf(stderr, "%s: smbus: error: " fmt , qom_path, ## __VA_ARGS__); \ ++ } while (0) + #endif + + enum { +-- +2.27.0 + diff --git a/hw-i386-Activate-IOMMUFD-for-q35-machines.patch b/hw-i386-Activate-IOMMUFD-for-q35-machines.patch new file mode 100644 index 0000000000000000000000000000000000000000..23ca9aae29d69f0cb1d7d4e04a23c3f819710538 --- /dev/null +++ b/hw-i386-Activate-IOMMUFD-for-q35-machines.patch @@ -0,0 +1,33 @@ +From 5405fa36c5f2784a9a6b19ee60d44b6cffb9f769 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Sat, 11 Jan 2025 10:52:57 +0800 +Subject: [PATCH] hw/i386: Activate IOMMUFD for q35 machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le 
Goater +Signed-off-by: Zhou Wang +--- + hw/i386/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index 682e324f1c..908f29e02b 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -105,6 +105,7 @@ config Q35 + imply E1000E_PCI_EXPRESS + imply VMPORT + imply VMMOUSE ++ imply IOMMUFD + select PC_PCI + select PC_ACPI + select PCI_EXPRESS_Q35 +-- +2.41.0.windows.1 + diff --git a/hw-i386-acpi-microvm.c-Use-common-function-to-add-vi.patch b/hw-i386-acpi-microvm.c-Use-common-function-to-add-vi.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea0ba795848ffbb7c74ead9e97e777da7b189e8b --- /dev/null +++ b/hw-i386-acpi-microvm.c-Use-common-function-to-add-vi.patch @@ -0,0 +1,57 @@ +From be7b030b8ae20a284aee2ab41f49337db3ebe48d Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:26 +0800 +Subject: [PATCH 03/18] hw/i386/acpi-microvm.c: Use common function to add + virtio in DSDT + +commit 8199bf48ea1fdb8e491311a0dc28cea30af18c95 upstream + +With common function to add virtio in DSDT created now, update microvm +code also to use it instead of duplicate code. + +Suggested-by: Andrew Jones +Signed-off-by: Sunil V L +Acked-by: Alistair Francis +Acked-by: Michael S. 
Tsirkin +Message-ID: <20231218150247.466427-4-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/i386/acpi-microvm.c | 15 ++------------- + 1 file changed, 2 insertions(+), 13 deletions(-) + +diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c +index 2909a73933..279da6b4aa 100644 +--- a/hw/i386/acpi-microvm.c ++++ b/hw/i386/acpi-microvm.c +@@ -37,6 +37,7 @@ + #include "hw/pci/pci.h" + #include "hw/pci/pcie_host.h" + #include "hw/usb/xhci.h" ++#include "hw/virtio/virtio-acpi.h" + #include "hw/virtio/virtio-mmio.h" + #include "hw/input/i8042.h" + +@@ -77,19 +78,7 @@ static void acpi_dsdt_add_virtio(Aml *scope, + uint32_t irq = mms->virtio_irq_base + index; + hwaddr base = VIRTIO_MMIO_BASE + index * 512; + hwaddr size = 512; +- +- Aml *dev = aml_device("VR%02u", (unsigned)index); +- aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005"))); +- aml_append(dev, aml_name_decl("_UID", aml_int(index))); +- aml_append(dev, aml_name_decl("_CCA", aml_int(1))); +- +- Aml *crs = aml_resource_template(); +- aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE)); +- aml_append(crs, +- aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH, +- AML_EXCLUSIVE, &irq, 1)); +- aml_append(dev, aml_name_decl("_CRS", crs)); +- aml_append(scope, dev); ++ virtio_acpi_dsdt_add(scope, base, size, irq, index, 1); + } + } + } +-- +2.33.0 + diff --git a/hw-i386-add-mem2-option-for-qemu.patch b/hw-i386-add-mem2-option-for-qemu.patch new file mode 100644 index 0000000000000000000000000000000000000000..eef09e89fe4164910e2c667fbd318d867216f02d --- /dev/null +++ b/hw-i386-add-mem2-option-for-qemu.patch @@ -0,0 +1,324 @@ +From d29bc8738131dcaaa1a1ae2870ea29b59a137f30 Mon Sep 17 00:00:00 2001 +From: xiongmengbiao +Date: Wed, 29 May 2024 00:05:44 +0800 +Subject: [PATCH] hw/i386: add mem2 option for qemu + +The '-mem2' option is used to create a set of hugepages +of memory and map them to a fixed address range of the guest. 
+ +This allows some devices to easily obtain continuous host +physical address ranges for performing DMA operations. + +Signed-off-by: xiongmengbiao +--- + hw/i386/pc.c | 121 ++++++++++++++++++++++++++++++++++++++++++++ + include/hw/boards.h | 2 + + qemu-options.hx | 12 +++++ + system/vl.c | 76 ++++++++++++++++++++++++++++ + 4 files changed, 211 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 29b9964733..204e34db86 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -743,6 +743,111 @@ void xen_load_linux(PCMachineState *pcms) + x86ms->fw_cfg = fw_cfg; + } + ++static int try_create_2MB_page(uint32_t page_num) ++{ ++ char nr_hp_num_s[256] = {0}; ++ char free_hp_num_s[256] = {0}; ++ const char *nr_hugepages_dir = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"; ++ const char *free_hugepages_dir = "/sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages"; ++ int nr_hp_num = -1, free_hp_num = -1, ret = -1; ++ int nr_fd = qemu_open_old(nr_hugepages_dir, O_RDWR); ++ int free_fd = qemu_open_old(free_hugepages_dir, O_RDONLY); ++ ++ if (nr_fd < 0 || free_fd < 0) { ++ error_report("%s: qemu_open failed: %s\n", __func__, strerror(errno)); ++ goto end; ++ } ++ ++ if (read(nr_fd, nr_hp_num_s, 256) < 0) ++ goto end; ++ if (read(free_fd, free_hp_num_s, 256) < 0) ++ goto end; ++ ++ nr_hp_num = atoi(nr_hp_num_s); ++ free_hp_num = atoi(free_hp_num_s); ++ if (nr_hp_num < 0 || free_hp_num < 0) ++ goto end; ++ ++ if (page_num <= free_hp_num) { ++ ret = 0; ++ goto end; ++ } ++ ++ nr_hp_num += (page_num - free_hp_num); ++ snprintf (nr_hp_num_s, 256, "%d", nr_hp_num); ++ if (write(nr_fd, nr_hp_num_s, strlen(nr_hp_num_s)) < 0) ++ goto end; ++ ++ ret = 0; ++end: ++ if (nr_fd >= 0) ++ close(nr_fd); ++ if (free_fd >= 0) ++ close(free_fd); ++ return ret; ++} ++ ++#define HUGEPAGE_NUM_MAX 128 ++#define HUGEPAGE_SIZE (1024*1024*2) ++static void mem2_init(MachineState *ms, MemoryRegion *system_memory) ++{ ++ MemoryRegion *mem2_mr; ++ char mr_name[128] = {0}; ++ void *ram 
= NULL; ++ int ret = 0, lock_fd; ++ const char *lock_file = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_overcommit_hugepages"; ++ uint32_t page_num = ms->ram2_size / HUGEPAGE_SIZE, i; ++ ++ if (HUGEPAGE_NUM_MAX < page_num) { ++ error_report("\"-mem2 'size='\" needs to Less than %dM\n", ++ (HUGEPAGE_SIZE * HUGEPAGE_NUM_MAX) / (1024 * 1024)); ++ exit(EXIT_FAILURE); ++ } ++ ++ // Apply for hugepages from OS and use them, which needs to be synchronized ++ lock_fd = qemu_open_old(lock_file, O_WRONLY); ++ if (lock_fd < 0) { ++ error_report("%s: open %s failed: %s\n", __func__, lock_file, strerror(errno)); ++ exit(EXIT_FAILURE); ++ } ++ ++ while (qemu_lock_fd(lock_fd, 0, 0, true)) { ++ if (errno != EACCES && errno != EAGAIN) { ++ error_report("qemu_lock_fd failed: %s\n", strerror(errno)); ++ exit(EXIT_FAILURE); ++ } ++ } ++ ++ /** try to create hugepage. ++ * If there are enough free hugepages, then do nothing. ++ */ ++ ret = try_create_2MB_page(page_num); ++ if (ret) { ++ error_report("%s: Failed to allocate hugepage\n", __func__); ++ goto unlock; ++ } ++ ++ for (i = 0; i < page_num; ++i) { ++ mem2_mr = g_malloc(sizeof(*mem2_mr)); ++ ram = mmap(NULL, HUGEPAGE_SIZE, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB, -1, 0); ++ if (ram == MAP_FAILED) { ++ error_report("%s: mmap failed: %s", __func__, strerror(errno)); ++ goto unlock; ++ } ++ ++ sprintf(mr_name, "mem2-%d", i); ++ memory_region_init_ram_ptr(mem2_mr, NULL, mr_name, HUGEPAGE_SIZE, ram); ++ memory_region_add_subregion(system_memory, ms->ram2_base + (i * HUGEPAGE_SIZE), mem2_mr); ++ } ++ ++ ret = 0; ++unlock: ++ qemu_unlock_fd(lock_fd, 0, 0); ++ if (ret) ++ exit(EXIT_FAILURE); ++} ++ + #define PC_ROM_MIN_VGA 0xc0000 + #define PC_ROM_MIN_OPTION 0xc8000 + #define PC_ROM_MAX 0xe0000 +@@ -965,6 +1070,22 @@ void pc_memory_init(PCMachineState *pcms, + E820_RAM); + } + ++ if (machine->ram2_size && machine->ram2_base) { ++ if (0x100000000ULL + x86ms->above_4g_mem_size > 
machine->ram2_base) { ++ error_report("\"-mem2 'base'\" needs to greater 0x%llx\n", ++ 0x100000000ULL + x86ms->above_4g_mem_size); ++ exit(EXIT_FAILURE); ++ } ++ if (machine->ram2_base & (HUGEPAGE_SIZE - 1) || ++ machine->ram2_size & (HUGEPAGE_SIZE - 1)) { ++ error_report("\"-mem2 'base|size'\" needs to aligned to 0x%x\n", HUGEPAGE_SIZE); ++ exit(EXIT_FAILURE); ++ } ++ ++ mem2_init(machine, system_memory); ++ e820_add_entry(machine->ram2_base, machine->ram2_size, E820_RAM); ++ } ++ + if (pcms->sgx_epc.size != 0) { + e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED); + } +diff --git a/include/hw/boards.h b/include/hw/boards.h +index da85f86efb..8ac8cad2a2 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -389,6 +389,8 @@ struct MachineState { + + ram_addr_t ram_size; + ram_addr_t maxram_size; ++ ram_addr_t ram2_base; ++ ram_addr_t ram2_size; + uint64_t ram_slots; + BootConfiguration boot_config; + char *kernel_filename; +diff --git a/qemu-options.hx b/qemu-options.hx +index 42fd09e4de..bc8e66a037 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5845,6 +5845,18 @@ SRST + (qemu) qom-set /objects/iothread1 poll-max-ns 100000 + ERST + ++DEF("mem2", HAS_ARG, QEMU_OPTION_mem2, ++ "-mem2 base=addr[G],size=n[MG]\n" ++ " Map guest memory using host hugepages\n" ++ " base: starting position of guest physical address\n" ++ " size: the size of mmaped memory\n" ++ "NOTE: Both `base` and `size` need to be aligned according to 2MB\n", ++ QEMU_ARCH_I386) ++SRST ++``-mem2 base=addr[G],size=n[MG]`` ++ Map the host's large page memory at the specified guest address ++ so that some devices can use larger contiguous physical memory. ++ERST + + HXCOMM This is the last statement. Insert new options before this line! 
+ +diff --git a/system/vl.c b/system/vl.c +index 8e3357c578..a1e5e68773 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -173,6 +173,8 @@ static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list); + static BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue); + static bool nographic = false; + static int mem_prealloc; /* force preallocation of physical target memory */ ++static ram_addr_t ram2_base; ++static ram_addr_t ram2_size; + static const char *vga_model = NULL; + static DisplayOptions dpy; + static int num_serial_hds; +@@ -504,6 +506,23 @@ static QemuOptsList qemu_action_opts = { + }, + }; + ++static QemuOptsList qemu_mem2_opts = { ++ .name = "mem2", ++ .merge_lists = true, ++ .head = QTAILQ_HEAD_INITIALIZER(qemu_mem2_opts.head), ++ .desc = { ++ { ++ .name = "base", ++ .type = QEMU_OPT_SIZE, ++ }, ++ { ++ .name = "size", ++ .type = QEMU_OPT_SIZE, ++ }, ++ { /* end of list */ } ++ }, ++}; ++ + const char *qemu_get_vm_name(void) + { + return qemu_name; +@@ -1932,6 +1951,9 @@ static void qemu_apply_machine_options(QDict *qdict) + { + object_set_properties_from_keyval(OBJECT(current_machine), qdict, false, &error_fatal); + ++ current_machine->ram2_size = ram2_size; ++ current_machine->ram2_base = ram2_base; ++ + if (semihosting_enabled(false) && !semihosting_get_argc()) { + /* fall back to the -kernel/-append */ + semihosting_arg_fallback(current_machine->kernel_filename, current_machine->kernel_cmdline); +@@ -2094,11 +2116,57 @@ static void parse_memory_options(void) + loc_pop(&loc); + } + ++static void set_mem2_options(void) ++{ ++ uint64_t sz, base; ++ const char *mem_str; ++ QemuOpts *opts = qemu_find_opts_singleton("mem2"); ++ Location loc; ++ ++ loc_push_none(&loc); ++ qemu_opts_loc_restore(opts); ++ ++ mem_str = qemu_opt_get(opts, "base"); ++ if (mem_str) { ++ if (!*mem_str) { ++ error_report("missing 'base' option value"); ++ exit(EXIT_FAILURE); ++ } ++ ++ base = qemu_opt_get_size(opts, "base", ram2_base); ++ ram2_base = 
base; ++ } ++ ++ mem_str = qemu_opt_get(opts, "size"); ++ if (mem_str) { ++ if (!*mem_str) { ++ error_report("missing 'base' option value"); ++ exit(EXIT_FAILURE); ++ } ++ ++ sz = qemu_opt_get_size(opts, "size", ram2_size); ++ ram2_size = sz; ++ } ++ ++ if (ram2_base && !ram2_size){ ++ error_report("missing 'size' option value"); ++ exit(EXIT_FAILURE); ++ } ++ if (!ram2_base && ram2_size){ ++ error_report("missing 'base' option value"); ++ exit(EXIT_FAILURE); ++ } ++ ++ loc_pop(&loc); ++} ++ + static void qemu_create_machine(QDict *qdict) + { + MachineClass *machine_class = select_machine(qdict, &error_fatal); + object_set_machine_compat_props(machine_class->compat_props); + ++ set_mem2_options(); ++ + current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class))); + object_property_add_child(object_get_root(), "machine", + OBJECT(current_machine)); +@@ -2777,6 +2845,7 @@ void qemu_init(int argc, char **argv) + qemu_add_opts(&qemu_semihosting_config_opts); + qemu_add_opts(&qemu_fw_cfg_opts); + qemu_add_opts(&qemu_action_opts); ++ qemu_add_opts(&qemu_mem2_opts); + qemu_add_run_with_opts(); + module_call_init(MODULE_INIT_OPTS); + +@@ -3596,6 +3665,13 @@ void qemu_init(int argc, char **argv) + case QEMU_OPTION_nouserconfig: + /* Nothing to be parsed here. Especially, do not error out below. 
*/ + break; ++ case QEMU_OPTION_mem2: ++ opts = qemu_opts_parse_noisily(qemu_find_opts("mem2"), ++ optarg, false); ++ if (!opts) { ++ exit(EXIT_FAILURE); ++ } ++ break; + #if defined(CONFIG_POSIX) + case QEMU_OPTION_runas: + if (!os_set_runas(optarg)) { +-- +2.41.0.windows.1 + diff --git a/hw-i386-amd_iommu-Don-t-leak-memory-in-amdvi_update_.patch b/hw-i386-amd_iommu-Don-t-leak-memory-in-amdvi_update_.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6e73f02be6b54c61f6cc57d2ff58c1630c98745 --- /dev/null +++ b/hw-i386-amd_iommu-Don-t-leak-memory-in-amdvi_update_.patch @@ -0,0 +1,50 @@ +From 1b0d08faf1daaed39809ed1a3516eaa0f7d61534 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Wed, 31 Jul 2024 18:00:19 +0100 +Subject: [PATCH] hw/i386/amd_iommu: Don't leak memory in amdvi_update_iotlb() + +In amdvi_update_iotlb() we will only put a new entry in the hash +table if to_cache.perm is not IOMMU_NONE. However we allocate the +memory for the new AMDVIIOTLBEntry and for the hash table key +regardless. This means that in the IOMMU_NONE case we will leak the +memory we allocated. + +Move the allocations into the if() to the point where we know we're +going to add the item to the hash table. + +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2452 +Signed-off-by: Peter Maydell +Message-Id: <20240731170019.3590563-1-peter.maydell@linaro.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 9a45b0761628cc59267b3283a85d15294464ac31) +Signed-off-by: zhujun2 +--- + hw/i386/amd_iommu.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c +index 4203144da9..12742b1433 100644 +--- a/hw/i386/amd_iommu.c ++++ b/hw/i386/amd_iommu.c +@@ -346,12 +346,12 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, + uint64_t gpa, IOMMUTLBEntry to_cache, + uint16_t domid) + { +- AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); +- uint64_t *key = g_new(uint64_t, 1); +- uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; +- + /* don't cache erroneous translations */ + if (to_cache.perm != IOMMU_NONE) { ++ AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); ++ uint64_t *key = g_new(uint64_t, 1); ++ uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; ++ + trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), gpa, to_cache.translated_addr); + +-- +2.41.0.windows.1 + diff --git a/hw-ide-check-null-block-before-_cancel_dma_sync.patch b/hw-ide-check-null-block-before-_cancel_dma_sync.patch deleted file mode 100644 index 1ff20a9683ec88de3d3a67086ffc82eedff9697e..0000000000000000000000000000000000000000 --- a/hw-ide-check-null-block-before-_cancel_dma_sync.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 3b23698e240bd0efe987cf113e3bc8d233991d21 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Wed, 14 Oct 2020 15:57:18 +0800 -Subject: [PATCH] hw/ide: check null block before _cancel_dma_sync - -fix CVE-2020-25743 - -patch link: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg05967.html - -When canceling an i/o operation via ide_cancel_dam_sync(), -a block pointer may be null. Add check to avoid null pointer -dereference. - - -> https://ruhr-uni-bochum.sciebo.de/s/NNWP2GfwzYKeKwE?path=%2Fide_nullptr1 - ==1803100==Hint: address points to the zero page. 
- #0 blk_bs ../block/block-backend.c:714 - #1 blk_drain ../block/block-backend.c:1715 - #2 ide_cancel_dma_sync ../hw/ide/core.c:723 - #3 bmdma_cmd_writeb ../hw/ide/core.c:723 - #4 bmdma_write ../hw/ide/pci.c:298 - #5 memory_region_write_accessor ../softmmu/memory.c:483 - #6 access_with_adjusted_size ../softmmu/memory.c:544 - #7 memory_region_dispatch_write ../softmmu/memory.c:1465 - #8 flatview_write_continue ../exe.c:3176 - ... - -Reported-by: Ruhr-University -Signed-off-by: Prasad J Pandit ---- - hw/ide/core.c | 1 + - hw/ide/pci.c | 5 ++++- - 2 files changed, 5 insertions(+), 1 deletion(-) - -diff --git a/hw/ide/core.c b/hw/ide/core.c -index f76f7e5234..8105187f49 100644 ---- a/hw/ide/core.c -+++ b/hw/ide/core.c -@@ -718,6 +718,7 @@ void ide_cancel_dma_sync(IDEState *s) - * whole DMA operation will be submitted to disk with a single - * aio operation with preadv/pwritev. - */ -+ assert(s->blk); - if (s->bus->dma->aiocb) { - trace_ide_cancel_dma_sync_remaining(); - blk_drain(s->blk); -diff --git a/hw/ide/pci.c b/hw/ide/pci.c -index b50091b615..b47e675456 100644 ---- a/hw/ide/pci.c -+++ b/hw/ide/pci.c -@@ -295,7 +295,10 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val) - /* Ignore writes to SSBM if it keeps the old value */ - if ((val & BM_CMD_START) != (bm->cmd & BM_CMD_START)) { - if (!(val & BM_CMD_START)) { -- ide_cancel_dma_sync(idebus_active_if(bm->bus)); -+ IDEState *s = idebus_active_if(bm->bus); -+ if (s->blk) { -+ ide_cancel_dma_sync(s); -+ } - bm->status &= ~BM_STATUS_DMAING; - } else { - bm->cur_addr = bm->addr; --- -2.23.0 - diff --git a/hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch b/hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f026bf5734f19a3cbb11f5ce0c44c8587be139c --- /dev/null +++ b/hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch @@ -0,0 +1,204 @@ +From d6f75f9e532a4a4b6bb4610049f4fa7f26160733 Mon Sep 17 00:00:00 2001 +From: Xianglai Li 
+Date: Thu, 20 Feb 2025 19:24:18 +0800 +Subject: [PATCH] hw/intc: Add extioi ability of 256 vcpu interrupt routing + +Add the feature field for the CPU-encoded interrupt +route to extioi and the corresponding mechanism for +backup recovery. + +Signed-off-by: Xianglai Li +--- + hw/intc/loongarch_extioi_kvm.c | 65 ++++++++++++++++++++++++++++-- + hw/loongarch/virt.c | 2 + + include/hw/intc/loongarch_extioi.h | 4 ++ + linux-headers/asm-loongarch/kvm.h | 10 +++++ + 4 files changed, 77 insertions(+), 4 deletions(-) + +diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c +index f5bbc33255..2e7c764b7c 100644 +--- a/hw/intc/loongarch_extioi_kvm.c ++++ b/hw/intc/loongarch_extioi_kvm.c +@@ -18,8 +18,32 @@ + static void kvm_extioi_access_regs(int fd, uint64_t addr, + void *val, int is_write) + { +- kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS, +- addr, val, is_write, &error_abort); ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS, ++ addr, val, is_write, &error_abort); ++} ++ ++static void kvm_extioi_access_sw_status(int fd, uint64_t addr, ++ void *val, bool is_write) ++{ ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS, ++ addr, val, is_write, &error_abort); ++} ++ ++static void kvm_extioi_save_load_sw_status(void *opaque, bool is_write) ++{ ++ KVMLoongArchExtIOI *s = (KVMLoongArchExtIOI *)opaque; ++ KVMLoongArchExtIOIClass *class = KVM_LOONGARCH_EXTIOI_GET_CLASS(s); ++ int fd = class->dev_fd; ++ int addr; ++ ++ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU; ++ kvm_extioi_access_sw_status(fd, addr, (void *)&s->num_cpu, is_write); ++ ++ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE; ++ kvm_extioi_access_sw_status(fd, addr, (void *)&s->features, is_write); ++ ++ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE; ++ kvm_extioi_access_sw_status(fd, addr, (void *)&s->status, is_write); + } + + static int kvm_loongarch_extioi_pre_save(void *opaque) +@@ -41,6 +65,8 @@ static int kvm_loongarch_extioi_pre_save(void 
*opaque) + kvm_extioi_access_regs(fd, EXTIOI_COREISR_START, + (void *)s->coreisr, false); + ++ kvm_extioi_save_load_sw_status(opaque, false); ++ + return 0; + } + +@@ -61,12 +87,19 @@ static int kvm_loongarch_extioi_post_load(void *opaque, int version_id) + (void *)s->sw_coremap, true); + kvm_extioi_access_regs(fd, EXTIOI_COREISR_START, (void *)s->coreisr, true); + ++ kvm_extioi_save_load_sw_status(opaque, true); ++ ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, ++ KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED, ++ NULL, true, &error_abort); ++ + return 0; + } + + static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + { + KVMLoongArchExtIOIClass *extioi_class = KVM_LOONGARCH_EXTIOI_GET_CLASS(dev); ++ KVMLoongArchExtIOI *s = KVM_LOONGARCH_EXTIOI(dev); + struct kvm_create_device cd = {0}; + Error *err = NULL; + int ret,i; +@@ -77,6 +110,10 @@ static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + return; + } + ++ if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) { ++ s->features |= EXTIOI_VIRT_HAS_FEATURES; ++ } ++ + if (!extioi_class->is_created) { + cd.type = KVM_DEV_TYPE_LA_EXTIOI; + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); +@@ -87,6 +124,15 @@ static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + } + extioi_class->is_created = true; + extioi_class->dev_fd = cd.fd; ++ ++ kvm_device_access(cd.fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, ++ KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU, ++ &s->num_cpu, true, NULL); ++ ++ kvm_device_access(cd.fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, ++ KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE, ++ &s->features, true, NULL); ++ + fprintf(stdout, "Create LoongArch extioi irqchip in KVM done!\n"); + } + +@@ -102,8 +148,8 @@ static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + + static const VMStateDescription vmstate_kvm_extioi_core = { + .name = "kvm-extioi-single", +- .version_id = 1, +- .minimum_version_id = 1, ++ .version_id = 2, ++ 
.minimum_version_id = 2, + .pre_save = kvm_loongarch_extioi_pre_save, + .post_load = kvm_loongarch_extioi_post_load, + .fields = (VMStateField[]) { +@@ -119,10 +165,20 @@ static const VMStateDescription vmstate_kvm_extioi_core = { + EXTIOI_IRQS_IPMAP_SIZE / 4), + VMSTATE_UINT32_ARRAY(coremap, KVMLoongArchExtIOI, EXTIOI_IRQS / 4), + VMSTATE_UINT8_ARRAY(sw_coremap, KVMLoongArchExtIOI, EXTIOI_IRQS), ++ VMSTATE_UINT32(num_cpu, KVMLoongArchExtIOI), ++ VMSTATE_UINT32(features, KVMLoongArchExtIOI), ++ VMSTATE_UINT32(status, KVMLoongArchExtIOI), + VMSTATE_END_OF_LIST() + } + }; + ++static Property extioi_properties[] = { ++ DEFINE_PROP_UINT32("num-cpu", KVMLoongArchExtIOI, num_cpu, 1), ++ DEFINE_PROP_BIT("has-virtualization-extension", KVMLoongArchExtIOI, ++ features, EXTIOI_HAS_VIRT_EXTENSION, 0), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ + static void kvm_loongarch_extioi_class_init(ObjectClass *oc, void *data) + { + DeviceClass *dc = DEVICE_CLASS(oc); +@@ -131,6 +187,7 @@ static void kvm_loongarch_extioi_class_init(ObjectClass *oc, void *data) + extioi_class->parent_realize = dc->realize; + dc->realize = kvm_loongarch_extioi_realize; + extioi_class->is_created = false; ++ device_class_set_props(dc, extioi_properties); + dc->vmsd = &vmstate_kvm_extioi_core; + } + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index ce026a4c3c..233297d78f 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -874,6 +874,8 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + /* Create EXTIOI device */ + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + extioi = qdev_new(TYPE_KVM_LOONGARCH_EXTIOI); ++ qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.max_cpus); ++ qdev_prop_set_bit(extioi, "has-virtualization-extension", true); + sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); + } else { + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index 9966cd98d3..92b38d5c38 
100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -94,6 +94,10 @@ struct LoongArchExtIOI { + + struct KVMLoongArchExtIOI { + SysBusDevice parent_obj; ++ uint32_t num_cpu; ++ uint32_t features; ++ uint32_t status; ++ + /* hardware state */ + uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT]; +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 13c1280662..34abd65939 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -141,6 +141,16 @@ struct kvm_iocsr_entry { + + #define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 + ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS 0x40000006 ++#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU 0x0 ++#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE 0x1 ++#define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE 0x2 ++ ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL 0x40000007 ++#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU 0x0 ++#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE 0x1 ++#define KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED 0x3 ++ + #define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000004 + #define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 + +-- +2.41.0.windows.1 + diff --git a/hw-intc-Don-t-clear-pending-bits-on-IRQ-lowering.patch b/hw-intc-Don-t-clear-pending-bits-on-IRQ-lowering.patch new file mode 100644 index 0000000000000000000000000000000000000000..cf78d7e426755e9e71399aff2321496e82199802 --- /dev/null +++ b/hw-intc-Don-t-clear-pending-bits-on-IRQ-lowering.patch @@ -0,0 +1,39 @@ +From b44fc9f3fc91363c55f6ba739f6c09222f979d88 Mon Sep 17 00:00:00 2001 +From: Sergey Makarov +Date: Wed, 18 Sep 2024 17:02:29 +0300 +Subject: [PATCH] hw/intc: Don't clear pending bits on IRQ lowering + +According to PLIC specification (chapter 5), there +is only one case, when interrupt is claimed. Fix +PLIC controller to match this behavior. 
+ +Signed-off-by: Sergey Makarov +Reviewed-by: Alistair Francis +Message-ID: <20240918140229.124329-3-s.makarov@syntacore.com> +Signed-off-by: Alistair Francis +(cherry picked from commit a84be2baa9eca8bc500f866ad943b8f63dc99adf) +Signed-off-by: zhujun2 +--- + hw/intc/sifive_plic.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c +index 5522ede2cf..e5de52bc44 100644 +--- a/hw/intc/sifive_plic.c ++++ b/hw/intc/sifive_plic.c +@@ -349,8 +349,10 @@ static void sifive_plic_irq_request(void *opaque, int irq, int level) + { + SiFivePLICState *s = opaque; + +- sifive_plic_set_pending(s, irq, level > 0); +- sifive_plic_update(s); ++ if (level > 0) { ++ sifive_plic_set_pending(s, irq, true); ++ sifive_plic_update(s); ++ } + } + + static void sifive_plic_realize(DeviceState *dev, Error **errp) +-- +2.41.0.windows.1 + diff --git a/intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch b/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch similarity index 30% rename from intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch rename to hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch index 95c60b02c66e48b2fbe6d64c5e139aa3ecdcfae9..d551fad998c56d23a82c912ab300dcf261bf381d 100644 --- a/intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch +++ b/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch @@ -1,99 +1,278 @@ -From de97ff4a01008ad98f7d69adc4b84843fff3ce19 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 10:59:55 +0800 -Subject: [PATCH] intc/gicv3_cpuif: Factor out gicv3_init_one_cpuif +From 4e0a4443b7c36608fc30dcaaf0db120220111dd2 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 15:26:27 +0100 +Subject: [PATCH] hw/intc/arm-gicv3*: Changes required to (re)init the vCPU + register info -The CPU object of hotplugged CPU will be defer-created (during -hotplug session), so we must factor out some code to let it can -be applied to individual CPU. 
+vCPU register info needs to be re-initialized each time vCPU is hot-plugged. +This has to be done both for emulation/TCG and KVM case. This is done in +context to the GIC update notification for any vCPU hot-(un)plug events. This +change adds that support and re-factors existing to maximize the code re-use. +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu Signed-off-by: Keqian Zhu Signed-off-by: Salil Mehta --- - hw/intc/arm_gicv3.c | 5 +- - hw/intc/arm_gicv3_cpuif.c | 122 ++++++++++++++++++-------------------- - hw/intc/gicv3_internal.h | 2 +- - 3 files changed, 64 insertions(+), 65 deletions(-) + hw/intc/arm_gicv3.c | 1 + + hw/intc/arm_gicv3_common.c | 7 +- + hw/intc/arm_gicv3_cpuif.c | 257 +++++++++++++++-------------- + hw/intc/arm_gicv3_kvm.c | 7 +- + hw/intc/gicv3_internal.h | 1 + + include/hw/intc/arm_gicv3_common.h | 1 + + 6 files changed, 150 insertions(+), 124 deletions(-) diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c -index 66eaa97198..2fe79f794d 100644 +index 0b8f79a122..e1c7c8c4bc 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c -@@ -367,6 +367,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) - GICv3State *s = ARM_GICV3(dev); - ARMGICv3Class *agc = ARM_GICV3_GET_CLASS(s); - Error *local_err = NULL; -+ int i; - - agc->parent_realize(dev, &local_err); - if (local_err) { -@@ -386,7 +387,9 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) - return; - } - -- gicv3_init_cpuif(s); -+ for (i = 0; i < s->num_cpu; i++) { -+ gicv3_init_one_cpuif(s, i); +@@ -410,6 +410,7 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data) + ARMGICv3Class *agc = ARM_GICV3_CLASS(klass); + + agcc->post_load = arm_gicv3_post_load; ++ agcc->init_cpu_reginfo = gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index fc87fa9369..d051024a30 100644 +--- 
a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -345,10 +345,12 @@ static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) + { + GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data; + CPUState *cpu = gic_info->cpu; ++ ARMGICv3CommonClass *c; + int gic_cpuif_num; + GICv3State *s; + + s = ARM_GICV3_COMMON(gic_info->gic); ++ c = ARM_GICV3_COMMON_GET_CLASS(s); + + /* this shall get us mapped gicv3 cpuif corresponding to mpidr */ + gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu); +@@ -368,7 +370,10 @@ static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) + gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]); + gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu); + +- /* TODO: initialize the registers info for this newly added cpu */ ++ /* initialize the registers info for this newly added cpu */ ++ if (c->init_cpu_reginfo) { ++ c->init_cpu_reginfo(cpu); + } } - - static void arm_gicv3_class_init(ObjectClass *klass, void *data) + + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c -index 3b212d91c8..56aa5efede 100644 +index 0d0eb2f62f..a013510074 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c -@@ -2597,78 +2597,74 @@ static void gicv3_cpuif_el_change_hook(ARMCPU *cpu, void *opaque) - gicv3_cpuif_update(cs); - } - --void gicv3_init_cpuif(GICv3State *s) -+void gicv3_init_one_cpuif(GICv3State *s, int ncpu) +@@ -2782,6 +2782,127 @@ static const ARMCPRegInfo gicv3_cpuif_ich_apxr23_reginfo[] = { + }, + }; + ++void gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ GICv3CPUState *gcs = icc_cs_from_env(&cpu->env); ++ ++ /* ++ * If the CPU doesn't define a GICv3 configuration, probably because ++ * in real hardware it doesn't have one, then we use default values ++ * matching the one used by most Arm CPUs. 
This applies to: ++ * cpu->gic_num_lrs ++ * cpu->gic_vpribits ++ * cpu->gic_vprebits ++ * cpu->gic_pribits ++ */ ++ ++ /* ++ * Note that we can't just use the GICv3CPUState as an opaque pointer ++ * in define_arm_cp_regs_with_opaque(), because when we're called back ++ * it might be with code translated by CPU 0 but run by CPU 1, in ++ * which case we'd get the wrong value. ++ * So instead we define the regs with no ri->opaque info, and ++ * get back to the GICv3CPUState from the CPUARMState. ++ */ ++ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ ++ /* ++ * The CPU implementation specifies the number of supported ++ * bits of physical priority. For backwards compatibility ++ * of migration, we have a compat property that forces use ++ * of 8 priority bits regardless of what the CPU really has. ++ */ ++ if (gcs->gic->force_8bit_prio) { ++ gcs->pribits = 8; ++ } else { ++ gcs->pribits = cpu->gic_pribits ?: 5; ++ } ++ ++ /* ++ * The GICv3 has separate ID register fields for virtual priority ++ * and preemption bit values, but only a single ID register field ++ * for the physical priority bits. The preemption bit count is ++ * always the same as the priority bit count, except that 8 bits ++ * of priority means 7 preemption bits. We precalculate the ++ * preemption bits because it simplifies the code and makes the ++ * parallels between the virtual and physical bits of the GIC ++ * a bit clearer. ++ */ ++ gcs->prebits = gcs->pribits; ++ if (gcs->prebits == 8) { ++ gcs->prebits--; ++ } ++ /* ++ * Check that CPU code defining pribits didn't violate ++ * architectural constraints our implementation relies on. ++ */ ++ g_assert(gcs->pribits >= 4 && gcs->pribits <= 8); ++ ++ /* ++ * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions ++ * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. 
++ */ ++ if (gcs->prebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); ++ } ++ if (gcs->prebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); ++ } ++ ++ if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { ++ int j; ++ ++ gcs->num_list_regs = cpu->gic_num_lrs ?: 4; ++ gcs->vpribits = cpu->gic_vpribits ?: 5; ++ gcs->vprebits = cpu->gic_vprebits ?: 5; ++ ++ /* ++ * Check against architectural constraints: getting these ++ * wrong would be a bug in the CPU code defining these, ++ * and the implementation relies on them holding. ++ */ ++ g_assert(gcs->vprebits <= gcs->vpribits); ++ g_assert(gcs->vprebits >= 5 && gcs->vprebits <= 7); ++ g_assert(gcs->vpribits >= 5 && gcs->vpribits <= 8); ++ ++ define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); ++ ++ for (j = 0; j < gcs->num_list_regs; j++) { ++ /* ++ * Note that the AArch64 LRs are 64-bit; the AArch32 LRs ++ * are split into two cp15 regs, LR (the low part, with the ++ * same encoding as the AArch64 LR) and LRC (the high part). 
++ */ ++ ARMCPRegInfo lr_regset[] = { ++ { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, ++ .opc0 = 3, .opc1 = 4, .crn = 12, ++ .crm = 12 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, ++ .cp = 15, .opc1 = 4, .crn = 12, ++ .crm = 14 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ }; ++ define_arm_cp_regs(cpu, lr_regset); ++ } ++ if (gcs->vprebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); ++ } ++ if (gcs->vprebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ } ++ } ++} ++ + static void gicv3_cpuif_el_change_hook(ARMCPU *cpu, void *opaque) { - /* Called from the GICv3 realize function; register our system - * registers with the CPU - */ -- int i; -- -- for (i = 0; i < s->num_cpu; i++) { -- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); + GICv3CPUState *cs = opaque; +@@ -2804,131 +2925,23 @@ void gicv3_init_cpuif(GICv3State *s) + + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); - GICv3CPUState *cs = &s->cpu[i]; - +- /* +- * If the CPU doesn't define a GICv3 configuration, probably because +- * in real hardware it doesn't have one, then we use default values +- * matching the one used by most Arm CPUs. This applies to: +- * cpu->gic_num_lrs +- * cpu->gic_vpribits +- * cpu->gic_vprebits +- * cpu->gic_pribits +- */ +- - /* Note that we can't just use the GICv3CPUState as an opaque pointer - * in define_arm_cp_regs_with_opaque(), because when we're called back - * it might be with code translated by CPU 0 but run by CPU 1, in - * which case we'd get the wrong value. - * So instead we define the regs with no ri->opaque info, and - * get back to the GICv3CPUState from the CPUARMState. 
+- * +- * These CP regs callbacks can be called from either TCG or HVF code. - */ - define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); -- if (arm_feature(&cpu->env, ARM_FEATURE_EL2) -- && cpu->gic_num_lrs) { +- +- /* +- * The CPU implementation specifies the number of supported +- * bits of physical priority. For backwards compatibility +- * of migration, we have a compat property that forces use +- * of 8 priority bits regardless of what the CPU really has. +- */ +- if (s->force_8bit_prio) { +- cs->pribits = 8; +- } else { +- cs->pribits = cpu->gic_pribits ?: 5; +- } +- +- /* +- * The GICv3 has separate ID register fields for virtual priority +- * and preemption bit values, but only a single ID register field +- * for the physical priority bits. The preemption bit count is +- * always the same as the priority bit count, except that 8 bits +- * of priority means 7 preemption bits. We precalculate the +- * preemption bits because it simplifies the code and makes the +- * parallels between the virtual and physical bits of the GIC +- * a bit clearer. +- */ +- cs->prebits = cs->pribits; +- if (cs->prebits == 8) { +- cs->prebits--; +- } +- /* +- * Check that CPU code defining pribits didn't violate +- * architectural constraints our implementation relies on. +- */ +- g_assert(cs->pribits >= 4 && cs->pribits <= 8); + +- /* +- * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions +- * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. 
+- */ +- if (cs->prebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); +- } +- if (cs->prebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); +- } +- +- if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { - int j; -+ ARMCPU *cpu = ARM_CPU(qemu_get_cpu(ncpu)); -+ GICv3CPUState *cs = &s->cpu[ncpu]; -+ -+ /* Note that we can't just use the GICv3CPUState as an opaque pointer -+ * in define_arm_cp_regs_with_opaque(), because when we're called back -+ * it might be with code translated by CPU 0 but run by CPU 1, in -+ * which case we'd get the wrong value. -+ * So instead we define the regs with no ri->opaque info, and -+ * get back to the GICv3CPUState from the CPUARMState. -+ */ -+ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); -+ if (arm_feature(&cpu->env, ARM_FEATURE_EL2) -+ && cpu->gic_num_lrs) { -+ int j; - -- cs->maintenance_irq = cpu->gicv3_maintenance_interrupt; -+ cs->maintenance_irq = cpu->gicv3_maintenance_interrupt; - -- cs->num_list_regs = cpu->gic_num_lrs; -- cs->vpribits = cpu->gic_vpribits; -- cs->vprebits = cpu->gic_vprebits; -+ cs->num_list_regs = cpu->gic_num_lrs; -+ cs->vpribits = cpu->gic_vpribits; -+ cs->vprebits = cpu->gic_vprebits; - +- +- cs->num_list_regs = cpu->gic_num_lrs ?: 4; +- cs->vpribits = cpu->gic_vpribits ?: 5; +- cs->vprebits = cpu->gic_vprebits ?: 5; +- - /* Check against architectural constraints: getting these - * wrong would be a bug in the CPU code defining these, - * and the implementation relies on them holding. @@ -101,22 +280,23 @@ index 3b212d91c8..56aa5efede 100644 - g_assert(cs->vprebits <= cs->vpribits); - g_assert(cs->vprebits >= 5 && cs->vprebits <= 7); - g_assert(cs->vpribits >= 5 && cs->vpribits <= 8); -+ /* Check against architectural constraints: getting these -+ * wrong would be a bug in the CPU code defining these, -+ * and the implementation relies on them holding. 
-+ */ -+ g_assert(cs->vprebits <= cs->vpribits); -+ g_assert(cs->vprebits >= 5 && cs->vprebits <= 7); -+ g_assert(cs->vpribits >= 5 && cs->vpribits <= 8); - +- - define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); -+ define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); - +- - for (j = 0; j < cs->num_list_regs; j++) { - /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs - * are split into two cp15 regs, LR (the low part, with the - * same encoding as the AArch64 LR) and LRC (the high part). -- */ ++ if (qemu_enabled_cpu(CPU(cpu))) { ++ GICv3CPUState *cs = icc_cs_from_env(&cpu->env); ++ gicv3_init_cpu_reginfo(CPU(cpu)); ++ if (tcg_enabled() || qtest_enabled()) { ++ /* ++ * We can only trap EL changes with TCG. However the GIC ++ * interrupt state only changes on EL changes involving EL2 or ++ * EL3, so for the non-TCG case this is OK, as EL2 and EL3 can't ++ * exist. + */ - ARMCPRegInfo lr_regset[] = { - { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 12, @@ -134,7 +314,6 @@ index 3b212d91c8..56aa5efede 100644 - .readfn = ich_lr_read, - .writefn = ich_lr_write, - }, -- REGINFO_SENTINEL - }; - define_arm_cp_regs(cpu, lr_regset); - } @@ -143,55 +322,82 @@ index 3b212d91c8..56aa5efede 100644 - } - if (cs->vprebits == 7) { - define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); -- } -+ for (j = 0; j < cs->num_list_regs; j++) { -+ /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs -+ * are split into two cp15 regs, LR (the low part, with the -+ * same encoding as the AArch64 LR) and LRC (the high part). 
-+ */ -+ ARMCPRegInfo lr_regset[] = { -+ { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, -+ .opc0 = 3, .opc1 = 4, .crn = 12, -+ .crm = 12 + (j >> 3), .opc2 = j & 7, -+ .type = ARM_CP_IO | ARM_CP_NO_RAW, -+ .access = PL2_RW, -+ .readfn = ich_lr_read, -+ .writefn = ich_lr_write, -+ }, -+ { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, -+ .cp = 15, .opc1 = 4, .crn = 12, -+ .crm = 14 + (j >> 3), .opc2 = j & 7, -+ .type = ARM_CP_IO | ARM_CP_NO_RAW, -+ .access = PL2_RW, -+ .readfn = ich_lr_read, -+ .writefn = ich_lr_write, -+ }, -+ REGINFO_SENTINEL -+ }; -+ define_arm_cp_regs(cpu, lr_regset); -+ } -+ if (cs->vprebits >= 6) { -+ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); -+ } -+ if (cs->vprebits == 7) { -+ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, ++ cs); ++ } else { ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); + } } -- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); +- if (tcg_enabled() || qtest_enabled()) { +- /* +- * We can only trap EL changes with TCG. However the GIC interrupt +- * state only changes on EL changes involving EL2 or EL3, so for +- * the non-TCG case this is OK, as EL2 and EL3 can't exist. 
+- */ +- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); +- } else { +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); +- } + } + } +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index db06c75e2b..dd2a60fa20 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -804,6 +804,10 @@ static void vm_change_state_handler(void *opaque, bool running, } -+ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); } + ++static void kvm_gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++} + + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + { +@@ -837,7 +841,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + for (i = 0; i < s->num_cpu; i++) { + CPUState *cs = qemu_get_cpu(i); + if (qemu_enabled_cpu(cs)) { +- define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++ kvm_gicv3_init_cpu_reginfo(cs); + } + } + +@@ -925,6 +929,7 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) + + agcc->pre_save = kvm_arm_gicv3_get; + agcc->post_load = kvm_arm_gicv3_put; ++ agcc->init_cpu_reginfo = kvm_gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, + &kgc->parent_realize); + resettable_class_set_parent_phases(rc, NULL, kvm_arm_gicv3_reset_hold, NULL, diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h -index 05303a55c8..cfbfe8a549 100644 +index 9d4c1209bd..0bed0f6e2a 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h -@@ -297,7 +297,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, - void gicv3_dist_set_irq(GICv3State *s, int irq, int level); - void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level); +@@ -709,6 +709,7 @@ void gicv3_redist_vinvall(GICv3CPUState *cs, uint64_t vptaddr); + void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); --void 
gicv3_init_cpuif(GICv3State *s); -+void gicv3_init_one_cpuif(GICv3State *s, int ncpu); - + void gicv3_init_cpuif(GICv3State *s); ++void gicv3_init_cpu_reginfo(CPUState *cs); + /** * gicv3_cpuif_update: --- -2.19.1 +diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h +index 97a48f44b9..b5f8ba17ff 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -325,6 +325,7 @@ struct ARMGICv3CommonClass { + + void (*pre_save)(GICv3State *s); + void (*post_load)(GICv3State *s); ++ void (*init_cpu_reginfo)(CPUState *cs); + }; + + void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, +-- +2.27.0 + diff --git a/hw-intc-arm_gic-Fix-handling-of-NS-view-of-GICC_APR-.patch b/hw-intc-arm_gic-Fix-handling-of-NS-view-of-GICC_APR-.patch new file mode 100644 index 0000000000000000000000000000000000000000..6f311c4c127e90ee8c38b2c65ff0c5fdce207fa8 --- /dev/null +++ b/hw-intc-arm_gic-Fix-handling-of-NS-view-of-GICC_APR-.patch @@ -0,0 +1,61 @@ +From 20541823659dc78a6a7be427f8fc03ccc58c88d1 Mon Sep 17 00:00:00 2001 +From: Andrey Shumilin +Date: Thu, 23 May 2024 16:06:20 +0100 +Subject: [PATCH] hw/intc/arm_gic: Fix handling of NS view of GICC_APR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In gic_cpu_read() and gic_cpu_write(), we delegate the handling of +reading and writing the Non-Secure view of the GICC_APR registers +to functions gic_apr_ns_view() and gic_apr_write_ns_view(). +Unfortunately we got the order of the arguments wrong, swapping the +CPU number and the register number (which the compiler doesn't catch +because they're both integers). + +Most guests probably didn't notice this bug because directly +accessing the APR registers is typically something only done by +firmware when it is doing state save for going into a sleep mode. + +Correct the mismatched call arguments. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 51fd06e0ee ("hw/intc/arm_gic: Fix handling of GICC_APR, GICC_NSAPR registers") +Signed-off-by: Andrey Shumilin +[PMM: Rewrote commit message] +Signed-off-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Alex Bennée +(cherry picked from commit daafa78b297291fea36fb4daeed526705fa7c035) +Signed-off-by: zhujun2 +--- + hw/intc/arm_gic.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c +index dfe7a0a729..f0582f7a49 100644 +--- a/hw/intc/arm_gic.c ++++ b/hw/intc/arm_gic.c +@@ -1663,7 +1663,7 @@ static MemTxResult gic_cpu_read(GICState *s, int cpu, int offset, + *data = s->h_apr[gic_get_vcpu_real_id(cpu)]; + } else if (gic_cpu_ns_access(s, cpu, attrs)) { + /* NS view of GICC_APR is the top half of GIC_NSAPR */ +- *data = gic_apr_ns_view(s, regno, cpu); ++ *data = gic_apr_ns_view(s, cpu, regno); + } else { + *data = s->apr[regno][cpu]; + } +@@ -1751,7 +1751,7 @@ static MemTxResult gic_cpu_write(GICState *s, int cpu, int offset, + s->h_apr[gic_get_vcpu_real_id(cpu)] = value; + } else if (gic_cpu_ns_access(s, cpu, attrs)) { + /* NS view of GICC_APR is the top half of GIC_NSAPR */ +- gic_apr_write_ns_view(s, regno, cpu, value); ++ gic_apr_write_ns_view(s, cpu, regno, value); + } else { + s->apr[regno][cpu] = value; + } +-- +2.41.0.windows.1 + diff --git a/hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch b/hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch deleted file mode 100644 index de999b8c89a41d54879d2fd22cc3b852e6c16138..0000000000000000000000000000000000000000 --- a/hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 3e28567104500238b89ea6b4d684c5350194fea9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Mon, 21 Jun 2021 10:12:41 +0800 -Subject: [PATCH] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-20221 - -Per the ARM Generic Interrupt Controller Architecture specification -(document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit, -not 10: - - - 4.3 Distributor register descriptions - - 4.3.15 Software Generated Interrupt Register, GICD_SG - - - Table 4-21 GICD_SGIR bit assignments - - The Interrupt ID of the SGI to forward to the specified CPU - interfaces. The value of this field is the Interrupt ID, in - the range 0-15, for example a value of 0b0011 specifies - Interrupt ID 3. - -Correct the irq mask to fix an undefined behavior (which eventually -lead to a heap-buffer-overflow, see [Buglink]): - - $ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M virt,accel=qtest -qtest stdio - [I 1612088147.116987] OPENED - [R +0.278293] writel 0x8000f00 0xff4affb0 - ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for type 'uint8_t [16][8]' - SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../hw/intc/arm_gic.c:1498:13 - -This fixes a security issue when running with KVM on Arm with -kernel-irqchip=off. (The default is kernel-irqchip=on, which is -unaffected, and which is also the correct choice for performance.) 
- -Cc: qemu-stable@nongnu.org -Fixes: CVE-2021-20221 -Fixes: 9ee6e8bb ("ARMv7 support.") -Buglink: https://bugs.launchpad.net/qemu/+bug/1913916 -Buglink: https://bugs.launchpad.net/qemu/+bug/1913917 - -Reported-by: Alexander Bulekov's avatarAlexander Bulekov -Signed-off-by: Philippe Mathieu-Daudé's avatarPhilippe Mathieu-Daudé -Message-id: 20210131103401.217160-1-f4bug@amsat.org -Reviewed-by: Peter Maydell's avatarPeter Maydell -Signed-off-by: Peter Maydell's avatarPeter Maydell - -Signed-off-by: Jiajie Li ---- - hw/intc/arm_gic.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c -index 77427a4188..492dabaa1c 100644 ---- a/hw/intc/arm_gic.c -+++ b/hw/intc/arm_gic.c -@@ -1454,7 +1454,7 @@ static void gic_dist_writel(void *opaque, hwaddr offset, - int target_cpu; - - cpu = gic_get_current_cpu(s); -- irq = value & 0x3ff; -+ irq = value & 0xf; - switch ((value >> 24) & 3) { - case 0: - mask = (value >> 16) & ALL_CPU_MASK; --- -2.27.0 - diff --git a/hw-intc-arm_gic-fix-spurious-level-triggered-interru.patch b/hw-intc-arm_gic-fix-spurious-level-triggered-interru.patch new file mode 100644 index 0000000000000000000000000000000000000000..fae3826cd28c892244baea74ed75284156e00462 --- /dev/null +++ b/hw-intc-arm_gic-fix-spurious-level-triggered-interru.patch @@ -0,0 +1,82 @@ +From 0cb9a00d295cbf0ade0a55cea1039aec793fddf0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Kl=C3=B6tzke?= +Date: Fri, 13 Sep 2024 15:31:50 +0100 +Subject: [PATCH] hw/intc/arm_gic: fix spurious level triggered interrupts +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +On GICv2 and later, level triggered interrupts are pending when either +the interrupt line is asserted or the interrupt was made pending by a +GICD_ISPENDRn write. Making a level triggered interrupt pending by +software persists until either the interrupt is acknowledged or cleared +by writing GICD_ICPENDRn. 
As long as the interrupt line is asserted, +the interrupt is pending in any case. + +This logic is transparently implemented in gic_test_pending() for +GICv1 and GICv2. The function combines the "pending" irq_state flag +(used for edge triggered interrupts and software requests) and the +line status (tracked in the "level" field). However, we also +incorrectly set the pending flag on a guest write to GICD_ISENABLERn +if the line of a level triggered interrupt was asserted. This keeps +the interrupt pending even if the line is de-asserted after some +time. + +This incorrect logic is a leftover of the initial 11MPCore GIC +implementation. That handles things slightly differently to the +architected GICv1 and GICv2. The 11MPCore TRM does not give a lot of +detail on the corner cases of its GIC's behaviour, and historically +we have not wanted to investigate exactly what it does in reality, so +QEMU's GIC model takes the approach of "retain our existing behaviour +for 11MPCore, and implement the architectural standard for later GIC +revisions". + +On that basis, commit 8d999995e45c10 in 2013 is where we added the +"level-triggered interrupt with the line asserted" handling to +gic_test_pending(), and we deliberately kept the old behaviour of +gic_test_pending() for REV_11MPCORE. That commit should have added +the "only if 11MPCore" condition to the setting of the pending bit on +writes to GICD_ISENABLERn, but forgot it. + +Add the missing "if REV_11MPCORE" condition, so that our behaviour +on GICv1 and GICv2 matches the GIC architecture requirements. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 8d999995e45c10 ("arm_gic: Fix GIC pending behavior") +Signed-off-by: Jan Klötzke +Message-id: 20240911114826.3558302-1-jan.kloetzke@kernkonzept.com +Reviewed-by: Peter Maydell +[PMM: expanded comment a little and converted to coding-style form; + expanded commit message with the historical backstory] +Signed-off-by: Peter Maydell +(cherry picked from commit 110684c9a69a02cbabfbddcd3afa921826ad565c) +Signed-off-by: zhujun2 +--- + hw/intc/arm_gic.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c +index 074cf50af2..dfe7a0a729 100644 +--- a/hw/intc/arm_gic.c ++++ b/hw/intc/arm_gic.c +@@ -1263,9 +1263,14 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, + trace_gic_enable_irq(irq + i); + } + GIC_DIST_SET_ENABLED(irq + i, cm); +- /* If a raised level triggered IRQ enabled then mark +- is as pending. */ +- if (GIC_DIST_TEST_LEVEL(irq + i, mask) ++ /* ++ * If a raised level triggered IRQ enabled then mark ++ * it as pending on 11MPCore. For other GIC revisions we ++ * handle the "level triggered and line asserted" check ++ * at the other end in gic_test_pending(). 
++ */ ++ if (s->revision == REV_11MPCORE ++ && GIC_DIST_TEST_LEVEL(irq + i, mask) + && !GIC_DIST_TEST_EDGE_TRIGGER(irq + i)) { + DPRINTF("Set %d pending mask %x\n", irq + i, mask); + GIC_DIST_SET_PENDING(irq + i, mask); +-- +2.41.0.windows.1 + diff --git a/hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch b/hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch deleted file mode 100644 index 2b77e0b0ce8a678b0c13b7f9f852522617b90c71..0000000000000000000000000000000000000000 --- a/hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 6bbfb186c8d66b745aeb08143d3198fcedc52d6c Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Mon, 6 Apr 2020 11:26:35 +0800 -Subject: [PATCH] hw/intc/gicv3: Add CPU hotplug realize hook - -GICv3 exposes individual CPU realization capability through -this hook. It will be used for hotplugged CPU. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/intc/arm_gicv3.c | 17 ++++++++++++++++- - hw/intc/arm_gicv3_common.c | 8 ++++++++ - hw/intc/arm_gicv3_kvm.c | 11 +++++++++++ - include/hw/intc/arm_gicv3.h | 2 ++ - include/hw/intc/arm_gicv3_common.h | 4 ++++ - 5 files changed, 41 insertions(+), 1 deletion(-) - -diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c -index 2fe79f794d..cacef26546 100644 ---- a/hw/intc/arm_gicv3.c -+++ b/hw/intc/arm_gicv3.c -@@ -361,6 +361,19 @@ static const MemoryRegionOps gic_ops[] = { - } - }; - -+static void gicv3_cpu_realize(GICv3State *s, int i) -+{ -+ gicv3_init_one_cpuif(s, i); -+} -+ -+static void arm_gicv3_cpu_hotplug_realize(GICv3State *s, int ncpu) -+{ -+ ARMGICv3Class *agc = ARM_GICV3_GET_CLASS(s); -+ -+ agc->parent_cpu_hotplug_realize(s, ncpu); -+ gicv3_cpu_realize(s, ncpu); -+} -+ - static void arm_gic_realize(DeviceState *dev, Error **errp) - { - /* Device instance realize function for the GIC sysbus device */ -@@ -388,7 +401,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) - } - - for (i = 0; i < s->num_cpu; i++) { -- gicv3_init_one_cpuif(s, 
i); -+ gicv3_cpu_realize(s, i); - } - } - -@@ -398,6 +411,8 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data) - ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass); - ARMGICv3Class *agc = ARM_GICV3_CLASS(klass); - -+ agc->parent_cpu_hotplug_realize = agcc->cpu_hotplug_realize; -+ agcc->cpu_hotplug_realize = arm_gicv3_cpu_hotplug_realize; - agcc->post_load = arm_gicv3_post_load; - device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); - } -diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c -index 798f295d7c..8740a52c9f 100644 ---- a/hw/intc/arm_gicv3_common.c -+++ b/hw/intc/arm_gicv3_common.c -@@ -313,6 +313,11 @@ static void arm_gicv3_common_cpu_realize(GICv3State *s, int ncpu) - gicv3_set_gicv3state(cpu, &s->cpu[ncpu]); - } - -+static void arm_gicv3_common_cpu_hotplug_realize(GICv3State *s, int ncpu) -+{ -+ arm_gicv3_common_cpu_realize(s, ncpu); -+} -+ - static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) - { - GICv3State *s = ARM_GICV3_COMMON(dev); -@@ -357,6 +362,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) - - for (i = 0; i < s->num_cpu; i++) { - CPUState *cpu = qemu_get_cpu(i); -+ - uint64_t cpu_affid; - int last; - -@@ -508,12 +514,14 @@ static Property arm_gicv3_common_properties[] = { - static void arm_gicv3_common_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -+ ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass); - ARMLinuxBootIfClass *albifc = ARM_LINUX_BOOT_IF_CLASS(klass); - - dc->reset = arm_gicv3_common_reset; - dc->realize = arm_gicv3_common_realize; - dc->props = arm_gicv3_common_properties; - dc->vmsd = &vmstate_gicv3; -+ agcc->cpu_hotplug_realize = arm_gicv3_common_cpu_hotplug_realize; - albifc->arm_linux_init = arm_gic_common_linux_init; - } - -diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c -index b2936938cb..f8d7be5479 100644 ---- a/hw/intc/arm_gicv3_kvm.c -+++ 
b/hw/intc/arm_gicv3_kvm.c -@@ -78,6 +78,7 @@ typedef struct KVMARMGICv3Class { - ARMGICv3CommonClass parent_class; - DeviceRealize parent_realize; - void (*parent_reset)(DeviceState *dev); -+ CPUHotplugRealize parent_cpu_hotplug_realize; - } KVMARMGICv3Class; - - static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level) -@@ -768,6 +769,14 @@ static void kvm_arm_gicv3_cpu_realize(GICv3State *s, int ncpu) - define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); - } - -+static void kvm_arm_gicv3_cpu_hotplug_realize(GICv3State *s, int ncpu) -+{ -+ KVMARMGICv3Class *kagcc = KVM_ARM_GICV3_GET_CLASS(s); -+ -+ kagcc->parent_cpu_hotplug_realize(s, ncpu); -+ kvm_arm_gicv3_cpu_realize(s, ncpu); -+} -+ - static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) - { - GICv3State *s = KVM_ARM_GICV3(dev); -@@ -884,6 +893,8 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) - ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass); - KVMARMGICv3Class *kgc = KVM_ARM_GICV3_CLASS(klass); - -+ kgc->parent_cpu_hotplug_realize = agcc->cpu_hotplug_realize; -+ agcc->cpu_hotplug_realize = kvm_arm_gicv3_cpu_hotplug_realize; - agcc->pre_save = kvm_arm_gicv3_get; - agcc->post_load = kvm_arm_gicv3_put; - device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, -diff --git a/include/hw/intc/arm_gicv3.h b/include/hw/intc/arm_gicv3.h -index 4a6fd85e22..98f2bdb7e9 100644 ---- a/include/hw/intc/arm_gicv3.h -+++ b/include/hw/intc/arm_gicv3.h -@@ -26,6 +26,8 @@ typedef struct ARMGICv3Class { - ARMGICv3CommonClass parent_class; - /*< public >*/ - -+ CPUHotplugRealize parent_cpu_hotplug_realize; -+ - DeviceRealize parent_realize; - } ARMGICv3Class; - -diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h -index 31ec9a1ae4..45cc50ed3b 100644 ---- a/include/hw/intc/arm_gicv3_common.h -+++ b/include/hw/intc/arm_gicv3_common.h -@@ -286,11 +286,15 @@ GICV3_BITMAP_ACCESSORS(edge_trigger) - #define ARM_GICV3_COMMON_GET_CLASS(obj) \ - 
OBJECT_GET_CLASS(ARMGICv3CommonClass, (obj), TYPE_ARM_GICV3_COMMON) - -+typedef void (*CPUHotplugRealize)(GICv3State *s, int ncpu); -+ - typedef struct ARMGICv3CommonClass { - /*< private >*/ - SysBusDeviceClass parent_class; - /*< public >*/ - -+ CPUHotplugRealize cpu_hotplug_realize; -+ - void (*pre_save)(GICv3State *s); - void (*post_load)(GICv3State *s); - } ARMGICv3CommonClass; --- -2.19.1 diff --git a/hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch b/hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d3c57b4e3e803ef673888d6e34af009ed660a11 --- /dev/null +++ b/hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch @@ -0,0 +1,297 @@ +From 4440ab99f7f7b04ef79f6b35b8330edf7fe66002 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Fri, 15 Dec 2023 11:07:36 +0800 +Subject: [PATCH] hw/intc/loongarch_extioi: Add dynamic cpu number support + +On LoongArch physical machine, one extioi interrupt controller only +supports 4 cpus. With processor more than 4 cpus, there are multiple +extioi interrupt controllers; if interrupts need to be routed to +other cpus, they are forwarded from extioi node0 to other extioi nodes. + +On virt machine model, there is simple extioi interrupt device model. +All cpus can access register of extioi interrupt controller, however +interrupt can only be route to 4 vcpu for compatible with old kernel. + +This patch adds dynamic cpu number support about extioi interrupt. +With old kernel legacy extioi model is used, however kernel can detect +and choose new route method in future, so that interrupt can be routed to +all vcpus. 
+ +conflict: + +++<<<<<<< HEAD + + .fields = (VMStateField[]) { +++======= ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT32_ARRAY(coreisr, ExtIOICore, EXTIOI_IRQS_GROUP_COUNT), ++ VMSTATE_END_OF_LIST() ++ } ++ }; ++ ++ static const VMStateDescription vmstate_loongarch_extioi = { ++ .name = TYPE_LOONGARCH_EXTIOI, ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { +++>>>>>>> hw/intc/loongarch_extioi: Add dynamic cpu number support + +solve: + +save: hw/intc/loongarch_extioi: Add dynamic cpu number support + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-4-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_extioi.c | 109 +++++++++++++++++++---------- + hw/loongarch/virt.c | 3 +- + include/hw/intc/loongarch_extioi.h | 11 ++- + 3 files changed, 82 insertions(+), 41 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index 77b4776958..28802bf3ef 100644 +--- a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -8,6 +8,7 @@ + #include "qemu/osdep.h" + #include "qemu/module.h" + #include "qemu/log.h" ++#include "qapi/error.h" + #include "hw/irq.h" + #include "hw/sysbus.h" + #include "hw/loongarch/virt.h" +@@ -32,23 +33,23 @@ static void extioi_update_irq(LoongArchExtIOI *s, int irq, int level) + if (((s->enable[irq_index]) & irq_mask) == 0) { + return; + } +- s->coreisr[cpu][irq_index] |= irq_mask; +- found = find_first_bit(s->sw_isr[cpu][ipnum], EXTIOI_IRQS); +- set_bit(irq, s->sw_isr[cpu][ipnum]); ++ s->cpu[cpu].coreisr[irq_index] |= irq_mask; ++ found = find_first_bit(s->cpu[cpu].sw_isr[ipnum], EXTIOI_IRQS); ++ set_bit(irq, s->cpu[cpu].sw_isr[ipnum]); + if (found < EXTIOI_IRQS) { + /* other irq is handling, need not update parent irq level */ + return; + } + } else { +- s->coreisr[cpu][irq_index] &= ~irq_mask; +- clear_bit(irq, s->sw_isr[cpu][ipnum]); +- found = find_first_bit(s->sw_isr[cpu][ipnum], EXTIOI_IRQS); ++
s->cpu[cpu].coreisr[irq_index] &= ~irq_mask; ++ clear_bit(irq, s->cpu[cpu].sw_isr[ipnum]); ++ found = find_first_bit(s->cpu[cpu].sw_isr[ipnum], EXTIOI_IRQS); + if (found < EXTIOI_IRQS) { + /* other irq is handling, need not update parent irq level */ + return; + } + } +- qemu_set_irq(s->parent_irq[cpu][ipnum], level); ++ qemu_set_irq(s->cpu[cpu].parent_irq[ipnum], level); + } + + static void extioi_setirq(void *opaque, int irq, int level) +@@ -96,7 +97,7 @@ static MemTxResult extioi_readw(void *opaque, hwaddr addr, uint64_t *data, + index = (offset - EXTIOI_COREISR_START) >> 2; + /* using attrs to get current cpu index */ + cpu = attrs.requester_id; +- *data = s->coreisr[cpu][index]; ++ *data = s->cpu[cpu].coreisr[index]; + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END - 1: + index = (offset - EXTIOI_COREMAP_START) >> 2; +@@ -189,8 +190,8 @@ static MemTxResult extioi_writew(void *opaque, hwaddr addr, + index = (offset - EXTIOI_COREISR_START) >> 2; + /* using attrs to get current cpu index */ + cpu = attrs.requester_id; +- old_data = s->coreisr[cpu][index]; +- s->coreisr[cpu][index] = old_data & ~val; ++ old_data = s->cpu[cpu].coreisr[index]; ++ s->cpu[cpu].coreisr[index] = old_data & ~val; + /* write 1 to clear interrupt */ + old_data &= val; + irq = ctz32(old_data); +@@ -248,14 +249,61 @@ static const MemoryRegionOps extioi_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + +-static const VMStateDescription vmstate_loongarch_extioi = { +- .name = TYPE_LOONGARCH_EXTIOI, ++static void loongarch_extioi_realize(DeviceState *dev, Error **errp) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(dev); ++ SysBusDevice *sbd = SYS_BUS_DEVICE(dev); ++ int i, pin; ++ ++ if (s->num_cpu == 0) { ++ error_setg(errp, "num-cpu must be at least 1"); ++ return; ++ } ++ ++ for (i = 0; i < EXTIOI_IRQS; i++) { ++ sysbus_init_irq(sbd, &s->irq[i]); ++ } ++ ++ qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS); ++ memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, 
++ s, "extioi_system_mem", 0x900); ++ sysbus_init_mmio(sbd, &s->extioi_system_mem); ++ s->cpu = g_new0(ExtIOICore, s->num_cpu); ++ if (s->cpu == NULL) { ++ error_setg(errp, "Memory allocation for ExtIOICore faile"); ++ return; ++ } ++ ++ for (i = 0; i < s->num_cpu; i++) { ++ for (pin = 0; pin < LS3A_INTC_IP; pin++) { ++ qdev_init_gpio_out(dev, &s->cpu[i].parent_irq[pin], 1); ++ } ++ } ++} ++ ++static void loongarch_extioi_finalize(Object *obj) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(obj); ++ ++ g_free(s->cpu); ++} ++ ++static const VMStateDescription vmstate_extioi_core = { ++ .name = "extioi-core", + .version_id = 1, + .minimum_version_id = 1, +- .fields = (VMStateField[]) { ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT32_ARRAY(coreisr, ExtIOICore, EXTIOI_IRQS_GROUP_COUNT), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static const VMStateDescription vmstate_loongarch_extioi = { ++ .name = TYPE_LOONGARCH_EXTIOI, ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { + VMSTATE_UINT32_ARRAY(bounce, LoongArchExtIOI, EXTIOI_IRQS_GROUP_COUNT), +- VMSTATE_UINT32_2DARRAY(coreisr, LoongArchExtIOI, EXTIOI_CPUS, +- EXTIOI_IRQS_GROUP_COUNT), + VMSTATE_UINT32_ARRAY(nodetype, LoongArchExtIOI, + EXTIOI_IRQS_NODETYPE_COUNT / 2), + VMSTATE_UINT32_ARRAY(enable, LoongArchExtIOI, EXTIOI_IRQS / 32), +@@ -265,45 +313,32 @@ static const VMStateDescription vmstate_loongarch_extioi = { + VMSTATE_UINT8_ARRAY(sw_ipmap, LoongArchExtIOI, EXTIOI_IRQS_IPMAP_SIZE), + VMSTATE_UINT8_ARRAY(sw_coremap, LoongArchExtIOI, EXTIOI_IRQS), + ++ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchExtIOI, num_cpu, ++ vmstate_extioi_core, ExtIOICore), + VMSTATE_END_OF_LIST() + } + }; + +-static void loongarch_extioi_instance_init(Object *obj) +-{ +- SysBusDevice *dev = SYS_BUS_DEVICE(obj); +- LoongArchExtIOI *s = LOONGARCH_EXTIOI(obj); +- int i, cpu, pin; +- +- for (i = 0; i < EXTIOI_IRQS; i++) { +- sysbus_init_irq(dev, &s->irq[i]); +- } +- +- 
qdev_init_gpio_in(DEVICE(obj), extioi_setirq, EXTIOI_IRQS); +- +- for (cpu = 0; cpu < EXTIOI_CPUS; cpu++) { +- for (pin = 0; pin < LS3A_INTC_IP; pin++) { +- qdev_init_gpio_out(DEVICE(obj), &s->parent_irq[cpu][pin], 1); +- } +- } +- memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, +- s, "extioi_system_mem", 0x900); +- sysbus_init_mmio(dev, &s->extioi_system_mem); +-} ++static Property extioi_properties[] = { ++ DEFINE_PROP_UINT32("num-cpu", LoongArchExtIOI, num_cpu, 1), ++ DEFINE_PROP_END_OF_LIST(), ++}; + + static void loongarch_extioi_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + ++ dc->realize = loongarch_extioi_realize; ++ device_class_set_props(dc, extioi_properties); + dc->vmsd = &vmstate_loongarch_extioi; + } + + static const TypeInfo loongarch_extioi_info = { + .name = TYPE_LOONGARCH_EXTIOI, + .parent = TYPE_SYS_BUS_DEVICE, +- .instance_init = loongarch_extioi_instance_init, + .instance_size = sizeof(struct LoongArchExtIOI), + .class_init = loongarch_extioi_class_init, ++ .instance_finalize = loongarch_extioi_finalize, + }; + + static void loongarch_extioi_register_types(void) +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 13d19b6da3..c9a680e61a 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -582,6 +582,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + + /* Create EXTIOI device */ + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); ++ qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus); + sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); + memory_region_add_subregion(&lams->system_iocsr, APIC_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); +@@ -590,7 +591,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + * connect ext irq to the cpu irq + * cpu_pin[9:2] <= intc_pin[7:0] + */ +- for (cpu = 0; cpu < MIN(ms->smp.cpus, EXTIOI_CPUS); cpu++) { ++ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { + cpudev = 
DEVICE(qemu_get_cpu(cpu)); + for (pin = 0; pin < LS3A_INTC_IP; pin++) { + qdev_connect_gpio_out(extioi, (cpu * 8 + pin), +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index 110e5e8873..a0a46b888c 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -40,24 +40,29 @@ + #define EXTIOI_COREMAP_START (0xC00 - APIC_OFFSET) + #define EXTIOI_COREMAP_END (0xD00 - APIC_OFFSET) + ++typedef struct ExtIOICore { ++ uint32_t coreisr[EXTIOI_IRQS_GROUP_COUNT]; ++ DECLARE_BITMAP(sw_isr[LS3A_INTC_IP], EXTIOI_IRQS); ++ qemu_irq parent_irq[LS3A_INTC_IP]; ++} ExtIOICore; ++ + #define TYPE_LOONGARCH_EXTIOI "loongarch.extioi" + OBJECT_DECLARE_SIMPLE_TYPE(LoongArchExtIOI, LOONGARCH_EXTIOI) + struct LoongArchExtIOI { + SysBusDevice parent_obj; ++ uint32_t num_cpu; + /* hardware state */ + uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT]; + uint32_t isr[EXTIOI_IRQS / 32]; +- uint32_t coreisr[EXTIOI_CPUS][EXTIOI_IRQS_GROUP_COUNT]; + uint32_t enable[EXTIOI_IRQS / 32]; + uint32_t ipmap[EXTIOI_IRQS_IPMAP_SIZE / 4]; + uint32_t coremap[EXTIOI_IRQS / 4]; + uint32_t sw_pending[EXTIOI_IRQS / 32]; +- DECLARE_BITMAP(sw_isr[EXTIOI_CPUS][LS3A_INTC_IP], EXTIOI_IRQS); + uint8_t sw_ipmap[EXTIOI_IRQS_IPMAP_SIZE]; + uint8_t sw_coremap[EXTIOI_IRQS]; +- qemu_irq parent_irq[EXTIOI_CPUS][LS3A_INTC_IP]; + qemu_irq irq[EXTIOI_IRQS]; ++ ExtIOICore *cpu; + MemoryRegion extioi_system_mem; + }; + #endif /* LOONGARCH_EXTIOI_H */ +-- +2.27.0 + diff --git a/hw-intc-loongarch_extioi-Add-virt-extension-support.patch b/hw-intc-loongarch_extioi-Add-virt-extension-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..e02d5afcf8f7bf23790de1713829abbdd9b51d73 --- /dev/null +++ b/hw-intc-loongarch_extioi-Add-virt-extension-support.patch @@ -0,0 +1,434 @@ +From 04aef27ede108edd63d288dd3bb395e22a603f42 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 11 Mar 2024 15:01:31 
+0800 +Subject: [PATCH] hw/intc/loongarch_extioi: Add virt extension support + +With hardware extioi, irq can be routed to four vcpus with hardware +extioi. This patch adds virt extension support, so that irq can +be routed to 256 vcpus. + +Signed-off-by: Song Gao +Signed-off-by: Bibo Mao +--- + hw/intc/loongarch_extioi.c | 88 ++++++++++++++++++++- + hw/loongarch/virt.c | 122 ++++++++++++++++++++++++++--- + include/hw/intc/loongarch_extioi.h | 21 +++++ + include/hw/loongarch/virt.h | 3 + + target/loongarch/cpu.h | 1 + + 5 files changed, 220 insertions(+), 15 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index bdfa3b481e..fa23e247ca 100644 +--- a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -143,15 +143,17 @@ static inline void extioi_update_sw_coremap(LoongArchExtIOI *s, int irq, + + for (i = 0; i < 4; i++) { + cpu = val & 0xff; +- cpu = ctz32(cpu); +- cpu = (cpu >= 4) ?
0 : cpu; ++ } + val = val >> 8; + + if (s->sw_coremap[irq + i] == cpu) { + continue; + } + +- if (notify && test_bit(irq, (unsigned long *)s->isr)) { ++ if (notify && test_bit(irq + i, (unsigned long *)s->isr)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ +@@ -265,6 +267,61 @@ static const MemoryRegionOps extioi_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static MemTxResult extioi_virt_readw(void *opaque, hwaddr addr, uint64_t *data, ++ unsigned size, MemTxAttrs attrs) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); ++ ++ switch (addr) { ++ case EXTIOI_VIRT_FEATURES: ++ *data = s->features; ++ break; ++ case EXTIOI_VIRT_CONFIG: ++ *data = s->status; ++ break; ++ default: ++ break; ++ } ++ ++ return MEMTX_OK; ++} ++ ++static MemTxResult extioi_virt_writew(void *opaque, hwaddr addr, ++ uint64_t val, unsigned size, ++ MemTxAttrs attrs) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); ++ ++ switch (addr) { ++ case EXTIOI_VIRT_FEATURES: ++ return MEMTX_ACCESS_ERROR; ++ ++ case EXTIOI_VIRT_CONFIG: ++ /* ++ * extioi features can only be set at disabled status ++ */ ++ if ((s->status & BIT(EXTIOI_ENABLE)) && val) { ++ return MEMTX_ACCESS_ERROR; ++ } ++ ++ s->status = val & s->features; ++ break; ++ default: ++ break; ++ } ++ return MEMTX_OK; ++} ++ ++static const MemoryRegionOps extioi_virt_ops = { ++ .read_with_attrs = extioi_virt_readw, ++ .write_with_attrs = extioi_virt_writew, ++ .impl.min_access_size = 4, ++ .impl.max_access_size = 4, ++ .valid.min_access_size = 4, ++ .valid.max_access_size = 8, ++ .endianness = DEVICE_LITTLE_ENDIAN, ++}; ++ + static void loongarch_extioi_realize(DeviceState *dev, Error **errp) + { + LoongArchExtIOI *s = LOONGARCH_EXTIOI(dev); +@@ -284,6 +341,16 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp) + memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, + s, "extioi_system_mem", 0x900); + sysbus_init_mmio(sbd, &s->extioi_system_mem); ++ ++ if (s->features & 
BIT(EXTIOI_HAS_VIRT_EXTENSION)) { ++ memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops, ++ s, "extioi_virt", EXTIOI_VIRT_SIZE); ++ sysbus_init_mmio(sbd, &s->virt_extend); ++ s->features |= EXTIOI_VIRT_HAS_FEATURES; ++ } else { ++ s->status |= BIT(EXTIOI_ENABLE); ++ } ++ + s->cpu = g_new0(ExtIOICore, s->num_cpu); + if (s->cpu == NULL) { + error_setg(errp, "Memory allocation for ExtIOICore faile"); +@@ -304,6 +371,16 @@ static void loongarch_extioi_finalize(Object *obj) + g_free(s->cpu); + } + ++static void loongarch_extioi_reset(DeviceState *d) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(d); ++ ++ /* use legacy interrupt routing method by default */ ++ if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) { ++ s->status = 0; ++ } ++} ++ + static int vmstate_extioi_post_load(void *opaque, int version_id) + { + LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); +@@ -347,12 +424,16 @@ static const VMStateDescription vmstate_loongarch_extioi = { + + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchExtIOI, num_cpu, + vmstate_extioi_core, ExtIOICore), ++ VMSTATE_UINT32(features, LoongArchExtIOI), ++ VMSTATE_UINT32(status, LoongArchExtIOI), + VMSTATE_END_OF_LIST() + } + }; + + static Property extioi_properties[] = { + DEFINE_PROP_UINT32("num-cpu", LoongArchExtIOI, num_cpu, 1), ++ DEFINE_PROP_BIT("has-virtualization-extension", LoongArchExtIOI, features, ++ EXTIOI_HAS_VIRT_EXTENSION, 0), + DEFINE_PROP_END_OF_LIST(), + }; + +@@ -361,6 +442,7 @@ static void loongarch_extioi_class_init(ObjectClass *klass, void *data) + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = loongarch_extioi_realize; ++ dc->reset = loongarch_extioi_reset; + device_class_set_props(dc, extioi_properties); + dc->vmsd = &vmstate_loongarch_extioi; + } +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 6ef40fa24a..01e59f3a95 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -15,6 +15,8 @@ + #include "sysemu/runstate.h" + #include "sysemu/reset.h" + #include 
"sysemu/rtc.h" ++#include "sysemu/tcg.h" ++#include "sysemu/kvm.h" + #include "hw/loongarch/virt.h" + #include "exec/address-spaces.h" + #include "hw/irq.h" +@@ -54,6 +56,31 @@ struct loaderparams { + const char *initrd_filename; + }; + ++static bool virt_is_veiointc_enabled(LoongArchMachineState *lams) ++{ ++ if (lams->veiointc == ON_OFF_AUTO_OFF) { ++ return false; ++ } ++ return true; ++} ++ ++static void virt_get_veiointc(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ OnOffAuto veiointc = lams->veiointc; ++ ++ visit_type_OnOffAuto(v, name, &veiointc, errp); ++} ++ ++static void virt_set_veiointc(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ ++ visit_type_OnOffAuto(v, name, &lams->veiointc, errp); ++} ++ + static PFlashCFI01 *virt_flash_create1(LoongArchMachineState *lams, + const char *name, + const char *alias_prop_name) +@@ -618,9 +645,18 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + /* Create EXTIOI device */ + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); + qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus); ++ if (virt_is_veiointc_enabled(lams)) { ++ qdev_prop_set_bit(extioi, "has-virtualization-extension", true); ++ } + sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); ++ + memory_region_add_subregion(&lams->system_iocsr, APIC_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); ++ if (virt_is_veiointc_enabled(lams)) { ++ memory_region_add_subregion(&lams->system_iocsr, EXTIOI_VIRT_BASE, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1)); ++ } ++ lams->extioi = extioi; + + /* + * connect ext irq to the cpu irq +@@ -780,32 +816,87 @@ static void loongarch_direct_kernel_boot(LoongArchMachineState *lams, + } + } + +-static void loongarch_qemu_write(void *opaque, hwaddr addr, +- uint64_t val, unsigned size) ++static MemTxResult 
loongarch_qemu_write(void *opaque, hwaddr addr, uint64_t val, ++ unsigned size, MemTxAttrs attrs) + { ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(opaque); ++ uint64_t features; ++ ++ switch (addr) { ++ case MISC_FUNC_REG: ++ if (!virt_is_veiointc_enabled(lams)) { ++ return MEMTX_OK; ++ } ++ ++ features = address_space_ldl(&lams->as_iocsr, ++ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, ++ attrs, NULL); ++ if (val & BIT_ULL(IOCSRM_EXTIOI_EN)) { ++ features |= BIT(EXTIOI_ENABLE); ++ } ++ if (val & BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE)) { ++ features |= BIT(EXTIOI_ENABLE_INT_ENCODE); ++ } ++ ++ address_space_stl(&lams->as_iocsr, ++ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, ++ features, attrs, NULL); ++ } ++ ++ return MEMTX_OK; + } + +-static uint64_t loongarch_qemu_read(void *opaque, hwaddr addr, unsigned size) ++static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, ++ uint64_t *data, ++ unsigned size, MemTxAttrs attrs) + { ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(opaque); ++ uint64_t ret = 0; ++ int features; ++ + switch (addr) { + case VERSION_REG: +- return 0x11ULL; ++ ret = 0x11ULL; ++ break; + case FEATURE_REG: +- return 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | +- 1ULL << IOCSRF_CSRIPI; ++ ret = 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | ++ 1ULL << IOCSRF_CSRIPI; ++ if (kvm_enabled()) { ++ ret |= 1ULL << IOCSRF_VM; ++ } ++ break; + case VENDOR_REG: +- return 0x6e6f73676e6f6f4cULL; /* "Loongson" */ ++ ret = 0x6e6f73676e6f6f4cULL; /* "Loongson" */ ++ break; + case CPUNAME_REG: +- return 0x303030354133ULL; /* "3A5000" */ ++ ret = 0x303030354133ULL; /* "3A5000" */ ++ break; + case MISC_FUNC_REG: +- return 1ULL << IOCSRM_EXTIOI_EN; ++ if (!virt_is_veiointc_enabled(lams)) { ++ ret |= BIT_ULL(IOCSRM_EXTIOI_EN); ++ break; ++ } ++ ++ features = address_space_ldl(&lams->as_iocsr, ++ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, ++ attrs, NULL); ++ if (features & BIT(EXTIOI_ENABLE)) { ++ ret |= BIT_ULL(IOCSRM_EXTIOI_EN); ++ } ++ ++ if (features & 
BIT(EXTIOI_ENABLE_INT_ENCODE)) { ++ ret |= BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE); ++ } ++ break; + } +- return 0ULL; ++ ++ *data = ret; ++ return MEMTX_OK; + } + + static const MemoryRegionOps loongarch_qemu_ops = { +- .read = loongarch_qemu_read, +- .write = loongarch_qemu_write, ++ .read_with_attrs = loongarch_qemu_read, ++ .write_with_attrs = loongarch_qemu_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, +@@ -1010,6 +1101,9 @@ static void loongarch_machine_initfn(Object *obj) + { + LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); + ++ if (tcg_enabled()) { ++ lams->veiointc = ON_OFF_AUTO_OFF; ++ } + lams->acpi = ON_OFF_AUTO_AUTO; + lams->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + lams->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +@@ -1197,6 +1291,10 @@ static void loongarch_class_init(ObjectClass *oc, void *data) + NULL, NULL); + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); ++ object_class_property_add(oc, "v-eiointc", "OnOffAuto", ++ virt_get_veiointc, virt_set_veiointc, NULL, NULL); ++ object_class_property_set_description(oc, "v-eiointc", ++ "Enable Virt Extend I/O Interrupt Controller"); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + #ifdef CONFIG_TPM + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index a0a46b888c..98f348c49d 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -40,6 +40,24 @@ + #define EXTIOI_COREMAP_START (0xC00 - APIC_OFFSET) + #define EXTIOI_COREMAP_END (0xD00 - APIC_OFFSET) + ++#define EXTIOI_VIRT_BASE (0x40000000) ++#define EXTIOI_VIRT_SIZE (0x1000) ++#define EXTIOI_VIRT_FEATURES (0x0) ++#define EXTIOI_HAS_VIRT_EXTENSION (0) ++#define EXTIOI_HAS_ENABLE_OPTION (1) ++#define EXTIOI_HAS_INT_ENCODE (2) ++#define EXTIOI_HAS_CPU_ENCODE (3) ++#define EXTIOI_VIRT_HAS_FEATURES (BIT(EXTIOI_HAS_VIRT_EXTENSION) \ ++ 
| BIT(EXTIOI_HAS_ENABLE_OPTION)\ ++ | BIT(EXTIOI_HAS_INT_ENCODE) \ ++ | BIT(EXTIOI_HAS_CPU_ENCODE)) ++#define EXTIOI_VIRT_CONFIG (0x4) ++#define EXTIOI_ENABLE (1) ++#define EXTIOI_ENABLE_INT_ENCODE (2) ++#define EXTIOI_ENABLE_CPU_ENCODE (3) ++#define EXTIOI_VIRT_COREMAP_START (0x40) ++#define EXTIOI_VIRT_COREMAP_END (0x240) ++ + typedef struct ExtIOICore { + uint32_t coreisr[EXTIOI_IRQS_GROUP_COUNT]; + DECLARE_BITMAP(sw_isr[LS3A_INTC_IP], EXTIOI_IRQS); +@@ -51,6 +69,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(LoongArchExtIOI, LOONGARCH_EXTIOI) + struct LoongArchExtIOI { + SysBusDevice parent_obj; + uint32_t num_cpu; ++ uint32_t features; ++ uint32_t status; + /* hardware state */ + uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT]; +@@ -64,5 +84,6 @@ struct LoongArchExtIOI { + qemu_irq irq[EXTIOI_IRQS]; + ExtIOICore *cpu; + MemoryRegion extioi_system_mem; ++ MemoryRegion virt_extend; + }; + #endif /* LOONGARCH_EXTIOI_H */ +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 252f7df7f4..99447fd1d6 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -45,16 +45,19 @@ struct LoongArchMachineState { + Notifier machine_done; + Notifier powerdown_notifier; + OnOffAuto acpi; ++ OnOffAuto veiointc; + char *oem_id; + char *oem_table_id; + DeviceState *acpi_ged; + int fdt_size; + DeviceState *platform_bus_dev; ++ DeviceState *extioi; + PCIBus *pci_bus; + PFlashCFI01 *flash[2]; + MemoryRegion system_iocsr; + MemoryRegion iocsr_mem; + AddressSpace as_iocsr; ++ int features; + }; + + #define TYPE_LOONGARCH_MACHINE MACHINE_TYPE_NAME("virt") +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 4aba8aba4c..4749d41c8c 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -36,6 +36,7 @@ + #define CPUNAME_REG 0x20 + #define MISC_FUNC_REG 0x420 + #define IOCSRM_EXTIOI_EN 48 ++#define IOCSRM_EXTIOI_INT_ENCODE 49 + + #define IOCSR_MEM_SIZE 0x428 + +-- +2.33.0 + diff 
--git a/hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch b/hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch new file mode 100644 index 0000000000000000000000000000000000000000..03a378fe9aa98382c1b3b24780fe6bc39466d756 --- /dev/null +++ b/hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch @@ -0,0 +1,193 @@ +From db8c355d923c218c5ca373c4acd5d13493152889 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Fri, 15 Dec 2023 17:42:58 +0800 +Subject: [PATCH] hw/intc/loongarch_extioi: Add vmstate post_load support + +There are elements sw_ipmap and sw_coremap, which are used to speed +up irq injection flow. They are saved and restored in vmstate during +migration, though they can be calculated from hw registers. Here +post_load is added to get sw_ipmap and sw_coremap from extioi hw +state. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-5-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_extioi.c | 120 +++++++++++++++++++++++-------------- + 1 file changed, 76 insertions(+), 44 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index 28802bf3ef..bdfa3b481e 100644 +--- a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -130,12 +130,66 @@ static inline void extioi_enable_irq(LoongArchExtIOI *s, int index,\ + } + } + ++static inline void extioi_update_sw_coremap(LoongArchExtIOI *s, int irq, ++ uint64_t val, bool notify) ++{ ++ int i, cpu; ++ ++ /* ++ * loongarch only support little endian, ++ * so we paresd the value with little endian. ++ */ ++ val = cpu_to_le64(val); ++ ++ for (i = 0; i < 4; i++) { ++ cpu = val & 0xff; ++ cpu = ctz32(cpu); ++ cpu = (cpu >= 4) ?
0 : cpu; ++ val = val >> 8; ++ ++ if (s->sw_coremap[irq + i] == cpu) { ++ continue; ++ } ++ ++ if (notify && test_bit(irq, (unsigned long *)s->isr)) { ++ /* ++ * lower irq at old cpu and raise irq at new cpu ++ */ ++ extioi_update_irq(s, irq + i, 0); ++ s->sw_coremap[irq + i] = cpu; ++ extioi_update_irq(s, irq + i, 1); ++ } else { ++ s->sw_coremap[irq + i] = cpu; ++ } ++ } ++} ++ ++static inline void extioi_update_sw_ipmap(LoongArchExtIOI *s, int index, ++ uint64_t val) ++{ ++ int i; ++ uint8_t ipnum; ++ ++ /* ++ * loongarch only support little endian, ++ * so we paresd the value with little endian. ++ */ ++ val = cpu_to_le64(val); ++ for (i = 0; i < 4; i++) { ++ ipnum = val & 0xff; ++ ipnum = ctz32(ipnum); ++ ipnum = (ipnum >= 4) ? 0 : ipnum; ++ s->sw_ipmap[index * 4 + i] = ipnum; ++ val = val >> 8; ++ } ++} ++ + static MemTxResult extioi_writew(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) + { + LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); +- int i, cpu, index, old_data, irq; ++ int cpu, index, old_data, irq; + uint32_t offset; + + trace_loongarch_extioi_writew(addr, val); +@@ -153,20 +207,7 @@ static MemTxResult extioi_writew(void *opaque, hwaddr addr, + */ + index = (offset - EXTIOI_IPMAP_START) >> 2; + s->ipmap[index] = val; +- /* +- * loongarch only support little endian, +- * so we paresd the value with little endian. +- */ +- val = cpu_to_le64(val); +- for (i = 0; i < 4; i++) { +- uint8_t ipnum; +- ipnum = val & 0xff; +- ipnum = ctz32(ipnum); +- ipnum = (ipnum >= 4) ? 0 : ipnum; +- s->sw_ipmap[index * 4 + i] = ipnum; +- val = val >> 8; +- } +- ++ extioi_update_sw_ipmap(s, index, val); + break; + case EXTIOI_ENABLE_START ... 
EXTIOI_ENABLE_END - 1: + index = (offset - EXTIOI_ENABLE_START) >> 2; +@@ -205,33 +246,8 @@ static MemTxResult extioi_writew(void *opaque, hwaddr addr, + irq = offset - EXTIOI_COREMAP_START; + index = irq / 4; + s->coremap[index] = val; +- /* +- * loongarch only support little endian, +- * so we paresd the value with little endian. +- */ +- val = cpu_to_le64(val); +- +- for (i = 0; i < 4; i++) { +- cpu = val & 0xff; +- cpu = ctz32(cpu); +- cpu = (cpu >= 4) ? 0 : cpu; +- val = val >> 8; +- +- if (s->sw_coremap[irq + i] == cpu) { +- continue; +- } +- +- if (test_bit(irq, (unsigned long *)s->isr)) { +- /* +- * lower irq at old cpu and raise irq at new cpu +- */ +- extioi_update_irq(s, irq + i, 0); +- s->sw_coremap[irq + i] = cpu; +- extioi_update_irq(s, irq + i, 1); +- } else { +- s->sw_coremap[irq + i] = cpu; +- } +- } ++ ++ extioi_update_sw_coremap(s, irq, val, true); + break; + default: + break; +@@ -288,6 +304,23 @@ static void loongarch_extioi_finalize(Object *obj) + g_free(s->cpu); + } + ++static int vmstate_extioi_post_load(void *opaque, int version_id) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); ++ int i, start_irq; ++ ++ for (i = 0; i < (EXTIOI_IRQS / 4); i++) { ++ start_irq = i * 4; ++ extioi_update_sw_coremap(s, start_irq, s->coremap[i], false); ++ } ++ ++ for (i = 0; i < (EXTIOI_IRQS_IPMAP_SIZE / 4); i++) { ++ extioi_update_sw_ipmap(s, i, s->ipmap[i]); ++ } ++ ++ return 0; ++} ++ + static const VMStateDescription vmstate_extioi_core = { + .name = "extioi-core", + .version_id = 1, +@@ -302,6 +335,7 @@ static const VMStateDescription vmstate_loongarch_extioi = { + .name = TYPE_LOONGARCH_EXTIOI, + .version_id = 2, + .minimum_version_id = 2, ++ .post_load = vmstate_extioi_post_load, + .fields = (const VMStateField[]) { + VMSTATE_UINT32_ARRAY(bounce, LoongArchExtIOI, EXTIOI_IRQS_GROUP_COUNT), + VMSTATE_UINT32_ARRAY(nodetype, LoongArchExtIOI, +@@ -310,8 +344,6 @@ static const VMStateDescription vmstate_loongarch_extioi = { + VMSTATE_UINT32_ARRAY(isr, 
LoongArchExtIOI, EXTIOI_IRQS / 32), + VMSTATE_UINT32_ARRAY(ipmap, LoongArchExtIOI, EXTIOI_IRQS_IPMAP_SIZE / 4), + VMSTATE_UINT32_ARRAY(coremap, LoongArchExtIOI, EXTIOI_IRQS / 4), +- VMSTATE_UINT8_ARRAY(sw_ipmap, LoongArchExtIOI, EXTIOI_IRQS_IPMAP_SIZE), +- VMSTATE_UINT8_ARRAY(sw_coremap, LoongArchExtIOI, EXTIOI_IRQS), + + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchExtIOI, num_cpu, + vmstate_extioi_core, ExtIOICore), +-- +2.27.0 + diff --git a/hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch b/hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..8309aa9b0b440f5eadfad2fa9d3bee075fe64158 --- /dev/null +++ b/hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch @@ -0,0 +1,286 @@ +From b8f53cfa91e86d5163318f8ade1cca18e94f3eb7 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 13 Dec 2023 12:12:01 +0800 +Subject: [PATCH] hw/intc/loongarch_ipi: Use MemTxAttrs interface for ipi ops + +There are two interface pairs for MemoryRegionOps, read/write and +read_with_attrs/write_with_attrs. The latter is better for ipi device +emulation since initial cpu can be parsed from attrs.requester_id. + +And requester_id can be overridden for IOCSR_IPI_SEND and mail_send +function when it is to forward message to another vcpu.
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-2-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_ipi.c | 136 +++++++++++++++++++++++----------------- + 1 file changed, 77 insertions(+), 59 deletions(-) + +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index 67858b521c..221246c5cb 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -17,14 +17,16 @@ + #include "target/loongarch/internals.h" + #include "trace.h" + +-static void loongarch_ipi_writel(void *, hwaddr, uint64_t, unsigned); +- +-static uint64_t loongarch_ipi_readl(void *opaque, hwaddr addr, unsigned size) ++static MemTxResult loongarch_ipi_readl(void *opaque, hwaddr addr, ++ uint64_t *data, ++ unsigned size, MemTxAttrs attrs) + { +- IPICore *s = opaque; ++ IPICore *s; ++ LoongArchIPI *ipi = opaque; + uint64_t ret = 0; + int index = 0; + ++ s = &ipi->ipi_core; + addr &= 0xff; + switch (addr) { + case CORE_STATUS_OFF: +@@ -49,10 +51,12 @@ static uint64_t loongarch_ipi_readl(void *opaque, hwaddr addr, unsigned size) + } + + trace_loongarch_ipi_read(size, (uint64_t)addr, ret); +- return ret; ++ *data = ret; ++ return MEMTX_OK; + } + +-static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr) ++static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr, ++ MemTxAttrs attrs) + { + int i, mask = 0, data = 0; + +@@ -62,7 +66,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr) + */ + if ((val >> 27) & 0xf) { + data = address_space_ldl(&env->address_space_iocsr, addr, +- MEMTXATTRS_UNSPECIFIED, NULL); ++ attrs, NULL); + for (i = 0; i < 4; i++) { + /* get mask for byte writing */ + if (val & (0x1 << (27 + i))) { +@@ -74,7 +78,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr) + data &= mask; + data |= (val >> 32) & ~mask; + address_space_stl(&env->address_space_iocsr, addr, +- data, MEMTXATTRS_UNSPECIFIED, NULL); ++ data, 
attrs, NULL); + } + + static int archid_cmp(const void *a, const void *b) +@@ -103,80 +107,72 @@ static CPUState *ipi_getcpu(int arch_id) + CPUArchId *archid; + + archid = find_cpu_by_archid(machine, arch_id); +- return CPU(archid->cpu); +-} +- +-static void ipi_send(uint64_t val) +-{ +- uint32_t cpuid; +- uint8_t vector; +- CPUState *cs; +- LoongArchCPU *cpu; +- LoongArchIPI *s; +- +- cpuid = extract32(val, 16, 10); +- if (cpuid >= LOONGARCH_MAX_CPUS) { +- trace_loongarch_ipi_unsupported_cpuid("IOCSR_IPI_SEND", cpuid); +- return; ++ if (archid) { ++ return CPU(archid->cpu); + } + +- /* IPI status vector */ +- vector = extract8(val, 0, 5); +- +- cs = ipi_getcpu(cpuid); +- cpu = LOONGARCH_CPU(cs); +- s = LOONGARCH_IPI(cpu->env.ipistate); +- loongarch_ipi_writel(&s->ipi_core, CORE_SET_OFF, BIT(vector), 4); ++ return NULL; + } + +-static void mail_send(uint64_t val) ++static MemTxResult mail_send(uint64_t val, MemTxAttrs attrs) + { + uint32_t cpuid; + hwaddr addr; +- CPULoongArchState *env; + CPUState *cs; +- LoongArchCPU *cpu; + + cpuid = extract32(val, 16, 10); + if (cpuid >= LOONGARCH_MAX_CPUS) { + trace_loongarch_ipi_unsupported_cpuid("IOCSR_MAIL_SEND", cpuid); +- return; ++ return MEMTX_DECODE_ERROR; + } + +- addr = 0x1020 + (val & 0x1c); + cs = ipi_getcpu(cpuid); +- cpu = LOONGARCH_CPU(cs); +- env = &cpu->env; +- send_ipi_data(env, val, addr); ++ if (cs == NULL) { ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* override requester_id */ ++ addr = SMP_IPI_MAILBOX + CORE_BUF_20 + (val & 0x1c); ++ attrs.requester_id = cs->cpu_index; ++ send_ipi_data(&LOONGARCH_CPU(cs)->env, val, addr, attrs); ++ return MEMTX_OK; + } + +-static void any_send(uint64_t val) ++static MemTxResult any_send(uint64_t val, MemTxAttrs attrs) + { + uint32_t cpuid; + hwaddr addr; +- CPULoongArchState *env; + CPUState *cs; +- LoongArchCPU *cpu; + + cpuid = extract32(val, 16, 10); + if (cpuid >= LOONGARCH_MAX_CPUS) { + trace_loongarch_ipi_unsupported_cpuid("IOCSR_ANY_SEND", cpuid); +- return; ++ 
return MEMTX_DECODE_ERROR; + } + +- addr = val & 0xffff; + cs = ipi_getcpu(cpuid); +- cpu = LOONGARCH_CPU(cs); +- env = &cpu->env; +- send_ipi_data(env, val, addr); ++ if (cs == NULL) { ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* override requester_id */ ++ addr = val & 0xffff; ++ attrs.requester_id = cs->cpu_index; ++ send_ipi_data(&LOONGARCH_CPU(cs)->env, val, addr, attrs); ++ return MEMTX_OK; + } + +-static void loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, +- unsigned size) ++static MemTxResult loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, ++ unsigned size, MemTxAttrs attrs) + { +- IPICore *s = opaque; ++ LoongArchIPI *ipi = opaque; ++ IPICore *s; + int index = 0; ++ uint32_t cpuid; ++ uint8_t vector; ++ CPUState *cs; + ++ s = &ipi->ipi_core; + addr &= 0xff; + trace_loongarch_ipi_write(size, (uint64_t)addr, val); + switch (addr) { +@@ -203,17 +199,35 @@ static void loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, + s->buf[index] = val; + break; + case IOCSR_IPI_SEND: +- ipi_send(val); ++ cpuid = extract32(val, 16, 10); ++ if (cpuid >= LOONGARCH_MAX_CPUS) { ++ trace_loongarch_ipi_unsupported_cpuid("IOCSR_IPI_SEND", cpuid); ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* IPI status vector */ ++ vector = extract8(val, 0, 5); ++ cs = ipi_getcpu(cpuid); ++ if (cs == NULL) { ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* override requester_id */ ++ attrs.requester_id = cs->cpu_index; ++ ipi = LOONGARCH_IPI(LOONGARCH_CPU(cs)->env.ipistate); ++ loongarch_ipi_writel(ipi, CORE_SET_OFF, BIT(vector), 4, attrs); + break; + default: + qemu_log_mask(LOG_UNIMP, "invalid write: %x", (uint32_t)addr); + break; + } ++ ++ return MEMTX_OK; + } + + static const MemoryRegionOps loongarch_ipi_ops = { +- .read = loongarch_ipi_readl, +- .write = loongarch_ipi_writel, ++ .read_with_attrs = loongarch_ipi_readl, ++ .write_with_attrs = loongarch_ipi_writel, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .valid.min_access_size = 4, +@@ 
-222,24 +236,28 @@ static const MemoryRegionOps loongarch_ipi_ops = { + }; + + /* mail send and any send only support writeq */ +-static void loongarch_ipi_writeq(void *opaque, hwaddr addr, uint64_t val, +- unsigned size) ++static MemTxResult loongarch_ipi_writeq(void *opaque, hwaddr addr, uint64_t val, ++ unsigned size, MemTxAttrs attrs) + { ++ MemTxResult ret = MEMTX_OK; ++ + addr &= 0xfff; + switch (addr) { + case MAIL_SEND_OFFSET: +- mail_send(val); ++ ret = mail_send(val, attrs); + break; + case ANY_SEND_OFFSET: +- any_send(val); ++ ret = any_send(val, attrs); + break; + default: + break; + } ++ ++ return ret; + } + + static const MemoryRegionOps loongarch_ipi64_ops = { +- .write = loongarch_ipi_writeq, ++ .write_with_attrs = loongarch_ipi_writeq, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .valid.min_access_size = 8, +@@ -253,7 +271,7 @@ static void loongarch_ipi_init(Object *obj) + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&s->ipi_iocsr_mem, obj, &loongarch_ipi_ops, +- &s->ipi_core, "loongarch_ipi_iocsr", 0x48); ++ s, "loongarch_ipi_iocsr", 0x48); + + /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ + s->ipi_iocsr_mem.disable_reentrancy_guard = true; +@@ -261,7 +279,7 @@ static void loongarch_ipi_init(Object *obj) + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem); + + memory_region_init_io(&s->ipi64_iocsr_mem, obj, &loongarch_ipi64_ops, +- &s->ipi_core, "loongarch_ipi64_iocsr", 0x118); ++ s, "loongarch_ipi64_iocsr", 0x118); + sysbus_init_mmio(sbd, &s->ipi64_iocsr_mem); + qdev_init_gpio_out(DEVICE(obj), &s->ipi_core.irq, 1); + } +-- +2.27.0 + diff --git a/hw-intc-openpic-Improve-errors-for-out-of-bounds-pro.patch b/hw-intc-openpic-Improve-errors-for-out-of-bounds-pro.patch new file mode 100644 index 0000000000000000000000000000000000000000..e9e94c64d6e2fc63cfd8fbaa401015327ea221ce --- /dev/null +++ b/hw-intc-openpic-Improve-errors-for-out-of-bounds-pro.patch @@ -0,0 +1,51 @@ +From 
15b6c032ed2f92aa3210fe30376119eb468af039 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Wed, 23 Oct 2024 14:19:00 +0800 +Subject: [PATCH] hw/intc/openpic: Improve errors for out of bounds property + values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 627c1e012cb3f14745f9b7d991642894a4402d5c + +The error message doesn't matter much, as the "openpic" device isn't +user-creatable. But it's the last use of +QERR_PROPERTY_VALUE_OUT_OF_RANGE, which has to go. Change the message +just like the previous commit did for x86 CPUs. + +Signed-off-by: Markus Armbruster +Message-ID: <20241010150144.986655-7-armbru@redhat.com> +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Jiao +--- + hw/intc/openpic.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c +index a6f91d4bcd..0f99b77a17 100644 +--- a/hw/intc/openpic.c ++++ b/hw/intc/openpic.c +@@ -41,7 +41,6 @@ + #include "hw/pci/msi.h" + #include "qapi/error.h" + #include "qemu/bitops.h" +-#include "qapi/qmp/qerror.h" + #include "qemu/module.h" + #include "qemu/timer.h" + #include "qemu/error-report.h" +@@ -1535,9 +1534,7 @@ static void openpic_realize(DeviceState *dev, Error **errp) + }; + + if (opp->nb_cpus > MAX_CPU) { +- error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, +- TYPE_OPENPIC, "nb_cpus", (uint64_t)opp->nb_cpus, +- (uint64_t)0, (uint64_t)MAX_CPU); ++ error_setg(errp, "property 'nb_cpus' can be at most %d", MAX_CPU); + return; + } + +-- +2.41.0.windows.1 + diff --git a/hw-intc-riscv_aplic-APLICs-should-add-child-earlier-.patch b/hw-intc-riscv_aplic-APLICs-should-add-child-earlier-.patch new file mode 100644 index 0000000000000000000000000000000000000000..70738c4d50fdfde595a8788f9df55f9997112b69 --- /dev/null +++ b/hw-intc-riscv_aplic-APLICs-should-add-child-earlier-.patch @@ -0,0 +1,49 @@ +From 8ee63ce50289adb4ea346901366bd30aa23e412a Mon Sep 17 
00:00:00 2001 +From: "yang.zhang" +Date: Tue, 9 Apr 2024 09:44:45 +0800 +Subject: [PATCH] hw/intc/riscv_aplic: APLICs should add child earlier than + realize + +Since only root APLICs can have hw IRQ lines, aplic->parent should +be initialized first. + +Fixes: e8f79343cf ("hw/intc: Add RISC-V AIA APLIC device emulation") +Reviewed-by: Daniel Henrique Barboza +Signed-off-by: yang.zhang +Cc: qemu-stable +Message-ID: <20240409014445.278-1-gaoshanliukou@163.com> +Signed-off-by: Alistair Francis +(cherry picked from commit c76b121840c6ca79dc6305a5f4bcf17c72217d9c) +Signed-off-by: zhujun2 +--- + hw/intc/riscv_aplic.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c +index c677b5cfbb..2fdf85444e 100644 +--- a/hw/intc/riscv_aplic.c ++++ b/hw/intc/riscv_aplic.c +@@ -974,16 +974,16 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, + qdev_prop_set_bit(dev, "msimode", msimode); + qdev_prop_set_bit(dev, "mmode", mmode); + ++ if (parent) { ++ riscv_aplic_add_child(parent, dev); ++ } ++ + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + if (!is_kvm_aia(msimode)) { + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + } + +- if (parent) { +- riscv_aplic_add_child(parent, dev); +- } +- + if (!msimode) { + for (i = 0; i < num_harts; i++) { + CPUState *cpu = cpu_by_arch_id(hartid_base + i); +-- +2.41.0.windows.1 + diff --git a/hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch b/hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a3eab33573785ccf086284fcd4f8e5ff98f2e70 --- /dev/null +++ b/hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch @@ -0,0 +1,49 @@ +From 74817cbc4ccb4e3b0f6d7b464b5707d3fbc5f686 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 23 Apr 2024 10:40:32 +0800 +Subject: [PATCH] hw/isa/vt82c686: Keep track of PIRQ/PINT pins separately +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from f33274265a242df5d9fdb00915fe72fbb1b2a3c4 + +Move calculation of mask after the switch which sets the function +number for PIRQ/PINT pins to make sure the state of these pins are +kept track of separately and IRQ is raised if any of them is active. + +Cc: qemu-stable@nongnu.org +Fixes: 7e01bd80c1 hw/isa/vt82c686: Bring back via_isa_set_irq() +Signed-off-by: BALATON Zoltan +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240410222543.0EA534E6005@zero.eik.bme.hu> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/isa/vt82c686.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c +index 9c2333a277..0334431219 100644 +--- a/hw/isa/vt82c686.c ++++ b/hw/isa/vt82c686.c +@@ -613,7 +613,7 @@ void via_isa_set_irq(PCIDevice *d, int pin, int level) + ViaISAState *s = VIA_ISA(pci_get_function_0(d)); + uint8_t irq = d->config[PCI_INTERRUPT_LINE], max_irq = 15; + int f = PCI_FUNC(d->devfn); +- uint16_t mask = BIT(f); ++ uint16_t mask; + + switch (f) { + case 0: /* PIRQ/PINT inputs */ +@@ -628,6 +628,7 @@ void via_isa_set_irq(PCIDevice *d, int pin, int level) + } + + /* Keep track of the state of all sources */ ++ mask = BIT(f); + if (level) { + s->irq_state[0] |= mask; + } else { +-- +2.27.0 + diff --git a/hw-loongarch-Add-KVM-IPI-device-support.patch b/hw-loongarch-Add-KVM-IPI-device-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..fea48930cd127ad8258231feb971a9da8e53bbe5 --- /dev/null +++ b/hw-loongarch-Add-KVM-IPI-device-support.patch @@ -0,0 +1,400 @@ +From 24e4e6742bdc8d804760e84f4e4bde5460e1e024 Mon Sep 17 00:00:00 2001 +From: gaosong +Date: Sun, 8 Sep 2024 09:29:00 +0800 +Subject: [PATCH 72/78] hw/loongarch: Add KVM IPI device support + +Added ipi interrupt controller for kvm emulation. +The main process is to send the command word for +creating an ipi device to the kernel. 
+When the VM is saved, the ioctl obtains the ipi +interrupt controller data in the kernel and saves it. +When the VM is recovered, the saved data is sent to the kernel. + +Signed-off-by: gaosong +Signed-off-by: Xianglai Li +--- + hw/intc/Kconfig | 3 + + hw/intc/loongarch_ipi_kvm.c | 207 ++++++++++++++++++++++++++++++++ + hw/intc/meson.build | 1 + + hw/loongarch/Kconfig | 1 + + hw/loongarch/virt.c | 35 ++++-- + include/hw/intc/loongarch_ipi.h | 23 ++++ + linux-headers/linux/kvm.h | 2 + + target/loongarch/kvm/kvm.c | 4 + + 8 files changed, 263 insertions(+), 13 deletions(-) + create mode 100644 hw/intc/loongarch_ipi_kvm.c + +diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig +index 97d550b06b..cbba74c22e 100644 +--- a/hw/intc/Kconfig ++++ b/hw/intc/Kconfig +@@ -93,6 +93,9 @@ config NIOS2_VIC + config LOONGARCH_IPI + bool + ++config LOONGARCH_IPI_KVM ++ bool ++ + config LOONGARCH_PCH_PIC + bool + select UNIMP +diff --git a/hw/intc/loongarch_ipi_kvm.c b/hw/intc/loongarch_ipi_kvm.c +new file mode 100644 +index 0000000000..fd308eb0c0 +--- /dev/null ++++ b/hw/intc/loongarch_ipi_kvm.c +@@ -0,0 +1,207 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * LoongArch kvm ipi interrupt support ++ * ++ * Copyright (C) 2024 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/qdev-properties.h" ++#include "qemu/typedefs.h" ++#include "hw/intc/loongarch_ipi.h" ++#include "hw/sysbus.h" ++#include "linux/kvm.h" ++#include "migration/vmstate.h" ++#include "qapi/error.h" ++#include "sysemu/kvm.h" ++ ++#define IPI_DEV_FD_UNDEF -1 ++ ++static void kvm_ipi_access_regs(int fd, uint64_t addr, ++ uint32_t *val, int is_write) ++{ ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_IPI_GRP_REGS, ++ addr, val, is_write, &error_abort); ++} ++ ++static int kvm_loongarch_ipi_pre_save(void *opaque) ++{ ++ KVMLoongArchIPI *ipi = (KVMLoongArchIPI *)opaque; ++ KVMLoongArchIPIClass *ipi_class = KVM_LOONGARCH_IPI_GET_CLASS(ipi); ++ IPICore *cpu; ++ uint64_t attr; ++ int 
cpu_id = 0; ++ int fd = ipi_class->dev_fd; ++ ++ for (cpu_id = 0; cpu_id < ipi->num_cpu; cpu_id++) { ++ cpu = &ipi->cpu[cpu_id]; ++ attr = (cpu_id << 16) | CORE_STATUS_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->status, false); ++ ++ attr = (cpu_id << 16) | CORE_EN_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->en, false); ++ ++ attr = (cpu_id << 16) | CORE_SET_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->set, false); ++ ++ attr = (cpu_id << 16) | CORE_CLEAR_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->clear, false); ++ ++ attr = (cpu_id << 16) | CORE_BUF_20; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[0], false); ++ ++ attr = (cpu_id << 16) | CORE_BUF_28; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[2], false); ++ ++ attr = (cpu_id << 16) | CORE_BUF_30; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[4], false); ++ ++ attr = (cpu_id << 16) | CORE_BUF_38; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[6], false); ++ } ++ ++ return 0; ++} ++ ++static int kvm_loongarch_ipi_post_load(void *opaque, int version_id) ++{ ++ KVMLoongArchIPI *ipi = (KVMLoongArchIPI *)opaque; ++ KVMLoongArchIPIClass *ipi_class = KVM_LOONGARCH_IPI_GET_CLASS(ipi); ++ IPICore *cpu; ++ uint64_t attr; ++ int cpu_id = 0; ++ int fd = ipi_class->dev_fd; ++ ++ for (cpu_id = 0; cpu_id < ipi->num_cpu; cpu_id++) { ++ cpu = &ipi->cpu[cpu_id]; ++ attr = (cpu_id << 16) | CORE_STATUS_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->status, true); ++ ++ attr = (cpu_id << 16) | CORE_EN_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->en, true); ++ ++ attr = (cpu_id << 16) | CORE_SET_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->set, true); ++ ++ attr = (cpu_id << 16) | CORE_CLEAR_OFF; ++ kvm_ipi_access_regs(fd, attr, &cpu->clear, true); ++ ++ attr = (cpu_id << 16) | CORE_BUF_20; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[0], true); ++ ++ attr = (cpu_id << 16) | CORE_BUF_28; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[2], true); ++ ++ attr = (cpu_id << 16) | CORE_BUF_30; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[4], true); ++ ++ attr = 
(cpu_id << 16) | CORE_BUF_38; ++ kvm_ipi_access_regs(fd, attr, &cpu->buf[6], true); ++ } ++ ++ return 0; ++} ++ ++static void kvm_loongarch_ipi_realize(DeviceState *dev, Error **errp) ++{ ++ KVMLoongArchIPI *ipi = KVM_LOONGARCH_IPI(dev); ++ KVMLoongArchIPIClass *ipi_class = KVM_LOONGARCH_IPI_GET_CLASS(dev); ++ struct kvm_create_device cd = {0}; ++ Error *err = NULL; ++ int ret; ++ ++ if (ipi->num_cpu == 0) { ++ error_setg(errp, "num-cpu must be at least 1"); ++ return; ++ } ++ ++ ipi_class->parent_realize(dev, &err); ++ if (err) { ++ error_propagate(errp, err); ++ return; ++ } ++ ++ ipi->cpu = g_new0(IPICore, ipi->num_cpu); ++ if (ipi->cpu == NULL) { ++ error_setg(errp, "Memory allocation for ExtIOICore faile"); ++ return; ++ } ++ ++ if (!ipi_class->is_created) { ++ cd.type = KVM_DEV_TYPE_LA_IPI; ++ ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); ++ if (ret < 0) { ++ error_setg_errno(errp, errno, "Creating the KVM device failed"); ++ return; ++ } ++ ipi_class->is_created = true; ++ ipi_class->dev_fd = cd.fd; ++ fprintf(stdout, "Create LoongArch IPI irqchip in KVM done!\n"); ++ } ++ ++ assert(ipi_class->dev_fd != IPI_DEV_FD_UNDEF); ++} ++ ++static Property kvm_loongarch_ipi_properties[] = { ++ DEFINE_PROP_UINT32("num-cpu", KVMLoongArchIPI, num_cpu, 1), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ ++static const VMStateDescription vmstate_kvm_ipi_core = { ++ .name = "kvm-ipi-single", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT32(status, IPICore), ++ VMSTATE_UINT32(en, IPICore), ++ VMSTATE_UINT32(set, IPICore), ++ VMSTATE_UINT32(clear, IPICore), ++ VMSTATE_UINT32_ARRAY(buf, IPICore, 8), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static const VMStateDescription vmstate_kvm_loongarch_ipi = { ++ .name = TYPE_KVM_LOONGARCH_IPI, ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .pre_save = kvm_loongarch_ipi_pre_save, ++ .post_load = kvm_loongarch_ipi_post_load, ++ .fields = (VMStateField[]) { ++ 
VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, KVMLoongArchIPI, num_cpu, ++ vmstate_kvm_ipi_core, IPICore), ++ ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static void kvm_loongarch_ipi_class_init(ObjectClass *oc, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(oc); ++ KVMLoongArchIPIClass *ipi_class = KVM_LOONGARCH_IPI_CLASS(oc); ++ ++ ipi_class->parent_realize = dc->realize; ++ dc->realize = kvm_loongarch_ipi_realize; ++ ++ ipi_class->is_created = false; ++ ipi_class->dev_fd = IPI_DEV_FD_UNDEF; ++ ++ device_class_set_props(dc, kvm_loongarch_ipi_properties); ++ ++ dc->vmsd = &vmstate_kvm_loongarch_ipi; ++} ++ ++static const TypeInfo kvm_loongarch_ipi_info = { ++ .name = TYPE_KVM_LOONGARCH_IPI, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(KVMLoongArchIPI), ++ .class_size = sizeof(KVMLoongArchIPIClass), ++ .class_init = kvm_loongarch_ipi_class_init, ++}; ++ ++static void kvm_loongarch_ipi_register_types(void) ++{ ++ type_register_static(&kvm_loongarch_ipi_info); ++} ++ ++type_init(kvm_loongarch_ipi_register_types) +diff --git a/hw/intc/meson.build b/hw/intc/meson.build +index ed355941d1..9deeeb51bb 100644 +--- a/hw/intc/meson.build ++++ b/hw/intc/meson.build +@@ -70,6 +70,7 @@ specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XIVE'], + specific_ss.add(when: 'CONFIG_M68K_IRQC', if_true: files('m68k_irqc.c')) + specific_ss.add(when: 'CONFIG_NIOS2_VIC', if_true: files('nios2_vic.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_IPI', if_true: files('loongarch_ipi.c')) ++specific_ss.add(when: 'CONFIG_LOONGARCH_IPI_KVM', if_true: files('loongarch_ipi_kvm.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_PIC', if_true: files('loongarch_pch_pic.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_MSI', if_true: files('loongarch_pch_msi.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI', if_true: files('loongarch_extioi.c')) +diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig +index b42a8573d4..1e761624c6 100644 +--- a/hw/loongarch/Kconfig ++++ 
b/hw/loongarch/Kconfig +@@ -14,6 +14,7 @@ config LOONGARCH_VIRT + select LOONGARCH_PCH_PIC + select LOONGARCH_PCH_MSI + select LOONGARCH_EXTIOI ++ select LOONGARCH_IPI_KVM if KVM + select LS7A_RTC + select SMBIOS + select ACPI_CPU_HOTPLUG +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 6159fd9470..f065eb75f8 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -829,16 +829,28 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + * +--------+ +---------+ +---------+ + */ + +- /* Create IPI device */ +- ipi = qdev_new(TYPE_LOONGARCH_IPI); +- qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.max_cpus); +- sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); +- +- /* IPI iocsr memory region */ +- memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0)); +- memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); ++ if (kvm_enabled() && kvm_irqchip_in_kernel()) { ++ ipi = qdev_new(TYPE_KVM_LOONGARCH_IPI); ++ qdev_prop_set_int32(ipi, "num-cpu", ms->smp.max_cpus); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); ++ } else { ++ ipi = qdev_new(TYPE_LOONGARCH_IPI); ++ qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.max_cpus); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); ++ ++ /* IPI iocsr memory region */ ++ memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0)); ++ memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); ++ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { ++ cpu_state = qemu_get_cpu(cpu); ++ cpudev = DEVICE(cpu_state); ++ ++ /* connect ipi irq to cpu irq */ ++ qdev_connect_gpio_out(ipi, cpu, qdev_get_gpio_in(cpudev, IRQ_IPI)); ++ } ++ } + + /* Add cpu interrupt-controller */ + fdt_add_cpuic_node(lvms, &cpuintc_phandle); +@@ -849,9 +861,6 @@ static void 
virt_irq_init(LoongArchVirtMachineState *lvms) + lacpu = LOONGARCH_CPU(cpu_state); + env = &(lacpu->env); + env->address_space_iocsr = &lvms->as_iocsr; +- +- /* connect ipi irq to cpu irq */ +- qdev_connect_gpio_out(ipi, cpu, qdev_get_gpio_in(cpudev, IRQ_IPI)); + env->ipistate = ipi; + } + +diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h +index 1c1e834849..601b4f18a7 100644 +--- a/include/hw/intc/loongarch_ipi.h ++++ b/include/hw/intc/loongarch_ipi.h +@@ -32,6 +32,7 @@ + + #define TYPE_LOONGARCH_IPI "loongarch_ipi" + OBJECT_DECLARE_SIMPLE_TYPE(LoongArchIPI, LOONGARCH_IPI) ++#define TYPE_KVM_LOONGARCH_IPI "loongarch-ipi-kvm" + + typedef struct IPICore { + uint32_t status; +@@ -51,4 +52,26 @@ struct LoongArchIPI { + IPICore *cpu; + }; + ++struct KVMLoongArchIPI { ++ SysBusDevice parent_obj; ++ uint32_t num_cpu; ++ IPICore *cpu; ++}; ++typedef struct KVMLoongArchIPI KVMLoongArchIPI; ++DECLARE_INSTANCE_CHECKER(KVMLoongArchIPI, KVM_LOONGARCH_IPI, ++ TYPE_KVM_LOONGARCH_IPI) ++ ++struct KVMLoongArchIPIClass { ++ SysBusDeviceClass parent_class; ++ DeviceRealize parent_realize; ++ ++ bool is_created; ++ int dev_fd; ++ ++}; ++typedef struct KVMLoongArchIPIClass KVMLoongArchIPIClass; ++DECLARE_CLASS_CHECKERS(KVMLoongArchIPIClass, KVM_LOONGARCH_IPI, ++ TYPE_KVM_LOONGARCH_IPI) ++ ++ + #endif +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index eb30402c2d..ea1f821a9f 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1470,6 +1470,8 @@ enum kvm_device_type { + #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME + KVM_DEV_TYPE_RISCV_AIA, + #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA ++ KVM_DEV_TYPE_LA_IPI, ++#define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI + KVM_DEV_TYPE_MAX, + }; + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 550f14269e..ab1ea3d4fd 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -1066,6 +1066,10 @@ int 
kvm_arch_get_default_type(MachineState *ms) + int kvm_arch_init(MachineState *ms, KVMState *s) + { + cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); ++ if(!kvm_vm_check_attr(kvm_state, KVM_LOONGARCH_VM_HAVE_IRQCHIP, KVM_LOONGARCH_VM_HAVE_IRQCHIP)) { ++ s->kernel_irqchip_allowed = false; ++ } ++ + return 0; + } + +-- +2.39.1 + diff --git a/hw-loongarch-Add-KVM-extioi-device-support.patch b/hw-loongarch-Add-KVM-extioi-device-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..58abefd4afa5307722157e4c72761b59727d714b --- /dev/null +++ b/hw-loongarch-Add-KVM-extioi-device-support.patch @@ -0,0 +1,384 @@ +From 833cdea8037d9124cd2e0328739de1b85aaec2a2 Mon Sep 17 00:00:00 2001 +From: gaosong +Date: Sun, 8 Sep 2024 09:50:50 +0800 +Subject: [PATCH 73/78] hw/loongarch: Add KVM extioi device support + +Added extioi interrupt controller for kvm emulation. +The main process is to send the command word for +creating an extioi device to the kernel. +When the VM is saved, the ioctl obtains the related +data of the extioi interrupt controller in the kernel +and saves it. When the VM is recovered, the saved data +is sent to the kernel. 
+ +Signed-off-by: gaosong +Signed-off-by: Xianglai Li +--- + hw/intc/Kconfig | 3 + + hw/intc/loongarch_extioi_kvm.c | 150 +++++++++++++++++++++++++++++ + hw/intc/meson.build | 1 + + hw/loongarch/Kconfig | 1 + + hw/loongarch/virt.c | 50 +++++----- + include/hw/intc/loongarch_extioi.h | 36 ++++++- + include/hw/loongarch/virt.h | 15 +++ + linux-headers/linux/kvm.h | 2 + + 8 files changed, 232 insertions(+), 26 deletions(-) + create mode 100644 hw/intc/loongarch_extioi_kvm.c + +diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig +index cbba74c22e..f1e8bd2fc9 100644 +--- a/hw/intc/Kconfig ++++ b/hw/intc/Kconfig +@@ -107,3 +107,6 @@ config LOONGARCH_PCH_MSI + + config LOONGARCH_EXTIOI + bool ++ ++config LOONGARCH_EXTIOI_KVM ++ bool +diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c +new file mode 100644 +index 0000000000..f5bbc33255 +--- /dev/null ++++ b/hw/intc/loongarch_extioi_kvm.c +@@ -0,0 +1,150 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * LoongArch kvm extioi interrupt support ++ * ++ * Copyright (C) 2024 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/qdev-properties.h" ++#include "qemu/typedefs.h" ++#include "hw/intc/loongarch_extioi.h" ++#include "hw/sysbus.h" ++#include "linux/kvm.h" ++#include "migration/vmstate.h" ++#include "qapi/error.h" ++#include "sysemu/kvm.h" ++ ++static void kvm_extioi_access_regs(int fd, uint64_t addr, ++ void *val, int is_write) ++{ ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS, ++ addr, val, is_write, &error_abort); ++} ++ ++static int kvm_loongarch_extioi_pre_save(void *opaque) ++{ ++ KVMLoongArchExtIOI *s = (KVMLoongArchExtIOI *)opaque; ++ KVMLoongArchExtIOIClass *class = KVM_LOONGARCH_EXTIOI_GET_CLASS(s); ++ int fd = class->dev_fd; ++ ++ kvm_extioi_access_regs(fd, EXTIOI_NODETYPE_START, ++ (void *)s->nodetype, false); ++ kvm_extioi_access_regs(fd, EXTIOI_IPMAP_START, (void *)s->ipmap, false); ++ kvm_extioi_access_regs(fd, 
EXTIOI_ENABLE_START, (void *)s->enable, false); ++ kvm_extioi_access_regs(fd, EXTIOI_BOUNCE_START, (void *)s->bounce, false); ++ kvm_extioi_access_regs(fd, EXTIOI_ISR_START, (void *)s->isr, false); ++ kvm_extioi_access_regs(fd, EXTIOI_COREMAP_START, ++ (void *)s->coremap, false); ++ kvm_extioi_access_regs(fd, EXTIOI_SW_COREMAP_FLAG, ++ (void *)s->sw_coremap, false); ++ kvm_extioi_access_regs(fd, EXTIOI_COREISR_START, ++ (void *)s->coreisr, false); ++ ++ return 0; ++} ++ ++static int kvm_loongarch_extioi_post_load(void *opaque, int version_id) ++{ ++ KVMLoongArchExtIOI *s = (KVMLoongArchExtIOI *)opaque; ++ KVMLoongArchExtIOIClass *class = KVM_LOONGARCH_EXTIOI_GET_CLASS(s); ++ int fd = class->dev_fd; ++ ++ kvm_extioi_access_regs(fd, EXTIOI_NODETYPE_START, ++ (void *)s->nodetype, true); ++ kvm_extioi_access_regs(fd, EXTIOI_IPMAP_START, (void *)s->ipmap, true); ++ kvm_extioi_access_regs(fd, EXTIOI_ENABLE_START, (void *)s->enable, true); ++ kvm_extioi_access_regs(fd, EXTIOI_BOUNCE_START, (void *)s->bounce, true); ++ kvm_extioi_access_regs(fd, EXTIOI_ISR_START, (void *)s->isr, true); ++ kvm_extioi_access_regs(fd, EXTIOI_COREMAP_START, (void *)s->coremap, true); ++ kvm_extioi_access_regs(fd, EXTIOI_SW_COREMAP_FLAG, ++ (void *)s->sw_coremap, true); ++ kvm_extioi_access_regs(fd, EXTIOI_COREISR_START, (void *)s->coreisr, true); ++ ++ return 0; ++} ++ ++static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) ++{ ++ KVMLoongArchExtIOIClass *extioi_class = KVM_LOONGARCH_EXTIOI_GET_CLASS(dev); ++ struct kvm_create_device cd = {0}; ++ Error *err = NULL; ++ int ret,i; ++ ++ extioi_class->parent_realize(dev, &err); ++ if (err) { ++ error_propagate(errp, err); ++ return; ++ } ++ ++ if (!extioi_class->is_created) { ++ cd.type = KVM_DEV_TYPE_LA_EXTIOI; ++ ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); ++ if (ret < 0) { ++ error_setg_errno(errp, errno, ++ "Creating the KVM extioi device failed"); ++ return; ++ } ++ extioi_class->is_created = true; ++ 
extioi_class->dev_fd = cd.fd; ++ fprintf(stdout, "Create LoongArch extioi irqchip in KVM done!\n"); ++ } ++ ++ kvm_async_interrupts_allowed = true; ++ kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); ++ if (kvm_has_gsi_routing()) { ++ for (i = 0; i < 64; ++i) { ++ kvm_irqchip_add_irq_route(kvm_state, i, 0, i); ++ } ++ kvm_gsi_routing_allowed = true; ++ } ++} ++ ++static const VMStateDescription vmstate_kvm_extioi_core = { ++ .name = "kvm-extioi-single", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .pre_save = kvm_loongarch_extioi_pre_save, ++ .post_load = kvm_loongarch_extioi_post_load, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT32_ARRAY(nodetype, KVMLoongArchExtIOI, ++ EXTIOI_IRQS_NODETYPE_COUNT / 2), ++ VMSTATE_UINT32_ARRAY(bounce, KVMLoongArchExtIOI, ++ EXTIOI_IRQS_GROUP_COUNT), ++ VMSTATE_UINT32_ARRAY(isr, KVMLoongArchExtIOI, EXTIOI_IRQS / 32), ++ VMSTATE_UINT32_2DARRAY(coreisr, KVMLoongArchExtIOI, EXTIOI_CPUS, ++ EXTIOI_IRQS_GROUP_COUNT), ++ VMSTATE_UINT32_ARRAY(enable, KVMLoongArchExtIOI, EXTIOI_IRQS / 32), ++ VMSTATE_UINT32_ARRAY(ipmap, KVMLoongArchExtIOI, ++ EXTIOI_IRQS_IPMAP_SIZE / 4), ++ VMSTATE_UINT32_ARRAY(coremap, KVMLoongArchExtIOI, EXTIOI_IRQS / 4), ++ VMSTATE_UINT8_ARRAY(sw_coremap, KVMLoongArchExtIOI, EXTIOI_IRQS), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static void kvm_loongarch_extioi_class_init(ObjectClass *oc, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(oc); ++ KVMLoongArchExtIOIClass *extioi_class = KVM_LOONGARCH_EXTIOI_CLASS(oc); ++ ++ extioi_class->parent_realize = dc->realize; ++ dc->realize = kvm_loongarch_extioi_realize; ++ extioi_class->is_created = false; ++ dc->vmsd = &vmstate_kvm_extioi_core; ++} ++ ++static const TypeInfo kvm_loongarch_extioi_info = { ++ .name = TYPE_KVM_LOONGARCH_EXTIOI, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(KVMLoongArchExtIOI), ++ .class_size = sizeof(KVMLoongArchExtIOIClass), ++ .class_init = kvm_loongarch_extioi_class_init, ++}; ++ ++static void 
kvm_loongarch_extioi_register_types(void) ++{ ++ type_register_static(&kvm_loongarch_extioi_info); ++} ++ ++type_init(kvm_loongarch_extioi_register_types) +diff --git a/hw/intc/meson.build b/hw/intc/meson.build +index 9deeeb51bb..a37d7da8aa 100644 +--- a/hw/intc/meson.build ++++ b/hw/intc/meson.build +@@ -74,3 +74,4 @@ specific_ss.add(when: 'CONFIG_LOONGARCH_IPI_KVM', if_true: files('loongarch_ipi_ + specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_PIC', if_true: files('loongarch_pch_pic.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_MSI', if_true: files('loongarch_pch_msi.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI', if_true: files('loongarch_extioi.c')) ++specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI_KVM', if_true: files('loongarch_extioi_kvm.c')) +diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig +index 1e761624c6..1a47d44a64 100644 +--- a/hw/loongarch/Kconfig ++++ b/hw/loongarch/Kconfig +@@ -15,6 +15,7 @@ config LOONGARCH_VIRT + select LOONGARCH_PCH_MSI + select LOONGARCH_EXTIOI + select LOONGARCH_IPI_KVM if KVM ++ select LOONGARCH_EXTIOI_KVM if KVM + select LS7A_RTC + select SMBIOS + select ACPI_CPU_HOTPLUG +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index f065eb75f8..71e2a3735c 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -867,31 +867,33 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + lvms->ipi = ipi; + + /* Create EXTIOI device */ +- extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); +- qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.max_cpus); +- if (virt_is_veiointc_enabled(lvms)) { +- qdev_prop_set_bit(extioi, "has-virtualization-extension", true); +- } +- sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); +- +- memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); +- if (virt_is_veiointc_enabled(lvms)) { +- memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 
1)); +- } +- lvms->extioi = extioi; +- +- /* +- * connect ext irq to the cpu irq +- * cpu_pin[9:2] <= intc_pin[7:0] +- */ +- for (cpu = 0; cpu < ms->smp.cpus; cpu++) { +- cpudev = DEVICE(qemu_get_cpu(cpu)); +- for (pin = 0; pin < LS3A_INTC_IP; pin++) { +- qdev_connect_gpio_out(extioi, (cpu * 8 + pin), +- qdev_get_gpio_in(cpudev, pin + 2)); ++ if (kvm_enabled() && kvm_irqchip_in_kernel()) { ++ extioi = qdev_new(TYPE_KVM_LOONGARCH_EXTIOI); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); ++ } else { ++ extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); ++ qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.max_cpus); ++ if (virt_is_veiointc_enabled(lvms)) { ++ qdev_prop_set_bit(extioi, "has-virtualization-extension", true); + } ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); ++ memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); ++ if (virt_is_veiointc_enabled(lvms)) { ++ memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1)); ++ } ++ /* ++ * connect ext irq to the cpu irq ++ * cpu_pin[9:2] <= intc_pin[7:0] ++ */ ++ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { ++ cpudev = DEVICE(qemu_get_cpu(cpu)); ++ for (pin = 0; pin < LS3A_INTC_IP; pin++) { ++ qdev_connect_gpio_out(extioi, (cpu * 8 + pin), ++ qdev_get_gpio_in(cpudev, pin + 2)); ++ } ++ } + } + + lvms->extioi = extioi; +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index 722ffee1bc..9966cd98d3 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -15,7 +15,7 @@ + #define EXTIOI_IRQS (256) + #define EXTIOI_IRQS_BITMAP_SIZE (256 / 8) + /* irq from EXTIOI is routed to no more than 4 cpus */ +-#define EXTIOI_CPUS (4) ++#define EXTIOI_CPUS (256) + /* map to ipnum per 32 irqs */ + #define EXTIOI_IRQS_IPMAP_SIZE (256 / 32) + #define EXTIOI_IRQS_COREMAP_SIZE 256 +@@ -59,13 +59,17 @@ + #define 
EXTIOI_VIRT_COREMAP_START (0x40) + #define EXTIOI_VIRT_COREMAP_END (0x240) + ++#define EXTIOI_SW_COREMAP_FLAG (1 << 0) ++ + typedef struct ExtIOICore { + uint32_t coreisr[EXTIOI_IRQS_GROUP_COUNT]; + DECLARE_BITMAP(sw_isr[LS3A_INTC_IP], EXTIOI_IRQS); + qemu_irq parent_irq[LS3A_INTC_IP]; + } ExtIOICore; + +-#define TYPE_LOONGARCH_EXTIOI "loongarch.extioi" ++#define TYPE_LOONGARCH_EXTIOI "loongarch-extioi" ++#define TYPE_KVM_LOONGARCH_EXTIOI "loongarch-kvm-extioi" ++ + OBJECT_DECLARE_SIMPLE_TYPE(LoongArchExtIOI, LOONGARCH_EXTIOI) + struct LoongArchExtIOI { + SysBusDevice parent_obj; +@@ -87,4 +91,32 @@ struct LoongArchExtIOI { + MemoryRegion extioi_system_mem; + MemoryRegion virt_extend; + }; ++ ++struct KVMLoongArchExtIOI { ++ SysBusDevice parent_obj; ++ /* hardware state */ ++ uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2]; ++ uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT]; ++ uint32_t isr[EXTIOI_IRQS / 32]; ++ uint32_t coreisr[EXTIOI_CPUS][EXTIOI_IRQS_GROUP_COUNT]; ++ uint32_t enable[EXTIOI_IRQS / 32]; ++ uint32_t ipmap[EXTIOI_IRQS_IPMAP_SIZE / 4]; ++ uint32_t coremap[EXTIOI_IRQS / 4]; ++ uint8_t sw_coremap[EXTIOI_IRQS]; ++}; ++typedef struct KVMLoongArchExtIOI KVMLoongArchExtIOI; ++DECLARE_INSTANCE_CHECKER(KVMLoongArchExtIOI, KVM_LOONGARCH_EXTIOI, ++ TYPE_KVM_LOONGARCH_EXTIOI) ++ ++struct KVMLoongArchExtIOIClass { ++ SysBusDeviceClass parent_class; ++ DeviceRealize parent_realize; ++ ++ bool is_created; ++ int dev_fd; ++}; ++typedef struct KVMLoongArchExtIOIClass KVMLoongArchExtIOIClass; ++DECLARE_CLASS_CHECKERS(KVMLoongArchExtIOIClass, KVM_LOONGARCH_EXTIOI, ++ TYPE_KVM_LOONGARCH_EXTIOI) ++ + #endif /* LOONGARCH_EXTIOI_H */ +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 98c990327b..168b40c31b 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -38,6 +38,21 @@ + + #define FDT_BASE 0x100000 + ++/* KVM_IRQ_LINE irq field index values */ ++#define KVM_LOONGARCH_IRQ_TYPE_SHIFT 24 ++#define 
KVM_LOONGARCH_IRQ_TYPE_MASK 0xff ++#define KVM_LOONGARCH_IRQ_VCPU_SHIFT 16 ++#define KVM_LOONGARCH_IRQ_VCPU_MASK 0xff ++#define KVM_LOONGARCH_IRQ_NUM_SHIFT 0 ++#define KVM_LOONGARCH_IRQ_NUM_MASK 0xffff ++ ++/* irq_type field */ ++#define KVM_LOONGARCH_IRQ_TYPE_CPU_IP 0 ++#define KVM_LOONGARCH_IRQ_TYPE_CPU_IO 1 ++#define KVM_LOONGARCH_IRQ_TYPE_HT 2 ++#define KVM_LOONGARCH_IRQ_TYPE_MSI 3 ++#define KVM_LOONGARCH_IRQ_TYPE_IOAPIC 4 ++ + struct LoongArchVirtMachineState { + /*< private >*/ + MachineState parent_obj; +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index ea1f821a9f..0c0b82d1ef 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1472,6 +1472,8 @@ enum kvm_device_type { + #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LA_IPI, + #define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI ++ KVM_DEV_TYPE_LA_EXTIOI, ++#define KVM_DEV_TYPE_LA_EXTIOI KVM_DEV_TYPE_LA_EXTIOI + KVM_DEV_TYPE_MAX, + }; + +-- +2.39.1 + diff --git a/hw-loongarch-Add-KVM-pch-msi-device-support.patch b/hw-loongarch-Add-KVM-pch-msi-device-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..e488337a436f6c4b205a4f5886fa435280f94ff5 --- /dev/null +++ b/hw-loongarch-Add-KVM-pch-msi-device-support.patch @@ -0,0 +1,136 @@ +From 24bd774f8146247c7ac6071492f6016140a97267 Mon Sep 17 00:00:00 2001 +From: gaosong +Date: Sun, 8 Sep 2024 22:18:50 +0800 +Subject: [PATCH 75/78] hw/loongarch: Add KVM pch msi device support + +Added pch_msi interrupt controller handling +during kernel emulation of irq chip. 
+ +Signed-off-by: gaosong +Signed-off-by: Xianglai Li +--- + hw/intc/loongarch_pch_msi.c | 39 ++++++++++++++++++++++------- + hw/loongarch/virt.c | 22 +++++++++------- + include/hw/intc/loongarch_pch_msi.h | 2 +- + 3 files changed, 44 insertions(+), 19 deletions(-) + +diff --git a/hw/intc/loongarch_pch_msi.c b/hw/intc/loongarch_pch_msi.c +index ecf3ed0267..901c2c21be 100644 +--- a/hw/intc/loongarch_pch_msi.c ++++ b/hw/intc/loongarch_pch_msi.c +@@ -14,6 +14,8 @@ + #include "hw/misc/unimp.h" + #include "migration/vmstate.h" + #include "trace.h" ++#include "sysemu/kvm.h" ++#include "hw/loongarch/virt.h" + + static uint64_t loongarch_msi_mem_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -26,14 +28,24 @@ static void loongarch_msi_mem_write(void *opaque, hwaddr addr, + LoongArchPCHMSI *s = (LoongArchPCHMSI *)opaque; + int irq_num; + +- /* +- * vector number is irq number from upper extioi intc +- * need subtract irq base to get msi vector offset +- */ +- irq_num = (val & 0xff) - s->irq_base; +- trace_loongarch_msi_set_irq(irq_num); +- assert(irq_num < s->irq_num); +- qemu_set_irq(s->pch_msi_irq[irq_num], 1); ++ MSIMessage msg = { ++ .address = addr, ++ .data = val, ++ }; ++ ++ if (kvm_enabled() && kvm_irqchip_in_kernel()) { ++ kvm_irqchip_send_msi(kvm_state, msg); ++ } else { ++ /* ++ * vector number is irq number from upper extioi intc ++ * need subtract irq base to get msi vector offset ++ */ ++ irq_num = (val & 0xff) - s->irq_base; ++ trace_loongarch_msi_set_irq(irq_num); ++ assert(irq_num < s->irq_num); ++ ++ qemu_set_irq(s->pch_msi_irq[irq_num], 1); ++ } + } + + static const MemoryRegionOps loongarch_pch_msi_ops = { +@@ -46,7 +58,16 @@ static void pch_msi_irq_handler(void *opaque, int irq, int level) + { + LoongArchPCHMSI *s = LOONGARCH_PCH_MSI(opaque); + +- qemu_set_irq(s->pch_msi_irq[irq], level); ++ MSIMessage msg = { ++ .address = 0, ++ .data = irq, ++ }; ++ ++ if (kvm_enabled() && kvm_irqchip_in_kernel()) { ++ kvm_irqchip_send_msi(kvm_state, msg); ++ } 
else { ++ qemu_set_irq(s->pch_msi_irq[irq], level); ++ } + } + + static void loongarch_pch_msi_realize(DeviceState *dev, Error **errp) +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 270dcfd38f..5b0468f6cb 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -928,22 +928,26 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + for (i = 0; i < num; i++) { + qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i)); + } ++ } + +- pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); +- start = num; +- num = EXTIOI_IRQS - start; +- qdev_prop_set_uint32(pch_msi, "msi_irq_base", start); +- qdev_prop_set_uint32(pch_msi, "msi_irq_num", num); +- d = SYS_BUS_DEVICE(pch_msi); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW); ++ pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); ++ num = VIRT_PCH_PIC_IRQ_NUM; ++ start = num; ++ num = EXTIOI_IRQS - start; ++ qdev_prop_set_uint32(pch_msi, "msi_irq_base", start); ++ qdev_prop_set_uint32(pch_msi, "msi_irq_num", num); ++ d = SYS_BUS_DEVICE(pch_msi); ++ sysbus_realize_and_unref(d, &error_fatal); ++ ++ if (!(kvm_enabled() && kvm_irqchip_in_kernel())) { ++ /* Connect pch_msi irqs to extioi */ + for (i = 0; i < num; i++) { +- /* Connect pch_msi irqs to extioi */ + qdev_connect_gpio_out(DEVICE(d), i, + qdev_get_gpio_in(extioi, i + start)); + } + } + ++ sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW); + virt_devices_init(pch_pic, lvms, &pch_pic_phandle, &pch_msi_phandle); + } + +diff --git a/include/hw/intc/loongarch_pch_msi.h b/include/hw/intc/loongarch_pch_msi.h +index b8586fb3b6..fd4ea97a83 100644 +--- a/include/hw/intc/loongarch_pch_msi.h ++++ b/include/hw/intc/loongarch_pch_msi.h +@@ -7,7 +7,7 @@ + + #include "hw/sysbus.h" + +-#define TYPE_LOONGARCH_PCH_MSI "loongarch_pch_msi" ++#define TYPE_LOONGARCH_PCH_MSI "loongarch_pch_msi" + OBJECT_DECLARE_SIMPLE_TYPE(LoongArchPCHMSI, LOONGARCH_PCH_MSI) + + /* MSI irq start from 32 to 255 */ +-- +2.39.1 + diff --git 
a/hw-loongarch-Add-KVM-pch-pic-device-support.patch b/hw-loongarch-Add-KVM-pch-pic-device-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e0e1ba1b296bf7d6c09968fd62dad61367ba233 --- /dev/null +++ b/hw-loongarch-Add-KVM-pch-pic-device-support.patch @@ -0,0 +1,487 @@ +From 30f88e80a47d9bcde08c44c0d752c22c11f2224c Mon Sep 17 00:00:00 2001 +From: gaosong +Date: Sun, 8 Sep 2024 10:13:29 +0800 +Subject: [PATCH 74/78] hw/loongarch: Add KVM pch pic device support + +Added pch_pic interrupt controller for kvm emulation. +The main process is to send the command word for +creating an pch_pic device to the kernel, +Delivers the pch pic interrupt controller configuration +register base address to the kernel. +When the VM is saved, the ioctl obtains the pch_pic +interrupt controller data in the kernel and saves it. +When the VM is recovered, the saved data is sent to the kernel. + +Signed-off-by: gaosong +Signed-off-by: Xianglai Li +--- + hw/intc/Kconfig | 3 + + hw/intc/loongarch_pch_pic.c | 24 +++- + hw/intc/loongarch_pch_pic_kvm.c | 189 ++++++++++++++++++++++++++++ + hw/intc/meson.build | 1 + + hw/loongarch/Kconfig | 1 + + hw/loongarch/virt.c | 70 ++++++----- + include/hw/intc/loongarch_pch_pic.h | 51 +++++++- + linux-headers/linux/kvm.h | 2 + + 8 files changed, 303 insertions(+), 38 deletions(-) + create mode 100644 hw/intc/loongarch_pch_pic_kvm.c + +diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig +index f1e8bd2fc9..91c7aa668e 100644 +--- a/hw/intc/Kconfig ++++ b/hw/intc/Kconfig +@@ -100,6 +100,9 @@ config LOONGARCH_PCH_PIC + bool + select UNIMP + ++config LOONGARCH_PCH_PIC_KVM ++ bool ++ + config LOONGARCH_PCH_MSI + select MSI_NONBROKEN + bool +diff --git a/hw/intc/loongarch_pch_pic.c b/hw/intc/loongarch_pch_pic.c +index 6aa4cadfa4..beb4ac188d 100644 +--- a/hw/intc/loongarch_pch_pic.c ++++ b/hw/intc/loongarch_pch_pic.c +@@ -16,19 +16,28 @@ + #include "migration/vmstate.h" + #include "trace.h" + #include "qapi/error.h" ++#include 
"sysemu/kvm.h" + + static void pch_pic_update_irq(LoongArchPCHPIC *s, uint64_t mask, int level) + { + uint64_t val; + int irq; ++ int kvm_irq; + + if (level) { + val = mask & s->intirr & ~s->int_mask; + if (val) { + irq = ctz64(val); + s->intisr |= MAKE_64BIT_MASK(irq, 1); +- qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 1); +- } ++ if (kvm_enabled() && kvm_irqchip_in_kernel()) { ++ kvm_irq = ( ++ KVM_LOONGARCH_IRQ_TYPE_IOAPIC << KVM_LOONGARCH_IRQ_TYPE_SHIFT) ++ | (0 << KVM_LOONGARCH_IRQ_VCPU_SHIFT) | s->htmsi_vector[irq]; ++ kvm_set_irq(kvm_state, kvm_irq, !!level); ++ } else { ++ qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 1); ++ } ++ } + } else { + /* + * intirr means requested pending irq +@@ -38,8 +47,15 @@ static void pch_pic_update_irq(LoongArchPCHPIC *s, uint64_t mask, int level) + if (val) { + irq = ctz64(val); + s->intisr &= ~MAKE_64BIT_MASK(irq, 1); +- qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 0); +- } ++ if (kvm_enabled() && kvm_irqchip_in_kernel()) { ++ kvm_irq = ( ++ KVM_LOONGARCH_IRQ_TYPE_IOAPIC << KVM_LOONGARCH_IRQ_TYPE_SHIFT) ++ | (0 << KVM_LOONGARCH_IRQ_VCPU_SHIFT) | s->htmsi_vector[irq]; ++ kvm_set_irq(kvm_state, kvm_irq, !!level); ++ } else { ++ qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 0); ++ } ++ } + } + } + +diff --git a/hw/intc/loongarch_pch_pic_kvm.c b/hw/intc/loongarch_pch_pic_kvm.c +new file mode 100644 +index 0000000000..8f66d9a01f +--- /dev/null ++++ b/hw/intc/loongarch_pch_pic_kvm.c +@@ -0,0 +1,189 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * LoongArch kvm pch pic interrupt support ++ * ++ * Copyright (C) 2024 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/qdev-properties.h" ++#include "qemu/typedefs.h" ++#include "hw/intc/loongarch_pch_pic.h" ++#include "hw/sysbus.h" ++#include "linux/kvm.h" ++#include "migration/vmstate.h" ++#include "qapi/error.h" ++#include "sysemu/kvm.h" ++#include "hw/loongarch/virt.h" ++#include "hw/pci-host/ls7a.h" ++#include 
"qemu/error-report.h" ++ ++static void kvm_pch_pic_access_regs(int fd, uint64_t addr, ++ void *val, int is_write) ++{ ++ kvm_device_access(fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS, ++ addr, val, is_write, &error_abort); ++} ++ ++static int kvm_loongarch_pch_pic_pre_save(void *opaque) ++{ ++ KVMLoongArchPCHPIC *s = (KVMLoongArchPCHPIC *)opaque; ++ KVMLoongArchPCHPICClass *class = KVM_LOONGARCH_PCH_PIC_GET_CLASS(s); ++ int fd = class->dev_fd; ++ ++ kvm_pch_pic_access_regs(fd, PCH_PIC_MASK_START, ++ (void *)&s->int_mask, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_HTMSI_EN_START, ++ (void *)&s->htmsi_en, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_EDGE_START, ++ (void *)&s->intedge, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_AUTO_CTRL0_START, ++ (void *)&s->auto_crtl0, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_AUTO_CTRL1_START, ++ (void *)&s->auto_crtl1, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_ROUTE_ENTRY_START, ++ (void *)s->route_entry, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_HTMSI_VEC_START, ++ (void *)s->htmsi_vector, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_INT_IRR_START, ++ (void *)&s->intirr, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_INT_ISR_START, ++ (void *)&s->intisr, false); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_POLARITY_START, ++ (void *)&s->int_polarity, false); ++ ++ return 0; ++} ++ ++static int kvm_loongarch_pch_pic_post_load(void *opaque, int version_id) ++{ ++ KVMLoongArchPCHPIC *s = (KVMLoongArchPCHPIC *)opaque; ++ KVMLoongArchPCHPICClass *class = KVM_LOONGARCH_PCH_PIC_GET_CLASS(s); ++ int fd = class->dev_fd; ++ ++ kvm_pch_pic_access_regs(fd, PCH_PIC_MASK_START, ++ (void *)&s->int_mask, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_HTMSI_EN_START, ++ (void *)&s->htmsi_en, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_EDGE_START, ++ (void *)&s->intedge, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_AUTO_CTRL0_START, ++ (void *)&s->auto_crtl0, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_AUTO_CTRL1_START, ++ 
(void *)&s->auto_crtl1, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_ROUTE_ENTRY_START, ++ (void *)s->route_entry, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_HTMSI_VEC_START, ++ (void *)s->htmsi_vector, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_INT_IRR_START, ++ (void *)&s->intirr, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_INT_ISR_START, ++ (void *)&s->intisr, true); ++ kvm_pch_pic_access_regs(fd, PCH_PIC_POLARITY_START, ++ (void *)&s->int_polarity, true); ++ ++ return 0; ++} ++ ++static void kvm_pch_pic_handler(void *opaque, int irq, int level) ++{ ++ int kvm_irq; ++ ++ if (kvm_enabled()) { ++ kvm_irq = \ ++ (KVM_LOONGARCH_IRQ_TYPE_IOAPIC << KVM_LOONGARCH_IRQ_TYPE_SHIFT) ++ | (0 << KVM_LOONGARCH_IRQ_VCPU_SHIFT) | irq; ++ kvm_set_irq(kvm_state, kvm_irq, !!level); ++ } ++} ++ ++static void kvm_loongarch_pch_pic_realize(DeviceState *dev, Error **errp) ++{ ++ KVMLoongArchPCHPICClass *pch_pic_class = ++ KVM_LOONGARCH_PCH_PIC_GET_CLASS(dev); ++ struct kvm_create_device cd = {0}; ++ uint64_t pch_pic_base = VIRT_PCH_REG_BASE; ++ Error *err = NULL; ++ int ret; ++ ++ pch_pic_class->parent_realize(dev, &err); ++ if (err) { ++ error_propagate(errp, err); ++ return; ++ } ++ ++ if (!pch_pic_class->is_created) { ++ cd.type = KVM_DEV_TYPE_LA_PCH_PIC; ++ ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "Creating the KVM pch pic device failed"); ++ return; ++ } ++ pch_pic_class->is_created = true; ++ pch_pic_class->dev_fd = cd.fd; ++ fprintf(stdout, "Create LoongArch pch pic irqchip in KVM done!\n"); ++ ++ ret = kvm_device_access(cd.fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL, ++ KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT, ++ &pch_pic_base, true, NULL); ++ if (ret < 0) { ++ error_report( ++ "KVM PCH PIC: failed to set the base address of PCH PIC"); ++ exit(1); ++ } ++ ++ qdev_init_gpio_in(dev, kvm_pch_pic_handler, VIRT_PCH_PIC_IRQ_NUM); ++ } ++} ++ ++static const VMStateDescription vmstate_kvm_loongarch_pch_pic = { ++ .name
= TYPE_LOONGARCH_PCH_PIC, ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .pre_save = kvm_loongarch_pch_pic_pre_save, ++ .post_load = kvm_loongarch_pch_pic_post_load, ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT64(int_mask, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(htmsi_en, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(intedge, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(intclr, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(auto_crtl0, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(auto_crtl1, KVMLoongArchPCHPIC), ++ VMSTATE_UINT8_ARRAY(route_entry, KVMLoongArchPCHPIC, 64), ++ VMSTATE_UINT8_ARRAY(htmsi_vector, KVMLoongArchPCHPIC, 64), ++ VMSTATE_UINT64(last_intirr, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(intirr, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(intisr, KVMLoongArchPCHPIC), ++ VMSTATE_UINT64(int_polarity, KVMLoongArchPCHPIC), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++ ++static void kvm_loongarch_pch_pic_class_init(ObjectClass *oc, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(oc); ++ KVMLoongArchPCHPICClass *pch_pic_class = KVM_LOONGARCH_PCH_PIC_CLASS(oc); ++ ++ pch_pic_class->parent_realize = dc->realize; ++ dc->realize = kvm_loongarch_pch_pic_realize; ++ pch_pic_class->is_created = false; ++ dc->vmsd = &vmstate_kvm_loongarch_pch_pic; ++ ++} ++ ++static const TypeInfo kvm_loongarch_pch_pic_info = { ++ .name = TYPE_KVM_LOONGARCH_PCH_PIC, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(KVMLoongArchPCHPIC), ++ .class_size = sizeof(KVMLoongArchPCHPICClass), ++ .class_init = kvm_loongarch_pch_pic_class_init, ++}; ++ ++static void kvm_loongarch_pch_pic_register_types(void) ++{ ++ type_register_static(&kvm_loongarch_pch_pic_info); ++} ++ ++type_init(kvm_loongarch_pch_pic_register_types) +diff --git a/hw/intc/meson.build b/hw/intc/meson.build +index a37d7da8aa..49b4501315 100644 +--- a/hw/intc/meson.build ++++ b/hw/intc/meson.build +@@ -75,3 +75,4 @@ specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_PIC', if_true: files('loongarch_pch_ + specific_ss.add(when: 
'CONFIG_LOONGARCH_PCH_MSI', if_true: files('loongarch_pch_msi.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI', if_true: files('loongarch_extioi.c')) + specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI_KVM', if_true: files('loongarch_extioi_kvm.c')) ++specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_PIC_KVM', if_true: files('loongarch_pch_pic_kvm.c')) +diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig +index 1a47d44a64..16c854c0d5 100644 +--- a/hw/loongarch/Kconfig ++++ b/hw/loongarch/Kconfig +@@ -15,6 +15,7 @@ config LOONGARCH_VIRT + select LOONGARCH_PCH_MSI + select LOONGARCH_EXTIOI + select LOONGARCH_IPI_KVM if KVM ++ select LOONGARCH_PCH_PIC_KVM if KVM + select LOONGARCH_EXTIOI_KVM if KVM + select LS7A_RTC + select SMBIOS +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 71e2a3735c..270dcfd38f 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -901,45 +901,49 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + /* Add Extend I/O Interrupt Controller node */ + fdt_add_eiointc_node(lvms, &cpuintc_phandle, &eiointc_phandle); + +- pch_pic = qdev_new(TYPE_LOONGARCH_PCH_PIC); +- num = VIRT_PCH_PIC_IRQ_NUM; +- qdev_prop_set_uint32(pch_pic, "pch_pic_irq_num", num); +- d = SYS_BUS_DEVICE(pch_pic); +- sysbus_realize_and_unref(d, &error_fatal); +- memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE, +- sysbus_mmio_get_region(d, 0)); +- memory_region_add_subregion(get_system_memory(), +- VIRT_IOAPIC_REG_BASE + PCH_PIC_ROUTE_ENTRY_OFFSET, +- sysbus_mmio_get_region(d, 1)); +- memory_region_add_subregion(get_system_memory(), +- VIRT_IOAPIC_REG_BASE + PCH_PIC_INT_STATUS_LO, +- sysbus_mmio_get_region(d, 2)); +- +- /* Connect pch_pic irqs to extioi */ +- for (i = 0; i < num; i++) { +- qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i)); +- } +- + /* Add PCH PIC node */ + fdt_add_pch_pic_node(lvms, &eiointc_phandle, &pch_pic_phandle); + +- pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); +- start = num; +- num = 
EXTIOI_IRQS - start; +- qdev_prop_set_uint32(pch_msi, "msi_irq_base", start); +- qdev_prop_set_uint32(pch_msi, "msi_irq_num", num); +- d = SYS_BUS_DEVICE(pch_msi); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW); +- for (i = 0; i < num; i++) { +- /* Connect pch_msi irqs to extioi */ +- qdev_connect_gpio_out(DEVICE(d), i, +- qdev_get_gpio_in(extioi, i + start)); +- } +- + /* Add PCH MSI node */ + fdt_add_pch_msi_node(lvms, &eiointc_phandle, &pch_msi_phandle); + ++ if (kvm_enabled() && kvm_irqchip_in_kernel()) { ++ pch_pic = qdev_new(TYPE_KVM_LOONGARCH_PCH_PIC); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(pch_pic), &error_fatal); ++ } else { ++ pch_pic = qdev_new(TYPE_LOONGARCH_PCH_PIC); ++ num = VIRT_PCH_PIC_IRQ_NUM; ++ qdev_prop_set_uint32(pch_pic, "pch_pic_irq_num", num); ++ d = SYS_BUS_DEVICE(pch_pic); ++ sysbus_realize_and_unref(d, &error_fatal); ++ memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE, ++ sysbus_mmio_get_region(d, 0)); ++ memory_region_add_subregion(get_system_memory(), ++ VIRT_IOAPIC_REG_BASE + PCH_PIC_ROUTE_ENTRY_OFFSET, ++ sysbus_mmio_get_region(d, 1)); ++ memory_region_add_subregion(get_system_memory(), ++ VIRT_IOAPIC_REG_BASE + PCH_PIC_INT_STATUS_LO, ++ sysbus_mmio_get_region(d, 2)); ++ /* Connect pch_pic irqs to extioi */ ++ for (i = 0; i < num; i++) { ++ qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i)); ++ } ++ ++ pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); ++ start = num; ++ num = EXTIOI_IRQS - start; ++ qdev_prop_set_uint32(pch_msi, "msi_irq_base", start); ++ qdev_prop_set_uint32(pch_msi, "msi_irq_num", num); ++ d = SYS_BUS_DEVICE(pch_msi); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW); ++ for (i = 0; i < num; i++) { ++ /* Connect pch_msi irqs to extioi */ ++ qdev_connect_gpio_out(DEVICE(d), i, ++ qdev_get_gpio_in(extioi, i + start)); ++ } ++ } ++ + virt_devices_init(pch_pic, lvms, &pch_pic_phandle, 
&pch_msi_phandle); + } + +diff --git a/include/hw/intc/loongarch_pch_pic.h b/include/hw/intc/loongarch_pch_pic.h +index d5437e88f2..77f4cd74a1 100644 +--- a/include/hw/intc/loongarch_pch_pic.h ++++ b/include/hw/intc/loongarch_pch_pic.h +@@ -7,7 +7,8 @@ + + #include "hw/sysbus.h" + +-#define TYPE_LOONGARCH_PCH_PIC "loongarch_pch_pic" ++#define TYPE_LOONGARCH_PCH_PIC "loongarch_pch_pic" ++#define TYPE_KVM_LOONGARCH_PCH_PIC "loongarch_kvm_pch_pic" + #define PCH_PIC_NAME(name) TYPE_LOONGARCH_PCH_PIC#name + OBJECT_DECLARE_SIMPLE_TYPE(LoongArchPCHPIC, LOONGARCH_PCH_PIC) + +@@ -37,6 +38,19 @@ OBJECT_DECLARE_SIMPLE_TYPE(LoongArchPCHPIC, LOONGARCH_PCH_PIC) + #define PCH_PIC_INT_POL_LO 0x3e0 + #define PCH_PIC_INT_POL_HI 0x3e4 + ++#define PCH_PIC_INT_ID_START PCH_PIC_INT_ID_LO ++#define PCH_PIC_MASK_START PCH_PIC_INT_MASK_LO ++#define PCH_PIC_HTMSI_EN_START PCH_PIC_HTMSI_EN_LO ++#define PCH_PIC_EDGE_START PCH_PIC_INT_EDGE_LO ++#define PCH_PIC_CLEAR_START PCH_PIC_INT_CLEAR_LO ++#define PCH_PIC_AUTO_CTRL0_START PCH_PIC_AUTO_CTRL0_LO ++#define PCH_PIC_AUTO_CTRL1_START PCH_PIC_AUTO_CTRL1_LO ++#define PCH_PIC_ROUTE_ENTRY_START PCH_PIC_ROUTE_ENTRY_OFFSET ++#define PCH_PIC_HTMSI_VEC_START PCH_PIC_HTMSI_VEC_OFFSET ++#define PCH_PIC_INT_IRR_START 0x380 ++#define PCH_PIC_INT_ISR_START PCH_PIC_INT_STATUS_LO ++#define PCH_PIC_POLARITY_START PCH_PIC_INT_POL_LO ++ + #define STATUS_LO_START 0 + #define STATUS_HI_START 0x4 + #define POL_LO_START 0x40 +@@ -67,3 +81,38 @@ struct LoongArchPCHPIC { + MemoryRegion iomem8; + unsigned int irq_num; + }; ++ ++struct KVMLoongArchPCHPIC { ++ SysBusDevice parent_obj; ++ uint64_t int_mask; /*0x020 interrupt mask register*/ ++ uint64_t htmsi_en; /*0x040 1=msi*/ ++ uint64_t intedge; /*0x060 edge=1 level =0*/ ++ uint64_t intclr; /*0x080 for clean edge int,set 1 clean,set 0 is noused*/ ++ uint64_t auto_crtl0; /*0x0c0*/ ++ uint64_t auto_crtl1; /*0x0e0*/ ++ uint64_t last_intirr; /* edge detection */ ++ uint64_t intirr; /* 0x380 interrupt request register */ ++ 
uint64_t intisr; /* 0x3a0 interrupt service register */ ++ /* ++ * 0x3e0 interrupt level polarity selection ++ * register 0 for high level trigger ++ */ ++ uint64_t int_polarity; ++ ++ uint8_t route_entry[64]; /*0x100 - 0x138*/ ++ uint8_t htmsi_vector[64]; /*0x200 - 0x238*/ ++}; ++typedef struct KVMLoongArchPCHPIC KVMLoongArchPCHPIC; ++DECLARE_INSTANCE_CHECKER(KVMLoongArchPCHPIC, KVM_LOONGARCH_PCH_PIC, ++ TYPE_KVM_LOONGARCH_PCH_PIC) ++ ++struct KVMLoongArchPCHPICClass { ++ SysBusDeviceClass parent_class; ++ DeviceRealize parent_realize; ++ ++ bool is_created; ++ int dev_fd; ++}; ++typedef struct KVMLoongArchPCHPICClass KVMLoongArchPCHPICClass; ++DECLARE_CLASS_CHECKERS(KVMLoongArchPCHPICClass, KVM_LOONGARCH_PCH_PIC, ++ TYPE_KVM_LOONGARCH_PCH_PIC) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 0c0b82d1ef..887f8268e7 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1470,6 +1470,8 @@ enum kvm_device_type { + #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME + KVM_DEV_TYPE_RISCV_AIA, + #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA ++ KVM_DEV_TYPE_LA_PCH_PIC = 0x100, ++#define KVM_DEV_TYPE_LA_PCH_PIC KVM_DEV_TYPE_LA_PCH_PIC + KVM_DEV_TYPE_LA_IPI, + #define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI + KVM_DEV_TYPE_LA_EXTIOI, +-- +2.39.1 + diff --git a/hw-loongarch-Add-VM-mode-in-IOCSR-feature-register-i.patch b/hw-loongarch-Add-VM-mode-in-IOCSR-feature-register-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..83747a78ed933e050ab7441b40082cb4bb608087 --- /dev/null +++ b/hw-loongarch-Add-VM-mode-in-IOCSR-feature-register-i.patch @@ -0,0 +1,49 @@ +From 0437c11a20b3c66882770e468518d33ff71a932a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Tue, 14 May 2024 10:51:09 +0800 +Subject: [PATCH 22/78] hw/loongarch: Add VM mode in IOCSR feature register in + kvm mode + +If VM runs in kvm mode, VM mode is added in IOCSR feature register. 
So guest can detect kvm hypervisor type and enable possible pv functions. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240514025109.3238398-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index e82e3b6792..c3514f9293 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -10,6 +10,7 @@ + #include "qapi/error.h" + #include "hw/boards.h" + #include "hw/char/serial.h" ++#include "sysemu/kvm.h" + #include "sysemu/sysemu.h" + #include "sysemu/qtest.h" + #include "sysemu/runstate.h" +@@ -914,12 +915,11 @@ static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, + ret = 0x11ULL; + break; + case FEATURE_REG: +- ret = 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | +- 1ULL << IOCSRF_CSRIPI; ++ ret = BIT(IOCSRF_MSI) | BIT(IOCSRF_EXTIOI) | BIT(IOCSRF_CSRIPI); + if (kvm_enabled()) { +- ret |= 1ULL << IOCSRF_VM; ++ ret |= BIT(IOCSRF_VM); + } +- break; ++ break; + case VENDOR_REG: + ret = 0x6e6f73676e6f6f4cULL; /* "Loongson" */ + break; +-- +2.39.1 + diff --git a/hw-loongarch-Add-acpi-SPCR-table-support.patch b/hw-loongarch-Add-acpi-SPCR-table-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..0bb637abeedce204b04806a1ac946f4cedb3351f --- /dev/null +++ b/hw-loongarch-Add-acpi-SPCR-table-support.patch @@ -0,0 +1,82 @@ +From fe22e0efe4c1c99fc876a42446cb2c87f9457afb Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Sat, 7 Sep 2024 15:30:37 +0800 +Subject: [PATCH 37/78] hw/loongarch: Add acpi SPCR table support + +Serial port console redirection table can be used for default serial +port selection, like chosen stdout-path selection with FDT method.
+ +With acpi SPCR table added, early debug console can be parsed from +SPCR table with simple kernel parameter earlycon rather than +earlycon=uart,mmio,0x1fe001e0 + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240907073037.243353-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/acpi-build.c | 40 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index 33a92223d8..bcdec2e1cb 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -242,6 +242,44 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + acpi_table_end(linker, &table); + } + ++/* ++ * Serial Port Console Redirection Table (SPCR) ++ * https://learn.microsoft.com/en-us/windows-hardware/drivers/serports/serial-port-console-redirection-table ++ */ ++static void ++spcr_setup(GArray *table_data, BIOSLinker *linker, MachineState *machine) ++{ ++ LoongArchVirtMachineState *lvms; ++ AcpiSpcrData serial = { ++ .interface_type = 0, /* 16550 compatible */ ++ .base_addr.id = AML_AS_SYSTEM_MEMORY, ++ .base_addr.width = 32, ++ .base_addr.offset = 0, ++ .base_addr.size = 1, ++ .base_addr.addr = VIRT_UART_BASE, ++ .interrupt_type = 0, /* Interrupt not supported */ ++ .pc_interrupt = 0, ++ .interrupt = VIRT_UART_IRQ, ++ .baud_rate = 7, /* 115200 */ ++ .parity = 0, ++ .stop_bits = 1, ++ .flow_control = 0, ++ .terminal_type = 3, /* ANSI */ ++ .language = 0, /* Language */ ++ .pci_device_id = 0xffff, /* not a PCI device*/ ++ .pci_vendor_id = 0xffff, /* not a PCI device*/ ++ .pci_bus = 0, ++ .pci_device = 0, ++ .pci_function = 0, ++ .pci_flags = 0, ++ .pci_segment = 0, ++ }; ++ ++ lvms = LOONGARCH_VIRT_MACHINE(machine); ++ build_spcr(table_data, linker, &serial, 2, lvms->oem_id, ++ lvms->oem_table_id); ++} ++ + typedef + struct AcpiBuildState { + /* Copy of table in RAM (for patching). 
*/ +@@ -484,6 +522,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + + acpi_add_table(table_offsets, tables_blob); + build_srat(tables_blob, tables->linker, machine); ++ acpi_add_table(table_offsets, tables_blob); ++ spcr_setup(tables_blob, tables->linker, machine); + + if (machine->numa_state->num_nodes) { + if (machine->numa_state->have_numa_distance) { +-- +2.39.1 + diff --git a/hw-loongarch-Add-cells-missing-from-rtc-node.patch b/hw-loongarch-Add-cells-missing-from-rtc-node.patch new file mode 100644 index 0000000000000000000000000000000000000000..371b2d3a66c4cc81ebb60a1b8290a74a3db3f9b1 --- /dev/null +++ b/hw-loongarch-Add-cells-missing-from-rtc-node.patch @@ -0,0 +1,56 @@ +From 7266141c658cd00426922534a7de4dd5d89486b2 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:51 +0800 +Subject: [PATCH 16/78] hw/loongarch: Add cells missing from rtc node + +rtc node need interrupts and interrupt-parent cells. + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-18-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index a6aea52ebb..0972ebd150 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -258,7 +258,8 @@ static void fdt_add_flash_node(LoongArchMachineState *lams) + g_free(nodename); + } + +-static void fdt_add_rtc_node(LoongArchMachineState *lams) ++static void fdt_add_rtc_node(LoongArchMachineState *lams, ++ uint32_t *pch_pic_phandle) + { + char *nodename; + hwaddr base = VIRT_RTC_REG_BASE; +@@ -267,8 +268,13 @@ static void fdt_add_rtc_node(LoongArchMachineState *lams) + + nodename = g_strdup_printf("/rtc@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +- qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "loongson,ls7a-rtc"); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", ++ 
"loongson,ls7a-rtc"); + qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 2, size); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", ++ VIRT_RTC_IRQ - VIRT_GSI_BASE , 0x4); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", ++ *pch_pic_phandle); + g_free(nodename); + } + +@@ -677,7 +683,7 @@ static void loongarch_devices_init(DeviceState *pch_pic, + sysbus_create_simple("ls7a_rtc", VIRT_RTC_REG_BASE, + qdev_get_gpio_in(pch_pic, + VIRT_RTC_IRQ - VIRT_GSI_BASE)); +- fdt_add_rtc_node(lams); ++ fdt_add_rtc_node(lams, pch_pic_phandle); + + /* acpi ged */ + lams->acpi_ged = create_acpi_ged(pch_pic, lams); +-- +2.39.1 + diff --git a/hw-loongarch-Add-cells-missing-from-uart-node.patch b/hw-loongarch-Add-cells-missing-from-uart-node.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c20bb3c79c2cc5024225de154e7a5276ce36122 --- /dev/null +++ b/hw-loongarch-Add-cells-missing-from-uart-node.patch @@ -0,0 +1,52 @@ +From 33994eff45e75e91acf0a4753fec77ad0027e4dd Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:50 +0800 +Subject: [PATCH 15/78] hw/loongarch: Add cells missing from uart node + +uart node need interrupts and interrupt-parent cells. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-17-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index ff9513034b..a6aea52ebb 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -272,7 +272,8 @@ static void fdt_add_rtc_node(LoongArchMachineState *lams) + g_free(nodename); + } + +-static void fdt_add_uart_node(LoongArchMachineState *lams) ++static void fdt_add_uart_node(LoongArchMachineState *lams, ++ uint32_t *pch_pic_phandle) + { + char *nodename; + hwaddr base = VIRT_UART_BASE; +@@ -285,6 +286,10 @@ static void fdt_add_uart_node(LoongArchMachineState *lams) + qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0x0, base, 0x0, size); + qemu_fdt_setprop_cell(ms->fdt, nodename, "clock-frequency", 100000000); + qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", ++ VIRT_UART_IRQ - VIRT_GSI_BASE, 0x4); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", ++ *pch_pic_phandle); + g_free(nodename); + } + +@@ -657,7 +662,7 @@ static void loongarch_devices_init(DeviceState *pch_pic, + qdev_get_gpio_in(pch_pic, + VIRT_UART_IRQ - VIRT_GSI_BASE), + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); +- fdt_add_uart_node(lams); ++ fdt_add_uart_node(lams, pch_pic_phandle); + + /* Network init */ + for (i = 0; i < nb_nics; i++) { +-- +2.39.1 + diff --git a/hw-loongarch-Add-init_cmdline.patch b/hw-loongarch-Add-init_cmdline.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba6e03e9512cb3e0532490180e032b28105d7cb1 --- /dev/null +++ b/hw-loongarch-Add-init_cmdline.patch @@ -0,0 +1,117 @@ +From 206b799cb8c218c744f4dcdaf161d11f14c21e0f Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:38 +0800 +Subject: [PATCH 04/78] hw/loongarch: Add init_cmdline + +Add 
init_cmdline and set boot_info->a0, a1 + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-5-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 30 ++++++++++++++++++++++++++++++ + include/hw/loongarch/virt.h | 2 ++ + target/loongarch/cpu.h | 2 ++ + 3 files changed, 34 insertions(+) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index fb6effbaff..127085bcc4 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -63,6 +63,16 @@ static const unsigned int slave_boot_code[] = { + 0x4c000020, /* jirl $zero, $ra,0 */ + }; + ++static void init_cmdline(struct loongarch_boot_info *info, void *p, void *start) ++{ ++ hwaddr cmdline_addr = p - start; ++ ++ info->a0 = 1; ++ info->a1 = cmdline_addr; ++ ++ memcpy(p, info->kernel_cmdline, COMMAND_LINE_SIZE); ++} ++ + static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) + { + return addr & MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS); +@@ -121,6 +131,10 @@ static void reset_load_elf(void *opaque) + + cpu_reset(CPU(cpu)); + if (env->load_elf) { ++ if (cpu == LOONGARCH_CPU(first_cpu)) { ++ env->gpr[4] = env->boot_info->a0; ++ env->gpr[5] = env->boot_info->a1; ++ } + cpu_set_pc(CPU(cpu), env->elf_address); + } + } +@@ -158,8 +172,17 @@ static void loongarch_firmware_boot(LoongArchMachineState *lams, + fw_cfg_add_kernel_info(info, lams->fw_cfg); + } + ++static void init_boot_rom(struct loongarch_boot_info *info, void *p) ++{ ++ void *start = p; ++ ++ init_cmdline(info, p, start); ++ p += COMMAND_LINE_SIZE; ++} ++ + static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + { ++ void *p, *bp; + int64_t kernel_addr = 0; + LoongArchCPU *lacpu; + CPUState *cs; +@@ -173,6 +196,12 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + } + } + ++ /* Load cmdline and system tables at [0 - 1 MiB] */ ++ p = g_malloc0(1 * MiB); ++ bp = p; ++ init_boot_rom(info, p); ++ rom_add_blob_fixed_as("boot_info", 
bp, 1 * MiB, 0, &address_space_memory); ++ + /* Load slave boot code at pflash0 . */ + void *boot_code = g_malloc0(VIRT_FLASH0_SIZE); + memcpy(boot_code, &slave_boot_code, sizeof(slave_boot_code)); +@@ -190,6 +219,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + } + + g_free(boot_code); ++ g_free(bp); + } + + void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 02c8234b8d..ffff075f63 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -33,6 +33,8 @@ + #define VIRT_GED_MEM_ADDR (VIRT_GED_EVT_ADDR + ACPI_GED_EVT_SEL_LEN) + #define VIRT_GED_REG_ADDR (VIRT_GED_MEM_ADDR + MEMORY_HOTPLUG_IO_LEN) + ++#define COMMAND_LINE_SIZE 512 ++ + struct LoongArchMachineState { + /*< private >*/ + MachineState parent_obj; +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 0ed24051af..e3a15c593f 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -364,6 +364,8 @@ typedef struct CPUArchState { + uint32_t mp_state; + /* Store ipistate to access from this struct */ + DeviceState *ipistate; ++ ++ struct loongarch_boot_info *boot_info; + #endif + struct { + uint64_t guest_addr; +-- +2.39.1 + diff --git a/hw-loongarch-Add-load-initrd.patch b/hw-loongarch-Add-load-initrd.patch new file mode 100644 index 0000000000000000000000000000000000000000..8714697780f0778debf62d9d4b6fdf40cf915318 --- /dev/null +++ b/hw-loongarch-Add-load-initrd.patch @@ -0,0 +1,64 @@ +From 02c5f52da7f9458c0fc41e43f181f6e9b7101b57 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:36 +0800 +Subject: [PATCH 02/78] hw/loongarch: Add load initrd + +we load initrd ramdisk after kernel_high address + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-3-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 28 +++++++++++++++++++++++++++- + 1 file 
changed, 27 insertions(+), 1 deletion(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 9feed17db3..a5135fe542 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -22,7 +22,8 @@ static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) + + static int64_t load_kernel_info(struct loongarch_boot_info *info) + { +- uint64_t kernel_entry, kernel_low, kernel_high; ++ uint64_t kernel_entry, kernel_low, kernel_high, initrd_size; ++ ram_addr_t initrd_offset; + ssize_t kernel_size; + + kernel_size = load_elf(info->kernel_filename, NULL, +@@ -37,6 +38,31 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) + load_elf_strerror(kernel_size)); + exit(1); + } ++ ++ if (info->initrd_filename) { ++ initrd_size = get_image_size(info->initrd_filename); ++ if (initrd_size > 0) { ++ initrd_offset = ROUND_UP(kernel_high + 4 * kernel_size, 64 * KiB); ++ ++ if (initrd_offset + initrd_size > info->ram_size) { ++ error_report("memory too small for initial ram disk '%s'", ++ info->initrd_filename); ++ exit(1); ++ } ++ ++ initrd_size = load_image_targphys(info->initrd_filename, initrd_offset, ++ info->ram_size - initrd_offset); ++ } ++ ++ if (initrd_size == (target_ulong)-1) { ++ error_report("could not load initial ram disk '%s'", ++ info->initrd_filename); ++ exit(1); ++ } ++ } else { ++ initrd_size = 0; ++ } ++ + return kernel_entry; + } + +-- +2.39.1 + diff --git a/hw-loongarch-Add-slave-cpu-boot_code.patch b/hw-loongarch-Add-slave-cpu-boot_code.patch new file mode 100644 index 0000000000000000000000000000000000000000..39e1c24fd7521d35780091da6734fa2456a83223 --- /dev/null +++ b/hw-loongarch-Add-slave-cpu-boot_code.patch @@ -0,0 +1,103 @@ +From 2e3e7bcf92284f41c08fce29f6c6d45849721e71 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:37 +0800 +Subject: [PATCH 03/78] hw/loongarch: Add slave cpu boot_code + +Load the slave CPU boot code at pflash0 and set +the slave CPU elf_address to VIRT_FLASH0_BASE. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-4-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 62 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 61 insertions(+), 1 deletion(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index a5135fe542..fb6effbaff 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -15,6 +15,54 @@ + #include "sysemu/reset.h" + #include "sysemu/qtest.h" + ++static const unsigned int slave_boot_code[] = { ++ /* Configure reset ebase. */ ++ 0x0400302c, /* csrwr $t0, LOONGARCH_CSR_EENTRY */ ++ ++ /* Disable interrupt. */ ++ 0x0380100c, /* ori $t0, $zero,0x4 */ ++ 0x04000180, /* csrxchg $zero, $t0, LOONGARCH_CSR_CRMD */ ++ ++ /* Clear mailbox. */ ++ 0x1400002d, /* lu12i.w $t1, 1(0x1) */ ++ 0x038081ad, /* ori $t1, $t1, CORE_BUF_20 */ ++ 0x06481da0, /* iocsrwr.d $zero, $t1 */ ++ ++ /* Enable IPI interrupt. */ ++ 0x1400002c, /* lu12i.w $t0, 1(0x1) */ ++ 0x0400118c, /* csrxchg $t0, $t0, LOONGARCH_CSR_ECFG */ ++ 0x02fffc0c, /* addi.d $t0, $r0,-1(0xfff) */ ++ 0x1400002d, /* lu12i.w $t1, 1(0x1) */ ++ 0x038011ad, /* ori $t1, $t1, CORE_EN_OFF */ ++ 0x064819ac, /* iocsrwr.w $t0, $t1 */ ++ 0x1400002d, /* lu12i.w $t1, 1(0x1) */ ++ 0x038081ad, /* ori $t1, $t1, CORE_BUF_20 */ ++ ++ /* Wait for wakeup <.L11>: */ ++ 0x06488000, /* idle 0x0 */ ++ 0x03400000, /* andi $zero, $zero, 0x0 */ ++ 0x064809ac, /* iocsrrd.w $t0, $t1 */ ++ 0x43fff59f, /* beqz $t0, -12(0x7ffff4) # 48 <.L11> */ ++ ++ /* Read and clear IPI interrupt. */ ++ 0x1400002d, /* lu12i.w $t1, 1(0x1) */ ++ 0x064809ac, /* iocsrrd.w $t0, $t1 */ ++ 0x1400002d, /* lu12i.w $t1, 1(0x1) */ ++ 0x038031ad, /* ori $t1, $t1, CORE_CLEAR_OFF */ ++ 0x064819ac, /* iocsrwr.w $t0, $t1 */ ++ ++ /* Disable IPI interrupt. 
*/ ++ 0x1400002c, /* lu12i.w $t0, 1(0x1) */ ++ 0x04001180, /* csrxchg $zero, $t0, LOONGARCH_CSR_ECFG */ ++ ++ /* Read mail buf and jump to specified entry */ ++ 0x1400002d, /* lu12i.w $t1, 1(0x1) */ ++ 0x038081ad, /* ori $t1, $t1, CORE_BUF_20 */ ++ 0x06480dac, /* iocsrrd.d $t0, $t1 */ ++ 0x00150181, /* move $ra, $t0 */ ++ 0x4c000020, /* jirl $zero, $ra,0 */ ++}; ++ + static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) + { + return addr & MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS); +@@ -125,11 +173,23 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + } + } + ++ /* Load slave boot code at pflash0 . */ ++ void *boot_code = g_malloc0(VIRT_FLASH0_SIZE); ++ memcpy(boot_code, &slave_boot_code, sizeof(slave_boot_code)); ++ rom_add_blob_fixed("boot_code", boot_code, VIRT_FLASH0_SIZE, VIRT_FLASH0_BASE); ++ + CPU_FOREACH(cs) { + lacpu = LOONGARCH_CPU(cs); + lacpu->env.load_elf = true; +- lacpu->env.elf_address = kernel_addr; ++ if (cs == first_cpu) { ++ lacpu->env.elf_address = kernel_addr; ++ } else { ++ lacpu->env.elf_address = VIRT_FLASH0_BASE; ++ } ++ lacpu->env.boot_info = info; + } ++ ++ g_free(boot_code); + } + + void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) +-- +2.39.1 + diff --git a/hw-loongarch-Change-the-tpm-support-by-default.patch b/hw-loongarch-Change-the-tpm-support-by-default.patch new file mode 100644 index 0000000000000000000000000000000000000000..11ae7455681fc2b37829ac7de30805751a90123d --- /dev/null +++ b/hw-loongarch-Change-the-tpm-support-by-default.patch @@ -0,0 +1,46 @@ +From 67c68371e457a85e460221a8c56d8dc93186f79f Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 24 Jun 2024 11:23:00 +0800 +Subject: [PATCH 31/78] hw/loongarch: Change the tpm support by default + +Add devices that support tpm by default, +Fixed incomplete tpm acpi table information. 
+ +Signed-off-by: Xianglai Li +Reviewed-by: Song Gao +Message-Id: <20240624032300.999157-1-lixianglai@loongson.cn> +Signed-off-by: Song Gao +--- + hw/loongarch/Kconfig | 1 + + hw/loongarch/acpi-build.c | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig +index 7864050563..b2a3adb7dc 100644 +--- a/hw/loongarch/Kconfig ++++ b/hw/loongarch/Kconfig +@@ -7,6 +7,7 @@ config LOONGARCH_VIRT + imply VIRTIO_VGA + imply PCI_DEVICES + imply NVDIMM ++ imply TPM_TIS_SYSBUS + select SERIAL + select VIRTIO_PCI + select PLATFORM_BUS +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index 2555c6763c..6593476409 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -646,6 +646,9 @@ void loongarch_acpi_setup(LoongArchVirtMachineState *lvms) + build_state, tables.rsdp, + ACPI_BUILD_RSDP_FILE); + ++ fw_cfg_add_file(lvms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, ++ acpi_data_len(tables.tcpalog)); ++ + qemu_register_reset(acpi_build_reset, build_state); + acpi_build_reset(build_state); + vmstate_register(NULL, 0, &vmstate_acpi_build, build_state); +-- +2.39.1 + diff --git a/hw-loongarch-Fix-fdt-memory-node-wrong-reg.patch b/hw-loongarch-Fix-fdt-memory-node-wrong-reg.patch new file mode 100644 index 0000000000000000000000000000000000000000..3bae4ee1ab9a3ffb6234988b4567f152bfa1f90c --- /dev/null +++ b/hw-loongarch-Fix-fdt-memory-node-wrong-reg.patch @@ -0,0 +1,44 @@ +From 073620787702404e2d71486c30967455c3c7904c Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 10:57:38 +0800 +Subject: [PATCH] hw/loongarch: Fix fdt memory node wrong 'reg' + +cherry picked from commit b11f9814526b833b3a052be2559457b1affad7f5 + +The right fdt memory node like [1], not [2] + + [1] + memory@0 { + device_type = "memory"; + reg = <0x00 0x00 0x00 0x10000000>; + }; + [2] + memory@0 { + device_type = "memory"; + reg = <0x02 0x00 0x02 0x10000000>; + }; + +Reviewed-by: Bibo Mao +Signed-off-by: 
Song Gao +Message-Id: <20240426091551.2397867-10-gaosong@loongson.cn> +Signed-off-by: Gao Jiazhen +--- + hw/loongarch/virt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 01e59f3a95..fc7b70ed4e 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -360,7 +360,7 @@ static void fdt_add_memory_node(MachineState *ms, + char *nodename = g_strdup_printf("/memory@%" PRIx64, base); + + qemu_fdt_add_subnode(ms->fdt, nodename); +- qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0, base, 0, size); + qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory"); + + if (ms->numa_state && ms->numa_state->num_nodes) { +-- +2.41.0.windows.1 + diff --git a/hw-loongarch-Fix-length-for-lowram-in-ACPI-SRAT.patch b/hw-loongarch-Fix-length-for-lowram-in-ACPI-SRAT.patch new file mode 100644 index 0000000000000000000000000000000000000000..c4413facfbe8db368ad24f9a40fccabd8c601aff --- /dev/null +++ b/hw-loongarch-Fix-length-for-lowram-in-ACPI-SRAT.patch @@ -0,0 +1,39 @@ +From 087201cd62e71801855775c3aa6395c7e1c00cee Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Tue, 20 Aug 2024 19:42:33 +0100 +Subject: [PATCH 33/78] hw/loongarch: Fix length for lowram in ACPI SRAT + +The size of lowram should be "gap" instead of the whole node. 
+ +This is failing kernel's sanity check: + +[ 0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0xffffffff] +[ 0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x80000000-0x16fffffff] +[ 0.000000] ACPI: SRAT: Node 1 PXM 1 [mem 0x170000000-0x26fffffff] +[ 0.000000] Warning: node 0 [mem 0x00000000-0xffffffff] overlaps with itself [mem 0x80000000-0x16fffffff] + +Fixes: fc100011f38d ("hw/loongarch: Refine acpi srat table for numa memory") +Signed-off-by: Jiaxun Yang +Reviewed-by: Bibo Mao +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/acpi-build.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index 6593476409..1a9d25fc51 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -218,7 +218,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + * highram: [VIRT_HIGHMEM_BASE, +(len - gap)) + */ + if (len >= gap) { +- build_srat_memory(table_data, base, len, i, MEM_AFFINITY_ENABLED); ++ build_srat_memory(table_data, base, gap, i, MEM_AFFINITY_ENABLED); + len -= gap; + base = VIRT_HIGHMEM_BASE; + gap = machine->ram_size - VIRT_LOWMEM_SIZE; +-- +2.39.1 + diff --git a/hw-loongarch-Init-efi_boot_memmap-table.patch b/hw-loongarch-Init-efi_boot_memmap-table.patch new file mode 100644 index 0000000000000000000000000000000000000000..092667720a6ee1dc2626e738c8f96667985b0f42 --- /dev/null +++ b/hw-loongarch-Init-efi_boot_memmap-table.patch @@ -0,0 +1,169 @@ +From 0245881108803abedf50e954d34ebcfff294d1c3 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:40 +0800 +Subject: [PATCH 06/78] hw/loongarch: Init efi_boot_memmap table + +The efi_system_table adds a efi_boot_memmap configuration table. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-7-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 40 +++++++++++++++++++++++++++++++++++++ + hw/loongarch/virt.c | 11 ++-------- + include/hw/loongarch/boot.h | 27 +++++++++++++++++++++++++ + include/hw/loongarch/virt.h | 10 ++++++++++ + 4 files changed, 79 insertions(+), 9 deletions(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 59889dbc90..527fc9c0be 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -63,8 +63,41 @@ static const unsigned int slave_boot_code[] = { + 0x4c000020, /* jirl $zero, $ra,0 */ + }; + ++static inline void *guidcpy(void *dst, const void *src) ++{ ++ return memcpy(dst, src, sizeof(efi_guid_t)); ++} ++ ++static void init_efi_boot_memmap(struct efi_system_table *systab, ++ void *p, void *start) ++{ ++ unsigned i; ++ struct efi_boot_memmap *boot_memmap = p; ++ efi_guid_t tbl_guid = LINUX_EFI_BOOT_MEMMAP_GUID; ++ ++ /* efi_configuration_table 1 */ ++ guidcpy(&systab->tables[0].guid, &tbl_guid); ++ systab->tables[0].table = (struct efi_configuration_table *)(p - start); ++ systab->nr_tables = 1; ++ ++ boot_memmap->desc_size = sizeof(efi_memory_desc_t); ++ boot_memmap->desc_ver = 1; ++ boot_memmap->map_size = 0; ++ ++ efi_memory_desc_t *map = p + sizeof(struct efi_boot_memmap); ++ for (i = 0; i < memmap_entries; i++) { ++ map = (void *)boot_memmap + sizeof(*map); ++ map[i].type = memmap_table[i].type; ++ map[i].phys_addr = ROUND_UP(memmap_table[i].address, 64 * KiB); ++ map[i].num_pages = ROUND_DOWN(memmap_table[i].address + ++ memmap_table[i].length - map[i].phys_addr, 64 * KiB); ++ p += sizeof(efi_memory_desc_t); ++ } ++} ++ + static void init_systab(struct loongarch_boot_info *info, void *p, void *start) + { ++ void *bp_tables_start; + struct efi_system_table *systab = p; + + info->a2 = p - start; +@@ -80,6 +113,13 @@ static void init_systab(struct loongarch_boot_info *info, void *p, void *start) 
+ p += ROUND_UP(sizeof(struct efi_system_table), 64 * KiB); + + systab->tables = p; ++ bp_tables_start = p; ++ ++ init_efi_boot_memmap(systab, p, start); ++ p += ROUND_UP(sizeof(struct efi_boot_memmap) + ++ sizeof(efi_memory_desc_t) * memmap_entries, 64 * KiB); ++ ++ systab->tables = (struct efi_configuration_table *)(bp_tables_start - start); + } + + static void init_cmdline(struct loongarch_boot_info *info, void *p, void *start) +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index a0aee28f41..028356acf5 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -405,15 +405,8 @@ static void virt_powerdown_req(Notifier *notifier, void *opaque) + acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS); + } + +-struct memmap_entry { +- uint64_t address; +- uint64_t length; +- uint32_t type; +- uint32_t reserved; +-}; +- +-static struct memmap_entry *memmap_table; +-static unsigned memmap_entries; ++struct memmap_entry *memmap_table; ++unsigned memmap_entries; + + static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) + { +diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h +index cf0e4d4f91..76622af2e2 100644 +--- a/include/hw/loongarch/boot.h ++++ b/include/hw/loongarch/boot.h +@@ -21,6 +21,15 @@ typedef struct { + uint8_t b[16]; + } efi_guid_t QEMU_ALIGNED(8); + ++#define EFI_GUID(a, b, c, d...) 
(efi_guid_t){ { \ ++ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ ++ (b) & 0xff, ((b) >> 8) & 0xff, \ ++ (c) & 0xff, ((c) >> 8) & 0xff, d } } ++ ++#define LINUX_EFI_BOOT_MEMMAP_GUID \ ++ EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, \ ++ 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4) ++ + struct efi_config_table { + efi_guid_t guid; + uint64_t *ptr; +@@ -56,6 +65,24 @@ struct efi_system_table { + struct efi_configuration_table *tables; + }; + ++typedef struct { ++ uint32_t type; ++ uint32_t pad; ++ uint64_t phys_addr; ++ uint64_t virt_addr; ++ uint64_t num_pages; ++ uint64_t attribute; ++} efi_memory_desc_t; ++ ++struct efi_boot_memmap { ++ uint64_t map_size; ++ uint64_t desc_size; ++ uint32_t desc_ver; ++ uint64_t map_key; ++ uint64_t buff_size; ++ efi_memory_desc_t map[32]; ++}; ++ + struct loongarch_boot_info { + uint64_t ram_size; + const char *kernel_filename; +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index ffff075f63..2f9eaf4b0e 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -35,6 +35,16 @@ + + #define COMMAND_LINE_SIZE 512 + ++extern struct memmap_entry *memmap_table; ++extern unsigned memmap_entries; ++ ++struct memmap_entry { ++ uint64_t address; ++ uint64_t length; ++ uint32_t type; ++ uint32_t reserved; ++}; ++ + struct LoongArchMachineState { + /*< private >*/ + MachineState parent_obj; +-- +2.39.1 + diff --git a/hw-loongarch-Init-efi_fdt-table.patch b/hw-loongarch-Init-efi_fdt-table.patch new file mode 100644 index 0000000000000000000000000000000000000000..545da6489517f72cf0194fda693664a91921461c --- /dev/null +++ b/hw-loongarch-Init-efi_fdt-table.patch @@ -0,0 +1,105 @@ +From 605b2b372f972fffa2d198d8dee4cf37f335559d Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:42 +0800 +Subject: [PATCH 08/78] hw/loongarch: Init efi_fdt table + +The efi_system_table adds a efi_fdt configuration table. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-9-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 11 +++++++++++ + hw/loongarch/virt.c | 6 ++---- + include/hw/loongarch/boot.h | 4 ++++ + include/hw/loongarch/virt.h | 2 ++ + 4 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index c8b3e742b4..7d1630b2e7 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -113,6 +113,16 @@ static void init_efi_initrd_table(struct efi_system_table *systab, + initrd_table->size = initrd_size; + } + ++static void init_efi_fdt_table(struct efi_system_table *systab) ++{ ++ efi_guid_t tbl_guid = DEVICE_TREE_GUID; ++ ++ /* efi_configuration_table 3 */ ++ guidcpy(&systab->tables[2].guid, &tbl_guid); ++ systab->tables[2].table = (void *)FDT_BASE; ++ systab->nr_tables = 3; ++} ++ + static void init_systab(struct loongarch_boot_info *info, void *p, void *start) + { + void *bp_tables_start; +@@ -138,6 +148,7 @@ static void init_systab(struct loongarch_boot_info *info, void *p, void *start) + sizeof(efi_memory_desc_t) * memmap_entries, 64 * KiB); + init_efi_initrd_table(systab, p, start); + p += ROUND_UP(sizeof(struct efi_initrd), 64 * KiB); ++ init_efi_fdt_table(systab); + + systab->tables = (struct efi_configuration_table *)(bp_tables_start - start); + } +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 028356acf5..99a3dc8696 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -820,7 +820,6 @@ static void loongarch_init(MachineState *machine) + int nb_numa_nodes = machine->numa_state->num_nodes; + NodeInfo *numa_info = machine->numa_state->nodes; + int i; +- hwaddr fdt_base; + const CPUArchIdList *possible_cpus; + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUState *cpu; +@@ -949,12 +948,11 @@ static void loongarch_init(MachineState *machine) + * Put the FDT into the memory map as a ROM image: this will ensure + * the FDT is 
copied again upon reset, even if addr points into RAM. + */ +- fdt_base = 1 * MiB; + qemu_fdt_dumpdtb(machine->fdt, lams->fdt_size); +- rom_add_blob_fixed_as("fdt", machine->fdt, lams->fdt_size, fdt_base, ++ rom_add_blob_fixed_as("fdt", machine->fdt, lams->fdt_size, FDT_BASE, + &address_space_memory); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, +- rom_ptr_for_as(&address_space_memory, fdt_base, lams->fdt_size)); ++ rom_ptr_for_as(&address_space_memory, FDT_BASE, lams->fdt_size)); + + lams->bootinfo.ram_size = ram_size; + loongarch_load_kernel(machine, &lams->bootinfo); +diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h +index 42d1ee3663..4ebcc89dcf 100644 +--- a/include/hw/loongarch/boot.h ++++ b/include/hw/loongarch/boot.h +@@ -34,6 +34,10 @@ typedef struct { + EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, \ + 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) + ++#define DEVICE_TREE_GUID \ ++ EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, \ ++ 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0) ++ + struct efi_config_table { + efi_guid_t guid; + uint64_t *ptr; +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 2f9eaf4b0e..673b57aa2b 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -35,6 +35,8 @@ + + #define COMMAND_LINE_SIZE 512 + ++#define FDT_BASE 0x100000 ++ + extern struct memmap_entry *memmap_table; + extern unsigned memmap_entries; + +-- +2.39.1 + diff --git a/hw-loongarch-Init-efi_initrd-table.patch b/hw-loongarch-Init-efi_initrd-table.patch new file mode 100644 index 0000000000000000000000000000000000000000..13adbf41f41cd076eb3cdee2383565eabc16a0d2 --- /dev/null +++ b/hw-loongarch-Init-efi_initrd-table.patch @@ -0,0 +1,101 @@ +From ad674827da4ab972a30d51818f7768de47336984 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:41 +0800 +Subject: [PATCH 07/78] hw/loongarch: Init efi_initrd table + +The efi_system_table adds a efi_initrd configuration table. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-8-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 23 +++++++++++++++++++++-- + include/hw/loongarch/boot.h | 9 +++++++++ + 2 files changed, 30 insertions(+), 2 deletions(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 527fc9c0be..c8b3e742b4 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -15,6 +15,9 @@ + #include "sysemu/reset.h" + #include "sysemu/qtest.h" + ++ram_addr_t initrd_offset; ++uint64_t initrd_size; ++ + static const unsigned int slave_boot_code[] = { + /* Configure reset ebase. */ + 0x0400302c, /* csrwr $t0, LOONGARCH_CSR_EENTRY */ +@@ -95,6 +98,21 @@ static void init_efi_boot_memmap(struct efi_system_table *systab, + } + } + ++static void init_efi_initrd_table(struct efi_system_table *systab, ++ void *p, void *start) ++{ ++ efi_guid_t tbl_guid = LINUX_EFI_INITRD_MEDIA_GUID; ++ struct efi_initrd *initrd_table = p; ++ ++ /* efi_configuration_table 2 */ ++ guidcpy(&systab->tables[1].guid, &tbl_guid); ++ systab->tables[1].table = (struct efi_configuration_table *)(p - start); ++ systab->nr_tables = 2; ++ ++ initrd_table->base = initrd_offset; ++ initrd_table->size = initrd_size; ++} ++ + static void init_systab(struct loongarch_boot_info *info, void *p, void *start) + { + void *bp_tables_start; +@@ -118,6 +136,8 @@ static void init_systab(struct loongarch_boot_info *info, void *p, void *start) + init_efi_boot_memmap(systab, p, start); + p += ROUND_UP(sizeof(struct efi_boot_memmap) + + sizeof(efi_memory_desc_t) * memmap_entries, 64 * KiB); ++ init_efi_initrd_table(systab, p, start); ++ p += ROUND_UP(sizeof(struct efi_initrd), 64 * KiB); + + systab->tables = (struct efi_configuration_table *)(bp_tables_start - start); + } +@@ -139,8 +159,7 @@ static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) + + static int64_t load_kernel_info(struct loongarch_boot_info *info) + { +- uint64_t 
kernel_entry, kernel_low, kernel_high, initrd_size; +- ram_addr_t initrd_offset; ++ uint64_t kernel_entry, kernel_low, kernel_high; + ssize_t kernel_size; + + kernel_size = load_elf(info->kernel_filename, NULL, +diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h +index 76622af2e2..42d1ee3663 100644 +--- a/include/hw/loongarch/boot.h ++++ b/include/hw/loongarch/boot.h +@@ -30,6 +30,10 @@ typedef struct { + EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, \ + 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4) + ++#define LINUX_EFI_INITRD_MEDIA_GUID \ ++ EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, \ ++ 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) ++ + struct efi_config_table { + efi_guid_t guid; + uint64_t *ptr; +@@ -83,6 +87,11 @@ struct efi_boot_memmap { + efi_memory_desc_t map[32]; + }; + ++struct efi_initrd { ++ uint64_t base; ++ uint64_t size; ++}; ++ + struct loongarch_boot_info { + uint64_t ram_size; + const char *kernel_filename; +-- +2.39.1 + diff --git a/hw-loongarch-Init-efi_system_table.patch b/hw-loongarch-Init-efi_system_table.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa4158770bc4067e87881bcfb941763c7314d4b5 --- /dev/null +++ b/hw-loongarch-Init-efi_system_table.patch @@ -0,0 +1,125 @@ +From 65ae44689bfa6a1b697fea6ec0e72027fdddee95 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:39 +0800 +Subject: [PATCH 05/78] hw/loongarch: Init efi_system_table + +Add init_systab and set boot_info->a2 + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-6-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 22 +++++++++++++++++ + include/hw/loongarch/boot.h | 48 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 70 insertions(+) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 127085bcc4..59889dbc90 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -63,6 +63,25 @@ static const unsigned int slave_boot_code[] = { + 
0x4c000020, /* jirl $zero, $ra,0 */ + }; + ++static void init_systab(struct loongarch_boot_info *info, void *p, void *start) ++{ ++ struct efi_system_table *systab = p; ++ ++ info->a2 = p - start; ++ ++ systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; ++ systab->hdr.revision = EFI_SPECIFICATION_VERSION; ++ systab->hdr.revision = sizeof(struct efi_system_table), ++ systab->fw_revision = FW_VERSION << 16 | FW_PATCHLEVEL << 8; ++ systab->runtime = 0; ++ systab->boottime = 0; ++ systab->nr_tables = 0; ++ ++ p += ROUND_UP(sizeof(struct efi_system_table), 64 * KiB); ++ ++ systab->tables = p; ++} ++ + static void init_cmdline(struct loongarch_boot_info *info, void *p, void *start) + { + hwaddr cmdline_addr = p - start; +@@ -134,6 +153,7 @@ static void reset_load_elf(void *opaque) + if (cpu == LOONGARCH_CPU(first_cpu)) { + env->gpr[4] = env->boot_info->a0; + env->gpr[5] = env->boot_info->a1; ++ env->gpr[6] = env->boot_info->a2; + } + cpu_set_pc(CPU(cpu), env->elf_address); + } +@@ -178,6 +198,8 @@ static void init_boot_rom(struct loongarch_boot_info *info, void *p) + + init_cmdline(info, p, start); + p += COMMAND_LINE_SIZE; ++ ++ init_systab(info, p, start); + } + + static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) +diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h +index 3275c1e295..cf0e4d4f91 100644 +--- a/include/hw/loongarch/boot.h ++++ b/include/hw/loongarch/boot.h +@@ -8,6 +8,54 @@ + #ifndef HW_LOONGARCH_BOOT_H + #define HW_LOONGARCH_BOOT_H + ++/* UEFI 2.10 */ ++#define EFI_SYSTEM_TABLE_SIGNATURE 0x5453595320494249 ++#define EFI_2_100_SYSTEM_TABLE_REVISION ((2<<16) | (100)) ++#define EFI_SPECIFICATION_VERSION EFI_SYSTEM_TABLE_REVISION ++#define EFI_SYSTEM_TABLE_REVISION EFI_2_100_SYSTEM_TABLE_REVISION ++ ++#define FW_VERSION 0x1 ++#define FW_PATCHLEVEL 0x0 ++ ++typedef struct { ++ uint8_t b[16]; ++} efi_guid_t QEMU_ALIGNED(8); ++ ++struct efi_config_table { ++ efi_guid_t guid; ++ uint64_t *ptr; ++ const char name[16]; 
++}; ++ ++typedef struct { ++ uint64_t signature; ++ uint32_t revision; ++ uint32_t headersize; ++ uint32_t crc32; ++ uint32_t reserved; ++} efi_table_hdr_t; ++ ++struct efi_configuration_table { ++ efi_guid_t guid; ++ void *table; ++}; ++ ++struct efi_system_table { ++ efi_table_hdr_t hdr; ++ uint64_t fw_vendor; /* physical addr of CHAR16 vendor string */ ++ uint32_t fw_revision; ++ uint64_t con_in_handle; ++ uint64_t *con_in; ++ uint64_t con_out_handle; ++ uint64_t *con_out; ++ uint64_t stderr_handle; ++ uint64_t stderr_placeholder; ++ uint64_t *runtime; ++ uint64_t *boottime; ++ uint64_t nr_tables; ++ struct efi_configuration_table *tables; ++}; ++ + struct loongarch_boot_info { + uint64_t ram_size; + const char *kernel_filename; +-- +2.39.1 + diff --git a/hw-loongarch-Move-boot-functions-to-boot.c.patch b/hw-loongarch-Move-boot-functions-to-boot.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..08e73981bba586f419807c7d48113962da7c7c69 --- /dev/null +++ b/hw-loongarch-Move-boot-functions-to-boot.c.patch @@ -0,0 +1,390 @@ +From 2414b74bec88f4db58040a683191d3c3828f81ab Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:35 +0800 +Subject: [PATCH 01/78] hw/loongarch: Move boot functions to boot.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Move some boot functions to boot.c and struct +loongarch_boot_info into struct LoongArchMachineState. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20240426091551.2397867-2-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 128 ++++++++++++++++++++++++++++++++++++ + hw/loongarch/meson.build | 1 + + hw/loongarch/virt.c | 121 +++------------------------------- + include/hw/loongarch/boot.h | 21 ++++++ + include/hw/loongarch/virt.h | 2 + + 5 files changed, 160 insertions(+), 113 deletions(-) + create mode 100644 hw/loongarch/boot.c + create mode 100644 include/hw/loongarch/boot.h + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +new file mode 100644 +index 0000000000..9feed17db3 +--- /dev/null ++++ b/hw/loongarch/boot.c +@@ -0,0 +1,128 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * LoongArch boot helper functions. ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/units.h" ++#include "target/loongarch/cpu.h" ++#include "hw/loongarch/virt.h" ++#include "hw/loader.h" ++#include "elf.h" ++#include "qemu/error-report.h" ++#include "sysemu/reset.h" ++#include "sysemu/qtest.h" ++ ++static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) ++{ ++ return addr & MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS); ++} ++ ++static int64_t load_kernel_info(struct loongarch_boot_info *info) ++{ ++ uint64_t kernel_entry, kernel_low, kernel_high; ++ ssize_t kernel_size; ++ ++ kernel_size = load_elf(info->kernel_filename, NULL, ++ cpu_loongarch_virt_to_phys, NULL, ++ &kernel_entry, &kernel_low, ++ &kernel_high, NULL, 0, ++ EM_LOONGARCH, 1, 0); ++ ++ if (kernel_size < 0) { ++ error_report("could not load kernel '%s': %s", ++ info->kernel_filename, ++ load_elf_strerror(kernel_size)); ++ exit(1); ++ } ++ return kernel_entry; ++} ++ ++static void reset_load_elf(void *opaque) ++{ ++ LoongArchCPU *cpu = opaque; ++ CPULoongArchState *env = &cpu->env; ++ ++ cpu_reset(CPU(cpu)); ++ if (env->load_elf) { ++ 
cpu_set_pc(CPU(cpu), env->elf_address); ++ } ++} ++ ++static void fw_cfg_add_kernel_info(struct loongarch_boot_info *info, ++ FWCfgState *fw_cfg) ++{ ++ /* ++ * Expose the kernel, the command line, and the initrd in fw_cfg. ++ * We don't process them here at all, it's all left to the ++ * firmware. ++ */ ++ load_image_to_fw_cfg(fw_cfg, ++ FW_CFG_KERNEL_SIZE, FW_CFG_KERNEL_DATA, ++ info->kernel_filename, ++ false); ++ ++ if (info->initrd_filename) { ++ load_image_to_fw_cfg(fw_cfg, ++ FW_CFG_INITRD_SIZE, FW_CFG_INITRD_DATA, ++ info->initrd_filename, false); ++ } ++ ++ if (info->kernel_cmdline) { ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, ++ strlen(info->kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, ++ info->kernel_cmdline); ++ } ++} ++ ++static void loongarch_firmware_boot(LoongArchMachineState *lams, ++ struct loongarch_boot_info *info) ++{ ++ fw_cfg_add_kernel_info(info, lams->fw_cfg); ++} ++ ++static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) ++{ ++ int64_t kernel_addr = 0; ++ LoongArchCPU *lacpu; ++ CPUState *cs; ++ ++ if (info->kernel_filename) { ++ kernel_addr = load_kernel_info(info); ++ } else { ++ if(!qtest_enabled()) { ++ error_report("Need kernel filename\n"); ++ exit(1); ++ } ++ } ++ ++ CPU_FOREACH(cs) { ++ lacpu = LOONGARCH_CPU(cs); ++ lacpu->env.load_elf = true; ++ lacpu->env.elf_address = kernel_addr; ++ } ++} ++ ++void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) ++{ ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(ms); ++ int i; ++ ++ /* register reset function */ ++ for (i = 0; i < ms->smp.cpus; i++) { ++ qemu_register_reset(reset_load_elf, LOONGARCH_CPU(qemu_get_cpu(i))); ++ } ++ ++ info->kernel_filename = ms->kernel_filename; ++ info->kernel_cmdline = ms->kernel_cmdline; ++ info->initrd_filename = ms->initrd_filename; ++ ++ if (lams->bios_loaded) { ++ loongarch_firmware_boot(lams, info); ++ } else { ++ loongarch_direct_kernel_boot(info); ++ } ++} +diff --git 
a/hw/loongarch/meson.build b/hw/loongarch/meson.build +index c0421502ab..d306d82c2e 100644 +--- a/hw/loongarch/meson.build ++++ b/hw/loongarch/meson.build +@@ -1,6 +1,7 @@ + loongarch_ss = ss.source_set() + loongarch_ss.add(files( + 'fw_cfg.c', ++ 'boot.c', + )) + loongarch_ss.add(when: 'CONFIG_LOONGARCH_VIRT', if_true: [files('virt.c'), fdt]) + loongarch_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-build.c')) +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index eca3b94581..a0aee28f41 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -48,14 +48,6 @@ + #include "hw/block/flash.h" + #include "qemu/error-report.h" + +- +-struct loaderparams { +- uint64_t ram_size; +- const char *kernel_filename; +- const char *kernel_cmdline; +- const char *initrd_filename; +-}; +- + static bool virt_is_veiointc_enabled(LoongArchMachineState *lams) + { + if (lams->veiointc == ON_OFF_AUTO_OFF) { +@@ -439,31 +431,6 @@ static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) + memmap_entries++; + } + +-static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) +-{ +- return addr & MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS); +-} +- +-static int64_t load_kernel_info(const struct loaderparams *loaderparams) +-{ +- uint64_t kernel_entry, kernel_low, kernel_high; +- ssize_t kernel_size; +- +- kernel_size = load_elf(loaderparams->kernel_filename, NULL, +- cpu_loongarch_virt_to_phys, NULL, +- &kernel_entry, &kernel_low, +- &kernel_high, NULL, 0, +- EM_LOONGARCH, 1, 0); +- +- if (kernel_size < 0) { +- error_report("could not load kernel '%s': %s", +- loaderparams->kernel_filename, +- load_elf_strerror(kernel_size)); +- exit(1); +- } +- return kernel_entry; +-} +- + static DeviceState *create_acpi_ged(DeviceState *pch_pic, LoongArchMachineState *lams) + { + DeviceState *dev; +@@ -755,67 +722,6 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + } + } + +-static void reset_load_elf(void *opaque) +-{ +- 
LoongArchCPU *cpu = opaque; +- CPULoongArchState *env = &cpu->env; +- +- cpu_reset(CPU(cpu)); +- if (env->load_elf) { +- cpu_set_pc(CPU(cpu), env->elf_address); +- } +-} +- +-static void fw_cfg_add_kernel_info(const struct loaderparams *loaderparams, +- FWCfgState *fw_cfg) +-{ +- /* +- * Expose the kernel, the command line, and the initrd in fw_cfg. +- * We don't process them here at all, it's all left to the +- * firmware. +- */ +- load_image_to_fw_cfg(fw_cfg, +- FW_CFG_KERNEL_SIZE, FW_CFG_KERNEL_DATA, +- loaderparams->kernel_filename, +- false); +- +- if (loaderparams->initrd_filename) { +- load_image_to_fw_cfg(fw_cfg, +- FW_CFG_INITRD_SIZE, FW_CFG_INITRD_DATA, +- loaderparams->initrd_filename, false); +- } +- +- if (loaderparams->kernel_cmdline) { +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, +- strlen(loaderparams->kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, +- loaderparams->kernel_cmdline); +- } +-} +- +-static void loongarch_firmware_boot(LoongArchMachineState *lams, +- const struct loaderparams *loaderparams) +-{ +- fw_cfg_add_kernel_info(loaderparams, lams->fw_cfg); +-} +- +-static void loongarch_direct_kernel_boot(LoongArchMachineState *lams, +- const struct loaderparams *loaderparams) +-{ +- MachineState *machine = MACHINE(lams); +- int64_t kernel_addr = 0; +- LoongArchCPU *lacpu; +- int i; +- +- kernel_addr = load_kernel_info(loaderparams); +- if (!machine->firmware) { +- for (i = 0; i < machine->smp.cpus; i++) { +- lacpu = LOONGARCH_CPU(qemu_get_cpu(i)); +- lacpu->env.load_elf = true; +- lacpu->env.elf_address = kernel_addr; +- } +- } +-} + + static MemTxResult loongarch_qemu_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size, MemTxAttrs attrs) +@@ -925,7 +831,6 @@ static void loongarch_init(MachineState *machine) + const CPUArchIdList *possible_cpus; + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUState *cpu; +- struct loaderparams loaderparams = { }; + + if (!cpu_model) { + cpu_model = 
LOONGARCH_CPU_TYPE_NAME("la464"); +@@ -1028,24 +933,8 @@ static void loongarch_init(MachineState *machine) + sizeof(struct memmap_entry) * (memmap_entries)); + } + fdt_add_fw_cfg_node(lams); +- loaderparams.ram_size = ram_size; +- loaderparams.kernel_filename = machine->kernel_filename; +- loaderparams.kernel_cmdline = machine->kernel_cmdline; +- loaderparams.initrd_filename = machine->initrd_filename; +- /* load the kernel. */ +- if (loaderparams.kernel_filename) { +- if (lams->bios_loaded) { +- loongarch_firmware_boot(lams, &loaderparams); +- } else { +- loongarch_direct_kernel_boot(lams, &loaderparams); +- } +- } + fdt_add_flash_node(lams); +- /* register reset function */ +- for (i = 0; i < machine->smp.cpus; i++) { +- lacpu = LOONGARCH_CPU(qemu_get_cpu(i)); +- qemu_register_reset(reset_load_elf, lacpu); +- } ++ + /* Initialize the IO interrupt subsystem */ + loongarch_irq_init(lams); + fdt_add_irqchip_node(lams); +@@ -1069,7 +958,13 @@ static void loongarch_init(MachineState *machine) + */ + fdt_base = 1 * MiB; + qemu_fdt_dumpdtb(machine->fdt, lams->fdt_size); +- rom_add_blob_fixed("fdt", machine->fdt, lams->fdt_size, fdt_base); ++ rom_add_blob_fixed_as("fdt", machine->fdt, lams->fdt_size, fdt_base, ++ &address_space_memory); ++ qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, ++ rom_ptr_for_as(&address_space_memory, fdt_base, lams->fdt_size)); ++ ++ lams->bootinfo.ram_size = ram_size; ++ loongarch_load_kernel(machine, &lams->bootinfo); + } + + bool loongarch_is_acpi_enabled(LoongArchMachineState *lams) +diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h +new file mode 100644 +index 0000000000..3275c1e295 +--- /dev/null ++++ b/include/hw/loongarch/boot.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * Definitions for LoongArch boot. 
++ * ++ * Copyright (C) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#ifndef HW_LOONGARCH_BOOT_H ++#define HW_LOONGARCH_BOOT_H ++ ++struct loongarch_boot_info { ++ uint64_t ram_size; ++ const char *kernel_filename; ++ const char *kernel_cmdline; ++ const char *initrd_filename; ++ uint64_t a0, a1, a2; ++}; ++ ++void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info); ++ ++#endif /* HW_LOONGARCH_BOOT_H */ +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 99447fd1d6..02c8234b8d 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -13,6 +13,7 @@ + #include "qemu/queue.h" + #include "hw/intc/loongarch_ipi.h" + #include "hw/block/flash.h" ++#include "hw/loongarch/boot.h" + + #define LOONGARCH_MAX_CPUS 256 + +@@ -58,6 +59,7 @@ struct LoongArchMachineState { + MemoryRegion iocsr_mem; + AddressSpace as_iocsr; + int features; ++ struct loongarch_boot_info bootinfo; + }; + + #define TYPE_LOONGARCH_MACHINE MACHINE_TYPE_NAME("virt") +-- +2.39.1 + diff --git a/hw-loongarch-Refine-acpi-srat-table-for-numa-memory.patch b/hw-loongarch-Refine-acpi-srat-table-for-numa-memory.patch new file mode 100644 index 0000000000000000000000000000000000000000..330846cdc21e858d69781050c8737701fec5d8a9 --- /dev/null +++ b/hw-loongarch-Refine-acpi-srat-table-for-numa-memory.patch @@ -0,0 +1,108 @@ +From 1c9b7b7e76a63738721ac1092fdfff12ae87993a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 15 May 2024 17:39:22 +0800 +Subject: [PATCH 23/78] hw/loongarch: Refine acpi srat table for numa memory + +One LoongArch virt machine platform, there is limitation for memory +map information. The minimum memory size is 256M and minimum memory +size for numa node0 is 256M also. With qemu numa qtest, it is possible +that memory size of numa node0 is 128M. + +Limitations for minimum memory size for both total memory and numa +node0 is removed for acpi srat table creation. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240515093927.3453674-2-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/acpi-build.c | 58 +++++++++++++++++++++++---------------- + 1 file changed, 34 insertions(+), 24 deletions(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index 2b4e09bf37..2555c6763c 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -166,8 +166,9 @@ static void + build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + { + int i, arch_id, node_id; +- uint64_t mem_len, mem_base; +- int nb_numa_nodes = machine->numa_state->num_nodes; ++ hwaddr len, base, gap; ++ NodeInfo *numa_info; ++ int nodes, nb_numa_nodes = machine->numa_state->num_nodes; + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); + MachineClass *mc = MACHINE_GET_CLASS(lvms); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine); +@@ -196,35 +197,44 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + } + +- /* Node0 */ +- build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, +- 0, MEM_AFFINITY_ENABLED); +- mem_base = VIRT_HIGHMEM_BASE; +- if (!nb_numa_nodes) { +- mem_len = machine->ram_size - VIRT_LOWMEM_SIZE; +- } else { +- mem_len = machine->numa_state->nodes[0].node_mem - VIRT_LOWMEM_SIZE; ++ base = VIRT_LOWMEM_BASE; ++ gap = VIRT_LOWMEM_SIZE; ++ numa_info = machine->numa_state->nodes; ++ nodes = nb_numa_nodes; ++ if (!nodes) { ++ nodes = 1; + } +- if (mem_len) +- build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED); +- +- /* Node1 - Nodemax */ +- if (nb_numa_nodes) { +- mem_base += mem_len; +- for (i = 1; i < nb_numa_nodes; ++i) { +- if (machine->numa_state->nodes[i].node_mem > 0) { +- build_srat_memory(table_data, mem_base, +- machine->numa_state->nodes[i].node_mem, i, +- MEM_AFFINITY_ENABLED); 
+- mem_base += machine->numa_state->nodes[i].node_mem; +- } ++ ++ for (i = 0; i < nodes; i++) { ++ if (nb_numa_nodes) { ++ len = numa_info[i].node_mem; ++ } else { ++ len = machine->ram_size; ++ } ++ ++ /* ++ * memory for the node splited into two part ++ * lowram: [base, +gap) ++ * highram: [VIRT_HIGHMEM_BASE, +(len - gap)) ++ */ ++ if (len >= gap) { ++ build_srat_memory(table_data, base, len, i, MEM_AFFINITY_ENABLED); ++ len -= gap; ++ base = VIRT_HIGHMEM_BASE; ++ gap = machine->ram_size - VIRT_LOWMEM_SIZE; ++ } ++ ++ if (len) { ++ build_srat_memory(table_data, base, len, i, MEM_AFFINITY_ENABLED); ++ base += len; ++ gap -= len; + } + } + + if (machine->device_memory) { + build_srat_memory(table_data, machine->device_memory->base, + memory_region_size(&machine->device_memory->mr), +- nb_numa_nodes - 1, ++ nodes - 1, + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + } + +-- +2.39.1 + diff --git a/hw-loongarch-Refine-default-numa-id-calculation.patch b/hw-loongarch-Refine-default-numa-id-calculation.patch new file mode 100644 index 0000000000000000000000000000000000000000..e53da7176eb216b8a6fa854354451f331f362509 --- /dev/null +++ b/hw-loongarch-Refine-default-numa-id-calculation.patch @@ -0,0 +1,57 @@ +From a9f9a4a0a60596f2e738e6e434c20a3f5266fa17 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Tue, 19 Mar 2024 10:26:06 +0800 +Subject: [PATCH 21/78] hw/loongarch: Refine default numa id calculation + +With numa_test test case, there is subcase named test_def_cpu_split(), +there are 8 sockets and 2 numa nodes. Here is command line: +"-machine smp.cpus=8,smp.sockets=8 -numa node,memdev=ram -numa node" + +The required result is: + node 0 cpus: 0 2 4 6 + node 1 cpus: 1 3 5 7 +Test case numa_test fails on LoongArch, since the actual result is: + node 0 cpus: 0 1 2 3 + node 1 cpus: 4 5 6 7 + +It will be better if all the cpus in one socket share the same numa +node. Here socket id is used to calculate numa id in function +virt_get_default_cpu_node_id(). 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240319022606.2994565-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index e39989193e..e82e3b6792 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1278,15 +1278,14 @@ static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, + + static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + { +- int64_t nidx = 0; ++ int64_t socket_id; + + if (ms->numa_state->num_nodes) { +- nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes); +- if (ms->numa_state->num_nodes <= nidx) { +- nidx = ms->numa_state->num_nodes - 1; +- } ++ socket_id = ms->possible_cpus->cpus[idx].props.socket_id; ++ return socket_id % ms->numa_state->num_nodes; ++ } else { ++ return 0; + } +- return nidx; + } + + static void virt_class_init(ObjectClass *oc, void *data) +-- +2.39.1 + diff --git a/hw-loongarch-Refine-fadt-memory-table-for-numa-memor.patch b/hw-loongarch-Refine-fadt-memory-table-for-numa-memor.patch new file mode 100644 index 0000000000000000000000000000000000000000..df9e4b4f52b7261f12198a8a841dafb6966b0d91 --- /dev/null +++ b/hw-loongarch-Refine-fadt-memory-table-for-numa-memor.patch @@ -0,0 +1,106 @@ +From d39247ec5d4ef52a4b9422aaecccc284cbd1a5dd Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 15 May 2024 17:39:23 +0800 +Subject: [PATCH 24/78] hw/loongarch: Refine fadt memory table for numa memory + +One LoongArch virt machine platform, there is limitation for memory +map information. The minimum memory size is 256M and minimum memory +size for numa node0 is 256M also. With qemu numa qtest, it is possible +that memory size of numa node0 is 128M. + +Limitations for minimum memory size for both total memory and numa +node0 is removed for fadt numa memory table creation. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240515093927.3453674-3-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 46 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 43 insertions(+), 3 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index c3514f9293..31a2598e7c 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -502,6 +502,48 @@ static void fdt_add_memory_node(MachineState *ms, + g_free(nodename); + } + ++static void fdt_add_memory_nodes(MachineState *ms) ++{ ++ hwaddr base, size, ram_size, gap; ++ int i, nb_numa_nodes, nodes; ++ NodeInfo *numa_info; ++ ++ ram_size = ms->ram_size; ++ base = VIRT_LOWMEM_BASE; ++ gap = VIRT_LOWMEM_SIZE; ++ nodes = nb_numa_nodes = ms->numa_state->num_nodes; ++ numa_info = ms->numa_state->nodes; ++ if (!nodes) { ++ nodes = 1; ++ } ++ ++ for (i = 0; i < nodes; i++) { ++ if (nb_numa_nodes) { ++ size = numa_info[i].node_mem; ++ } else { ++ size = ram_size; ++ } ++ ++ /* ++ * memory for the node splited into two part ++ * lowram: [base, +gap) ++ * highram: [VIRT_HIGHMEM_BASE, +(len - gap)) ++ */ ++ if (size >= gap) { ++ fdt_add_memory_node(ms, base, gap, i); ++ size -= gap; ++ base = VIRT_HIGHMEM_BASE; ++ gap = ram_size - VIRT_LOWMEM_SIZE; ++ } ++ ++ if (size) { ++ fdt_add_memory_node(ms, base, size, i); ++ base += size; ++ gap -= size; ++ } ++ } ++} ++ + static void virt_build_smbios(LoongArchVirtMachineState *lvms) + { + MachineState *ms = MACHINE(lvms); +@@ -1008,10 +1050,10 @@ static void virt_init(MachineState *machine) + lacpu->phy_id = machine->possible_cpus->cpus[i].arch_id; + } + fdt_add_cpu_nodes(lvms); ++ fdt_add_memory_nodes(machine); + + /* Node0 memory */ + memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1); +- fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0); + memory_region_init_alias(&lvms->lowmem, NULL, "loongarch.node0.lowram", + machine->ram, offset, 
VIRT_LOWMEM_SIZE); + memory_region_add_subregion(address_space_mem, phyAddr, &lvms->lowmem); +@@ -1025,7 +1067,6 @@ static void virt_init(MachineState *machine) + } + phyAddr = VIRT_HIGHMEM_BASE; + memmap_add_entry(phyAddr, highram_size, 1); +- fdt_add_memory_node(machine, phyAddr, highram_size, 0); + memory_region_init_alias(&lvms->highmem, NULL, "loongarch.node0.highram", + machine->ram, offset, highram_size); + memory_region_add_subregion(address_space_mem, phyAddr, &lvms->highmem); +@@ -1041,7 +1082,6 @@ static void virt_init(MachineState *machine) + offset, numa_info[i].node_mem); + memory_region_add_subregion(address_space_mem, phyAddr, nodemem); + memmap_add_entry(phyAddr, numa_info[i].node_mem, 1); +- fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, i); + offset += numa_info[i].node_mem; + phyAddr += numa_info[i].node_mem; + } +-- +2.39.1 + diff --git a/hw-loongarch-Refine-fwcfg-memory-map.patch b/hw-loongarch-Refine-fwcfg-memory-map.patch new file mode 100644 index 0000000000000000000000000000000000000000..38df11db93419eefc9656a954beeede6ba88ce33 --- /dev/null +++ b/hw-loongarch-Refine-fwcfg-memory-map.patch @@ -0,0 +1,120 @@ +From 88b12e40d6a479dfb376fb6a91ef24e07a59d33a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 15 May 2024 17:39:24 +0800 +Subject: [PATCH 25/78] hw/loongarch: Refine fwcfg memory map + +Memory map table for fwcfg is used for UEFI BIOS, UEFI BIOS uses the first +entry from fwcfg memory map as the first memory HOB, the second memory HOB +will be used if the first memory HOB is used up. + +Memory map table for fwcfg does not care about numa node, however in +generic the first memory HOB is part of numa node0, so that runtime +memory of UEFI which is allocated from the first memory HOB is located +at numa node0. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240515093927.3453674-4-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 60 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 57 insertions(+), 3 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 31a2598e7c..7e89921431 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1005,6 +1005,62 @@ static const MemoryRegionOps virt_iocsr_misc_ops = { + }, + }; + ++static void fw_cfg_add_memory(MachineState *ms) ++{ ++ hwaddr base, size, ram_size, gap; ++ int nb_numa_nodes, nodes; ++ NodeInfo *numa_info; ++ ++ ram_size = ms->ram_size; ++ base = VIRT_LOWMEM_BASE; ++ gap = VIRT_LOWMEM_SIZE; ++ nodes = nb_numa_nodes = ms->numa_state->num_nodes; ++ numa_info = ms->numa_state->nodes; ++ if (!nodes) { ++ nodes = 1; ++ } ++ ++ /* add fw_cfg memory map of node0 */ ++ if (nb_numa_nodes) { ++ size = numa_info[0].node_mem; ++ } else { ++ size = ram_size; ++ } ++ ++ if (size >= gap) { ++ memmap_add_entry(base, gap, 1); ++ size -= gap; ++ base = VIRT_HIGHMEM_BASE; ++ gap = ram_size - VIRT_LOWMEM_SIZE; ++ } ++ ++ if (size) { ++ memmap_add_entry(base, size, 1); ++ base += size; ++ } ++ ++ if (nodes < 2) { ++ return; ++ } ++ ++ /* add fw_cfg memory map of other nodes */ ++ size = ram_size - numa_info[0].node_mem; ++ gap = VIRT_LOWMEM_BASE + VIRT_LOWMEM_SIZE; ++ if (base < gap && (base + size) > gap) { ++ /* ++ * memory map for the maining nodes splited into two part ++ * lowram: [base, +(gap - base)) ++ * highram: [VIRT_HIGHMEM_BASE, +(size - (gap - base))) ++ */ ++ memmap_add_entry(base, gap - base, 1); ++ size -= gap - base; ++ base = VIRT_HIGHMEM_BASE; ++ } ++ ++ if (size) ++ memmap_add_entry(base, size, 1); ++} ++ + static void virt_init(MachineState *machine) + { + LoongArchCPU *lacpu; +@@ -1051,9 +1107,9 @@ static void virt_init(MachineState *machine) + } + fdt_add_cpu_nodes(lvms); + fdt_add_memory_nodes(machine); 
++ fw_cfg_add_memory(machine); + + /* Node0 memory */ +- memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1); + memory_region_init_alias(&lvms->lowmem, NULL, "loongarch.node0.lowram", + machine->ram, offset, VIRT_LOWMEM_SIZE); + memory_region_add_subregion(address_space_mem, phyAddr, &lvms->lowmem); +@@ -1066,7 +1122,6 @@ static void virt_init(MachineState *machine) + highram_size = ram_size - VIRT_LOWMEM_SIZE; + } + phyAddr = VIRT_HIGHMEM_BASE; +- memmap_add_entry(phyAddr, highram_size, 1); + memory_region_init_alias(&lvms->highmem, NULL, "loongarch.node0.highram", + machine->ram, offset, highram_size); + memory_region_add_subregion(address_space_mem, phyAddr, &lvms->highmem); +@@ -1081,7 +1136,6 @@ static void virt_init(MachineState *machine) + memory_region_init_alias(nodemem, NULL, ramName, machine->ram, + offset, numa_info[i].node_mem); + memory_region_add_subregion(address_space_mem, phyAddr, nodemem); +- memmap_add_entry(phyAddr, numa_info[i].node_mem, 1); + offset += numa_info[i].node_mem; + phyAddr += numa_info[i].node_mem; + } +-- +2.39.1 + diff --git a/hw-loongarch-Refine-system-dram-memory-region.patch b/hw-loongarch-Refine-system-dram-memory-region.patch new file mode 100644 index 0000000000000000000000000000000000000000..3db195d89ab32a09e7635de8e2a02f154dab0c07 --- /dev/null +++ b/hw-loongarch-Refine-system-dram-memory-region.patch @@ -0,0 +1,110 @@ +From 1a7f567308756a2a26020802b24fe7fd106cf84a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 15 May 2024 17:39:25 +0800 +Subject: [PATCH 26/78] hw/loongarch: Refine system dram memory region + +For system dram memory region, it is not necessary to use numa node +information. There is only low memory region and high memory region. + +Remove numa node information for ddr memory region here, it can reduce +memory region number on LoongArch virt machine. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240515093927.3453674-5-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 53 +++++++++++++++------------------------------ + 1 file changed, 17 insertions(+), 36 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 7e89921431..96755f5deb 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1065,14 +1065,10 @@ static void virt_init(MachineState *machine) + { + LoongArchCPU *lacpu; + const char *cpu_model = machine->cpu_type; +- ram_addr_t offset = 0; +- ram_addr_t ram_size = machine->ram_size; +- uint64_t highram_size = 0, phyAddr = 0; + MemoryRegion *address_space_mem = get_system_memory(); + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); +- int nb_numa_nodes = machine->numa_state->num_nodes; +- NodeInfo *numa_info = machine->numa_state->nodes; + int i; ++ hwaddr base, size, ram_size = machine->ram_size; + const CPUArchIdList *possible_cpus; + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUState *cpu; +@@ -1110,40 +1106,27 @@ static void virt_init(MachineState *machine) + fw_cfg_add_memory(machine); + + /* Node0 memory */ +- memory_region_init_alias(&lvms->lowmem, NULL, "loongarch.node0.lowram", +- machine->ram, offset, VIRT_LOWMEM_SIZE); +- memory_region_add_subregion(address_space_mem, phyAddr, &lvms->lowmem); +- +- offset += VIRT_LOWMEM_SIZE; +- if (nb_numa_nodes > 0) { +- assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE); +- highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE; +- } else { +- highram_size = ram_size - VIRT_LOWMEM_SIZE; ++ size = ram_size; ++ base = VIRT_LOWMEM_BASE; ++ if (size > VIRT_LOWMEM_SIZE) { ++ size = VIRT_LOWMEM_SIZE; + } +- phyAddr = VIRT_HIGHMEM_BASE; +- memory_region_init_alias(&lvms->highmem, NULL, "loongarch.node0.highram", +- machine->ram, offset, highram_size); +- memory_region_add_subregion(address_space_mem, phyAddr, &lvms->highmem); +- +- /* 
Node1 - Nodemax memory */ +- offset += highram_size; +- phyAddr += highram_size; +- +- for (i = 1; i < nb_numa_nodes; i++) { +- MemoryRegion *nodemem = g_new(MemoryRegion, 1); +- g_autofree char *ramName = g_strdup_printf("loongarch.node%d.ram", i); +- memory_region_init_alias(nodemem, NULL, ramName, machine->ram, +- offset, numa_info[i].node_mem); +- memory_region_add_subregion(address_space_mem, phyAddr, nodemem); +- offset += numa_info[i].node_mem; +- phyAddr += numa_info[i].node_mem; ++ ++ memory_region_init_alias(&lvms->lowmem, NULL, "loongarch.lowram", ++ machine->ram, base, size); ++ memory_region_add_subregion(address_space_mem, base, &lvms->lowmem); ++ base += size; ++ if (ram_size - size) { ++ base = VIRT_HIGHMEM_BASE; ++ memory_region_init_alias(&lvms->highmem, NULL, "loongarch.highram", ++ machine->ram, VIRT_LOWMEM_BASE + size, ram_size - size); ++ memory_region_add_subregion(address_space_mem, base, &lvms->highmem); ++ base += ram_size - size; + } + + /* initialize device memory address space */ + if (machine->ram_size < machine->maxram_size) { + ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; +- hwaddr device_mem_base; + + if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { + error_report("unsupported amount of memory slots: %"PRIu64, +@@ -1157,9 +1140,7 @@ static void virt_init(MachineState *machine) + "%d bytes", TARGET_PAGE_SIZE); + exit(EXIT_FAILURE); + } +- /* device memory base is the top of high memory address. */ +- device_mem_base = ROUND_UP(VIRT_HIGHMEM_BASE + highram_size, 1 * GiB); +- machine_memory_devices_init(machine, device_mem_base, device_mem_size); ++ machine_memory_devices_init(machine, base, device_mem_size); + } + + /* load the BIOS image. 
*/ +-- +2.39.1 + diff --git a/hw-loongarch-Remove-default-enable-with-VIRTIO_VGA-d.patch b/hw-loongarch-Remove-default-enable-with-VIRTIO_VGA-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..abf36b0b9bcbe433ca4d6a326c1f673cea5f9253 --- /dev/null +++ b/hw-loongarch-Remove-default-enable-with-VIRTIO_VGA-d.patch @@ -0,0 +1,39 @@ +From 94fa0b75c098ca6fc987f103760c1e07695ffd1a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Fri, 23 Aug 2024 15:30:50 +0800 +Subject: [PATCH 34/78] hw/loongarch: Remove default enable with VIRTIO_VGA + device + +For virtio VGA deivce libvirt will select VIRTIO_VGA firstly rather than +VIRTIO_GPU, VIRTIO_VGA device supports frame buffer however it requires +legacy VGA compatible support. Frame buffer area 0xa0000 -- 0xc0000 +conflicts with low memory area 0 -- 0x10000000. + +Here remove default support for VIRTIO_VGA device, VIRTIO_GPU is prefered +on LoongArch system. For frame buffer video card support, standard VGA can +be used. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240823073050.2619484-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig +index b2a3adb7dc..40944a8365 100644 +--- a/hw/loongarch/Kconfig ++++ b/hw/loongarch/Kconfig +@@ -4,7 +4,6 @@ config LOONGARCH_VIRT + depends on LOONGARCH64 + select PCI + select PCI_EXPRESS_GENERIC_BRIDGE +- imply VIRTIO_VGA + imply PCI_DEVICES + imply NVDIMM + imply TPM_TIS_SYSBUS +-- +2.39.1 + diff --git a/hw-loongarch-Remove-minimum-and-default-memory-size.patch b/hw-loongarch-Remove-minimum-and-default-memory-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..9298f984b95f334c2080b8b621b45e680ef4cbed --- /dev/null +++ b/hw-loongarch-Remove-minimum-and-default-memory-size.patch @@ -0,0 +1,46 @@ +From 858f16ea09fbbac9966ca73b6b86d290a36be6f5 Mon Sep 17 00:00:00 2001 +From: 
Bibo Mao +Date: Wed, 15 May 2024 17:39:26 +0800 +Subject: [PATCH 27/78] hw/loongarch: Remove minimum and default memory size + +Some qtest test cases such as numa use default memory size of generic +machine class, which is 128M by default. + +Here generic default memory size is used, and also remove minimum memory +size which is 1G originally. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240515093927.3453674-6-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 96755f5deb..11ba879e52 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1077,10 +1077,6 @@ static void virt_init(MachineState *machine) + cpu_model = LOONGARCH_CPU_TYPE_NAME("la464"); + } + +- if (ram_size < 1 * GiB) { +- error_report("ram_size must be greater than 1G."); +- exit(1); +- } + create_fdt(lvms); + + /* Create IOCSR space */ +@@ -1369,7 +1365,6 @@ static void virt_class_init(ObjectClass *oc, void *data) + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + + mc->init = virt_init; +- mc->default_ram_size = 1 * GiB; + mc->default_cpu_type = LOONGARCH_CPU_TYPE_NAME("la464"); + mc->default_ram_id = "loongarch.ram"; + mc->max_cpus = LOONGARCH_MAX_CPUS; +-- +2.39.1 + diff --git a/hw-loongarch-Rename-LOONGARCH_MACHINE-with-LOONGARCH.patch b/hw-loongarch-Rename-LOONGARCH_MACHINE-with-LOONGARCH.patch new file mode 100644 index 0000000000000000000000000000000000000000..38fc14acdb8923769779d6376a7533b5207f8afa --- /dev/null +++ b/hw-loongarch-Rename-LOONGARCH_MACHINE-with-LOONGARCH.patch @@ -0,0 +1,188 @@ +From 8e2986a6fc5dda2afbe33f723efdacd01f147b7a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 8 May 2024 11:11:06 +0800 +Subject: [PATCH 19/78] hw/loongarch: Rename LOONGARCH_MACHINE with + LOONGARCH_VIRT_MACHINE +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+On LoongArch system, there is only virt machine type now, name +LOONGARCH_MACHINE is confusing, rename it to LOONGARCH_VIRT_MACHINE. +Machine names for other real hw boards can be added in the future. + +Signed-off-by: Bibo Mao +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240508031110.2507477-2-maobibo@loongson.cn> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Xianglai Li +--- + hw/loongarch/acpi-build.c | 8 ++++---- + hw/loongarch/boot.c | 2 +- + hw/loongarch/virt.c | 19 +++++++++---------- + include/hw/loongarch/virt.h | 4 ++-- + 4 files changed, 16 insertions(+), 17 deletions(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index f990405d04..fff3497c62 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -167,7 +167,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + int i, arch_id, node_id; + uint64_t mem_len, mem_base; + int nb_numa_nodes = machine->numa_state->num_nodes; +- LoongArchMachineState *lams = LOONGARCH_MACHINE(machine); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); + MachineClass *mc = MACHINE_GET_CLASS(lams); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine); + AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id, +@@ -279,7 +279,7 @@ static void + build_la_ged_aml(Aml *dsdt, MachineState *machine) + { + uint32_t event; +- LoongArchMachineState *lams = LOONGARCH_MACHINE(machine); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); + + build_ged_aml(dsdt, "\\_SB."GED_DEVICE, + HOTPLUG_HANDLER(lams->acpi_ged), +@@ -391,7 +391,7 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine) + { + Aml *dsdt, *scope, *pkg; +- LoongArchMachineState *lams = LOONGARCH_MACHINE(machine); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); + AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = lams->oem_id, + .oem_table_id = lams->oem_table_id }; + +@@ 
-421,7 +421,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine) + + static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(machine); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); + GArray *table_offsets; + AcpiFadtData fadt_data; + unsigned facs, rsdt, dsdt; +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 03f6301a77..e37512729d 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -319,7 +319,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + + void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(ms); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(ms); + int i; + + /* register reset function */ +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 76b36539e2..cca220cb5b 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -970,7 +970,7 @@ static void loongarch_init(MachineState *machine) + ram_addr_t ram_size = machine->ram_size; + uint64_t highram_size = 0, phyAddr = 0; + MemoryRegion *address_space_mem = get_system_memory(); +- LoongArchMachineState *lams = LOONGARCH_MACHINE(machine); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); + int nb_numa_nodes = machine->numa_state->num_nodes; + NodeInfo *numa_info = machine->numa_state->nodes; + int i; +@@ -1121,7 +1121,7 @@ bool loongarch_is_acpi_enabled(LoongArchMachineState *lams) + static void loongarch_get_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(obj); + OnOffAuto acpi = lams->acpi; + + visit_type_OnOffAuto(v, name, &acpi, errp); +@@ -1130,14 +1130,14 @@ static void loongarch_get_acpi(Object *obj, Visitor *v, const char *name, + static void loongarch_set_acpi(Object 
*obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(obj); + + visit_type_OnOffAuto(v, name, &lams->acpi, errp); + } + + static void loongarch_machine_initfn(Object *obj) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(obj); + + if (tcg_enabled()) { + lams->veiointc = ON_OFF_AUTO_OFF; +@@ -1172,7 +1172,7 @@ static void virt_machine_device_pre_plug(HotplugHandler *hotplug_dev, + static void virt_mem_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(hotplug_dev); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); + + /* the acpi ged is always exist */ + hotplug_handler_unplug_request(HOTPLUG_HANDLER(lams->acpi_ged), dev, +@@ -1190,7 +1190,7 @@ static void virt_machine_device_unplug_request(HotplugHandler *hotplug_dev, + static void virt_mem_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(hotplug_dev); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); + + hotplug_handler_unplug(HOTPLUG_HANDLER(lams->acpi_ged), dev, errp); + pc_dimm_unplug(PC_DIMM(dev), MACHINE(lams)); +@@ -1208,7 +1208,7 @@ static void virt_machine_device_unplug(HotplugHandler *hotplug_dev, + static void virt_mem_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(hotplug_dev); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); + + pc_dimm_plug(PC_DIMM(dev), MACHINE(lams)); + hotplug_handler_plug(HOTPLUG_HANDLER(lams->acpi_ged), +@@ -1218,7 +1218,7 @@ static void virt_mem_plug(HotplugHandler *hotplug_dev, + static void loongarch_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- 
LoongArchMachineState *lams = LOONGARCH_MACHINE(hotplug_dev); ++ LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); + MachineClass *mc = MACHINE_GET_CLASS(lams); + + if (device_is_dynamic_sysbus(mc, dev)) { +@@ -1300,7 +1300,6 @@ static void loongarch_class_init(ObjectClass *oc, void *data) + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + +- mc->desc = "Loongson-3A5000 LS7A1000 machine"; + mc->init = loongarch_init; + mc->default_ram_size = 1 * GiB; + mc->default_cpu_type = LOONGARCH_CPU_TYPE_NAME("la464"); +@@ -1341,7 +1340,7 @@ static void loongarch_class_init(ObjectClass *oc, void *data) + + static const TypeInfo loongarch_machine_types[] = { + { +- .name = TYPE_LOONGARCH_MACHINE, ++ .name = TYPE_LOONGARCH_VIRT_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(LoongArchMachineState), + .class_init = loongarch_class_init, +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 36158c758f..0509b9a9af 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -66,8 +66,8 @@ struct LoongArchMachineState { + struct loongarch_boot_info bootinfo; + }; + +-#define TYPE_LOONGARCH_MACHINE MACHINE_TYPE_NAME("virt") +-OBJECT_DECLARE_SIMPLE_TYPE(LoongArchMachineState, LOONGARCH_MACHINE) ++#define TYPE_LOONGARCH_VIRT_MACHINE MACHINE_TYPE_NAME("virt") ++OBJECT_DECLARE_SIMPLE_TYPE(LoongArchMachineState, LOONGARCH_VIRT_MACHINE) + bool loongarch_is_acpi_enabled(LoongArchMachineState *lams); + void loongarch_acpi_setup(LoongArchMachineState *lams); + #endif +-- +2.39.1 + diff --git a/hw-loongarch-Rename-LoongArchMachineState-with-Loong.patch b/hw-loongarch-Rename-LoongArchMachineState-with-Loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..65f8a224abf16bd450cdf57fc49445de2f7a3985 --- /dev/null +++ b/hw-loongarch-Rename-LoongArchMachineState-with-Loong.patch @@ -0,0 +1,1324 @@ +From a501582ef5e986bfa9dc198c63582b3e35332643 Mon Sep 17 
00:00:00 2001 +From: Bibo Mao +Date: Wed, 8 May 2024 11:11:07 +0800 +Subject: [PATCH 20/78] hw/loongarch: Rename LoongArchMachineState with + LoongArchVirtMachineState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Rename LoongArchMachineState with LoongArchVirtMachineState, and change +variable name LoongArchMachineState *lams with LoongArchVirtMachineState +*lvms. + +Rename function specific for virtmachine loongarch_xxx() +with virt_xxx(). However some common functions keep unchanged such as +loongarch_acpi_setup()/loongarch_load_kernel(), since these functions +can be used for real hw boards. + +Signed-off-by: Bibo Mao +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240508031110.2507477-3-maobibo@loongson.cn> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Xianglai Li +--- + hw/loongarch/acpi-build.c | 89 +++++---- + hw/loongarch/boot.c | 10 +- + hw/loongarch/fw_cfg.c | 2 +- + hw/loongarch/fw_cfg.h | 2 +- + hw/loongarch/virt.c | 366 ++++++++++++++++++------------------ + include/hw/loongarch/virt.h | 7 +- + 6 files changed, 239 insertions(+), 237 deletions(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index fff3497c62..2b4e09bf37 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -105,14 +105,15 @@ build_facs(GArray *table_data) + + /* build MADT */ + static void +-build_madt(GArray *table_data, BIOSLinker *linker, LoongArchMachineState *lams) ++build_madt(GArray *table_data, BIOSLinker *linker, ++ LoongArchVirtMachineState *lvms) + { +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + int i, arch_id; +- AcpiTable table = { .sig = "APIC", .rev = 1, .oem_id = lams->oem_id, +- .oem_table_id = lams->oem_table_id }; ++ AcpiTable table = { .sig = "APIC", .rev = 1, .oem_id = lvms->oem_id, ++ .oem_table_id = 
lvms->oem_table_id }; + + acpi_table_begin(&table, table_data); + +@@ -167,11 +168,11 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + int i, arch_id, node_id; + uint64_t mem_len, mem_base; + int nb_numa_nodes = machine->numa_state->num_nodes; +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); +- MachineClass *mc = MACHINE_GET_CLASS(lams); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); ++ MachineClass *mc = MACHINE_GET_CLASS(lvms); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine); +- AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id, +- .oem_table_id = lams->oem_table_id }; ++ AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lvms->oem_id, ++ .oem_table_id = lvms->oem_table_id }; + + acpi_table_begin(&table, table_data); + build_append_int_noprefix(table_data, 1, 4); /* Reserved */ +@@ -279,13 +280,13 @@ static void + build_la_ged_aml(Aml *dsdt, MachineState *machine) + { + uint32_t event; +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); + + build_ged_aml(dsdt, "\\_SB."GED_DEVICE, +- HOTPLUG_HANDLER(lams->acpi_ged), ++ HOTPLUG_HANDLER(lvms->acpi_ged), + VIRT_SCI_IRQ, AML_SYSTEM_MEMORY, + VIRT_GED_EVT_ADDR); +- event = object_property_get_uint(OBJECT(lams->acpi_ged), ++ event = object_property_get_uint(OBJECT(lvms->acpi_ged), + "ged-event", &error_abort); + if (event & ACPI_GED_MEM_HOTPLUG_EVT) { + build_memory_hotplug_aml(dsdt, machine->ram_slots, "\\_SB", NULL, +@@ -295,7 +296,7 @@ build_la_ged_aml(Aml *dsdt, MachineState *machine) + acpi_dsdt_add_power_button(dsdt); + } + +-static void build_pci_device_aml(Aml *scope, LoongArchMachineState *lams) ++static void build_pci_device_aml(Aml *scope, LoongArchVirtMachineState *lvms) + { + struct GPEXConfig cfg = { + .mmio64.base = VIRT_PCI_MEM_BASE, +@@ -305,13 +306,13 @@ static void build_pci_device_aml(Aml *scope, 
LoongArchMachineState *lams) + .ecam.base = VIRT_PCI_CFG_BASE, + .ecam.size = VIRT_PCI_CFG_SIZE, + .irq = VIRT_GSI_BASE + VIRT_DEVICE_IRQS, +- .bus = lams->pci_bus, ++ .bus = lvms->pci_bus, + }; + + acpi_dsdt_add_gpex(scope, &cfg); + } + +-static void build_flash_aml(Aml *scope, LoongArchMachineState *lams) ++static void build_flash_aml(Aml *scope, LoongArchVirtMachineState *lvms) + { + Aml *dev, *crs; + MemoryRegion *flash_mem; +@@ -322,11 +323,11 @@ static void build_flash_aml(Aml *scope, LoongArchMachineState *lams) + hwaddr flash1_base; + hwaddr flash1_size; + +- flash_mem = pflash_cfi01_get_memory(lams->flash[0]); ++ flash_mem = pflash_cfi01_get_memory(lvms->flash[0]); + flash0_base = flash_mem->addr; + flash0_size = memory_region_size(flash_mem); + +- flash_mem = pflash_cfi01_get_memory(lams->flash[1]); ++ flash_mem = pflash_cfi01_get_memory(lvms->flash[1]); + flash1_base = flash_mem->addr; + flash1_size = memory_region_size(flash_mem); + +@@ -352,7 +353,7 @@ static void build_flash_aml(Aml *scope, LoongArchMachineState *lams) + } + + #ifdef CONFIG_TPM +-static void acpi_dsdt_add_tpm(Aml *scope, LoongArchMachineState *vms) ++static void acpi_dsdt_add_tpm(Aml *scope, LoongArchVirtMachineState *vms) + { + PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev); + hwaddr pbus_base = VIRT_PLATFORM_BUS_BASEADDRESS; +@@ -391,18 +392,18 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine) + { + Aml *dsdt, *scope, *pkg; +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); +- AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = lams->oem_id, +- .oem_table_id = lams->oem_table_id }; ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); ++ AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = lvms->oem_id, ++ .oem_table_id = lvms->oem_table_id }; + + acpi_table_begin(&table, table_data); + dsdt = init_aml_allocator(); + build_uart_device_aml(dsdt); +- build_pci_device_aml(dsdt, lams); ++ 
build_pci_device_aml(dsdt, lvms); + build_la_ged_aml(dsdt, machine); +- build_flash_aml(dsdt, lams); ++ build_flash_aml(dsdt, lvms); + #ifdef CONFIG_TPM +- acpi_dsdt_add_tpm(dsdt, lams); ++ acpi_dsdt_add_tpm(dsdt, lvms); + #endif + /* System State Package */ + scope = aml_scope("\\"); +@@ -421,7 +422,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine) + + static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); + GArray *table_offsets; + AcpiFadtData fadt_data; + unsigned facs, rsdt, dsdt; +@@ -455,14 +456,14 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + fadt_data.dsdt_tbl_offset = &dsdt; + fadt_data.xdsdt_tbl_offset = &dsdt; + build_fadt(tables_blob, tables->linker, &fadt_data, +- lams->oem_id, lams->oem_table_id); ++ lvms->oem_id, lvms->oem_table_id); + + acpi_add_table(table_offsets, tables_blob); +- build_madt(tables_blob, tables->linker, lams); ++ build_madt(tables_blob, tables->linker, lvms); + + acpi_add_table(table_offsets, tables_blob); + build_pptt(tables_blob, tables->linker, machine, +- lams->oem_id, lams->oem_table_id); ++ lvms->oem_id, lvms->oem_table_id); + + acpi_add_table(table_offsets, tables_blob); + build_srat(tables_blob, tables->linker, machine); +@@ -470,13 +471,13 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + if (machine->numa_state->num_nodes) { + if (machine->numa_state->have_numa_distance) { + acpi_add_table(table_offsets, tables_blob); +- build_slit(tables_blob, tables->linker, machine, lams->oem_id, +- lams->oem_table_id); ++ build_slit(tables_blob, tables->linker, machine, lvms->oem_id, ++ lvms->oem_table_id); + } + if (machine->numa_state->hmat_enabled) { + acpi_add_table(table_offsets, tables_blob); + build_hmat(tables_blob, tables->linker, machine->numa_state, +- lams->oem_id, 
lams->oem_table_id); ++ lvms->oem_id, lvms->oem_table_id); + } + } + +@@ -486,8 +487,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + .base = cpu_to_le64(VIRT_PCI_CFG_BASE), + .size = cpu_to_le64(VIRT_PCI_CFG_SIZE), + }; +- build_mcfg(tables_blob, tables->linker, &mcfg, lams->oem_id, +- lams->oem_table_id); ++ build_mcfg(tables_blob, tables->linker, &mcfg, lvms->oem_id, ++ lvms->oem_table_id); + } + + #ifdef CONFIG_TPM +@@ -495,8 +496,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) { + acpi_add_table(table_offsets, tables_blob); + build_tpm2(tables_blob, tables->linker, +- tables->tcpalog, lams->oem_id, +- lams->oem_table_id); ++ tables->tcpalog, lvms->oem_id, ++ lvms->oem_table_id); + } + #endif + /* Add tables supplied by user (if any) */ +@@ -510,13 +511,13 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) + /* RSDT is pointed to by RSDP */ + rsdt = tables_blob->len; + build_rsdt(tables_blob, tables->linker, table_offsets, +- lams->oem_id, lams->oem_table_id); ++ lvms->oem_id, lvms->oem_table_id); + + /* RSDP is in FSEG memory, so allocate it separately */ + { + AcpiRsdpData rsdp_data = { + .revision = 0, +- .oem_id = lams->oem_id, ++ .oem_id = lvms->oem_id, + .xsdt_tbl_offset = NULL, + .rsdt_tbl_offset = &rsdt, + }; +@@ -593,17 +594,25 @@ static const VMStateDescription vmstate_acpi_build = { + }, + }; + +-void loongarch_acpi_setup(LoongArchMachineState *lams) ++static bool loongarch_is_acpi_enabled(LoongArchVirtMachineState *lvms) ++{ ++ if (lvms->acpi == ON_OFF_AUTO_OFF) { ++ return false; ++ } ++ return true; ++} ++ ++void loongarch_acpi_setup(LoongArchVirtMachineState *lvms) + { + AcpiBuildTables tables; + AcpiBuildState *build_state; + +- if (!lams->fw_cfg) { ++ if (!lvms->fw_cfg) { + ACPI_BUILD_DPRINTF("No fw cfg. 
Bailing out.\n"); + return; + } + +- if (!loongarch_is_acpi_enabled(lams)) { ++ if (!loongarch_is_acpi_enabled(lvms)) { + ACPI_BUILD_DPRINTF("ACPI disabled. Bailing out.\n"); + return; + } +@@ -611,7 +620,7 @@ void loongarch_acpi_setup(LoongArchMachineState *lams) + build_state = g_malloc0(sizeof *build_state); + + acpi_build_tables_init(&tables); +- acpi_build(&tables, MACHINE(lams)); ++ acpi_build(&tables, MACHINE(lvms)); + + /* Now expose it all to Guest */ + build_state->table_mr = acpi_add_rom_blob(acpi_build_update, +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index e37512729d..b8e1aa18d5 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -259,10 +259,10 @@ static void fw_cfg_add_kernel_info(struct loongarch_boot_info *info, + } + } + +-static void loongarch_firmware_boot(LoongArchMachineState *lams, ++static void loongarch_firmware_boot(LoongArchVirtMachineState *lvms, + struct loongarch_boot_info *info) + { +- fw_cfg_add_kernel_info(info, lams->fw_cfg); ++ fw_cfg_add_kernel_info(info, lvms->fw_cfg); + } + + static void init_boot_rom(struct loongarch_boot_info *info, void *p) +@@ -319,7 +319,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + + void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(ms); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms); + int i; + + /* register reset function */ +@@ -331,8 +331,8 @@ void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) + info->kernel_cmdline = ms->kernel_cmdline; + info->initrd_filename = ms->initrd_filename; + +- if (lams->bios_loaded) { +- loongarch_firmware_boot(lams, info); ++ if (lvms->bios_loaded) { ++ loongarch_firmware_boot(lvms, info); + } else { + loongarch_direct_kernel_boot(info); + } +diff --git a/hw/loongarch/fw_cfg.c b/hw/loongarch/fw_cfg.c +index f15a17416c..35aeb2decb 100644 +--- a/hw/loongarch/fw_cfg.c ++++ 
b/hw/loongarch/fw_cfg.c +@@ -17,7 +17,7 @@ static void fw_cfg_boot_set(void *opaque, const char *boot_device, + fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); + } + +-FWCfgState *loongarch_fw_cfg_init(ram_addr_t ram_size, MachineState *ms) ++FWCfgState *virt_fw_cfg_init(ram_addr_t ram_size, MachineState *ms) + { + FWCfgState *fw_cfg; + int max_cpus = ms->smp.max_cpus; +diff --git a/hw/loongarch/fw_cfg.h b/hw/loongarch/fw_cfg.h +index 7c0de4db4a..27ee68286e 100644 +--- a/hw/loongarch/fw_cfg.h ++++ b/hw/loongarch/fw_cfg.h +@@ -11,5 +11,5 @@ + #include "hw/boards.h" + #include "hw/nvram/fw_cfg.h" + +-FWCfgState *loongarch_fw_cfg_init(ram_addr_t ram_size, MachineState *ms); ++FWCfgState *virt_fw_cfg_init(ram_addr_t ram_size, MachineState *ms); + #endif +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index cca220cb5b..e39989193e 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -48,9 +48,9 @@ + #include "hw/block/flash.h" + #include "qemu/error-report.h" + +-static bool virt_is_veiointc_enabled(LoongArchMachineState *lams) ++static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms) + { +- if (lams->veiointc == ON_OFF_AUTO_OFF) { ++ if (lvms->veiointc == ON_OFF_AUTO_OFF) { + return false; + } + return true; +@@ -59,8 +59,8 @@ static bool virt_is_veiointc_enabled(LoongArchMachineState *lams) + static void virt_get_veiointc(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); +- OnOffAuto veiointc = lams->veiointc; ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj); ++ OnOffAuto veiointc = lvms->veiointc; + + visit_type_OnOffAuto(v, name, &veiointc, errp); + } +@@ -68,12 +68,12 @@ static void virt_get_veiointc(Object *obj, Visitor *v, const char *name, + static void virt_set_veiointc(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ 
LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj); + +- visit_type_OnOffAuto(v, name, &lams->veiointc, errp); ++ visit_type_OnOffAuto(v, name, &lvms->veiointc, errp); + } + +-static PFlashCFI01 *virt_flash_create1(LoongArchMachineState *lams, ++static PFlashCFI01 *virt_flash_create1(LoongArchVirtMachineState *lvms, + const char *name, + const char *alias_prop_name) + { +@@ -88,16 +88,16 @@ static PFlashCFI01 *virt_flash_create1(LoongArchMachineState *lams, + qdev_prop_set_uint16(dev, "id2", 0x00); + qdev_prop_set_uint16(dev, "id3", 0x00); + qdev_prop_set_string(dev, "name", name); +- object_property_add_child(OBJECT(lams), name, OBJECT(dev)); +- object_property_add_alias(OBJECT(lams), alias_prop_name, ++ object_property_add_child(OBJECT(lvms), name, OBJECT(dev)); ++ object_property_add_alias(OBJECT(lvms), alias_prop_name, + OBJECT(dev), "drive"); + return PFLASH_CFI01(dev); + } + +-static void virt_flash_create(LoongArchMachineState *lams) ++static void virt_flash_create(LoongArchVirtMachineState *lvms) + { +- lams->flash[0] = virt_flash_create1(lams, "virt.flash0", "pflash0"); +- lams->flash[1] = virt_flash_create1(lams, "virt.flash1", "pflash1"); ++ lvms->flash[0] = virt_flash_create1(lvms, "virt.flash0", "pflash0"); ++ lvms->flash[1] = virt_flash_create1(lvms, "virt.flash1", "pflash1"); + } + + static void virt_flash_map1(PFlashCFI01 *flash, +@@ -123,20 +123,20 @@ static void virt_flash_map1(PFlashCFI01 *flash, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0)); + } + +-static void virt_flash_map(LoongArchMachineState *lams, ++static void virt_flash_map(LoongArchVirtMachineState *lvms, + MemoryRegion *sysmem) + { +- PFlashCFI01 *flash0 = lams->flash[0]; +- PFlashCFI01 *flash1 = lams->flash[1]; ++ PFlashCFI01 *flash0 = lvms->flash[0]; ++ PFlashCFI01 *flash1 = lvms->flash[1]; + + virt_flash_map1(flash0, VIRT_FLASH0_BASE, VIRT_FLASH0_SIZE, sysmem); + virt_flash_map1(flash1, VIRT_FLASH1_BASE, VIRT_FLASH1_SIZE, sysmem); + } + +-static void 
fdt_add_cpuic_node(LoongArchMachineState *lams, ++static void fdt_add_cpuic_node(LoongArchVirtMachineState *lvms, + uint32_t *cpuintc_phandle) + { +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + char *nodename; + + *cpuintc_phandle = qemu_fdt_alloc_phandle(ms->fdt); +@@ -150,11 +150,11 @@ static void fdt_add_cpuic_node(LoongArchMachineState *lams, + g_free(nodename); + } + +-static void fdt_add_eiointc_node(LoongArchMachineState *lams, ++static void fdt_add_eiointc_node(LoongArchVirtMachineState *lvms, + uint32_t *cpuintc_phandle, + uint32_t *eiointc_phandle) + { +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + char *nodename; + hwaddr extioi_base = APIC_BASE; + hwaddr extioi_size = EXTIOI_SIZE; +@@ -175,11 +175,11 @@ static void fdt_add_eiointc_node(LoongArchMachineState *lams, + g_free(nodename); + } + +-static void fdt_add_pch_pic_node(LoongArchMachineState *lams, ++static void fdt_add_pch_pic_node(LoongArchVirtMachineState *lvms, + uint32_t *eiointc_phandle, + uint32_t *pch_pic_phandle) + { +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + char *nodename; + hwaddr pch_pic_base = VIRT_PCH_REG_BASE; + hwaddr pch_pic_size = VIRT_PCH_REG_SIZE; +@@ -200,11 +200,11 @@ static void fdt_add_pch_pic_node(LoongArchMachineState *lams, + g_free(nodename); + } + +-static void fdt_add_pch_msi_node(LoongArchMachineState *lams, ++static void fdt_add_pch_msi_node(LoongArchVirtMachineState *lvms, + uint32_t *eiointc_phandle, + uint32_t *pch_msi_phandle) + { +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + char *nodename; + hwaddr pch_msi_base = VIRT_PCH_MSI_ADDR_LOW; + hwaddr pch_msi_size = VIRT_PCH_MSI_SIZE; +@@ -228,9 +228,9 @@ static void fdt_add_pch_msi_node(LoongArchMachineState *lams, + g_free(nodename); + } + +-static void fdt_add_flash_node(LoongArchMachineState *lams) ++static void fdt_add_flash_node(LoongArchVirtMachineState *lvms) + { +- MachineState *ms = 
MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + char *nodename; + MemoryRegion *flash_mem; + +@@ -240,11 +240,11 @@ static void fdt_add_flash_node(LoongArchMachineState *lams) + hwaddr flash1_base; + hwaddr flash1_size; + +- flash_mem = pflash_cfi01_get_memory(lams->flash[0]); ++ flash_mem = pflash_cfi01_get_memory(lvms->flash[0]); + flash0_base = flash_mem->addr; + flash0_size = memory_region_size(flash_mem); + +- flash_mem = pflash_cfi01_get_memory(lams->flash[1]); ++ flash_mem = pflash_cfi01_get_memory(lvms->flash[1]); + flash1_base = flash_mem->addr; + flash1_size = memory_region_size(flash_mem); + +@@ -258,13 +258,13 @@ static void fdt_add_flash_node(LoongArchMachineState *lams) + g_free(nodename); + } + +-static void fdt_add_rtc_node(LoongArchMachineState *lams, ++static void fdt_add_rtc_node(LoongArchVirtMachineState *lvms, + uint32_t *pch_pic_phandle) + { + char *nodename; + hwaddr base = VIRT_RTC_REG_BASE; + hwaddr size = VIRT_RTC_LEN; +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + + nodename = g_strdup_printf("/rtc@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -278,13 +278,13 @@ static void fdt_add_rtc_node(LoongArchMachineState *lams, + g_free(nodename); + } + +-static void fdt_add_uart_node(LoongArchMachineState *lams, ++static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, + uint32_t *pch_pic_phandle) + { + char *nodename; + hwaddr base = VIRT_UART_BASE; + hwaddr size = VIRT_UART_SIZE; +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + + nodename = g_strdup_printf("/serial@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -299,11 +299,11 @@ static void fdt_add_uart_node(LoongArchMachineState *lams, + g_free(nodename); + } + +-static void create_fdt(LoongArchMachineState *lams) ++static void create_fdt(LoongArchVirtMachineState *lvms) + { +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + +- ms->fdt = 
create_device_tree(&lams->fdt_size); ++ ms->fdt = create_device_tree(&lvms->fdt_size); + if (!ms->fdt) { + error_report("create_device_tree() failed"); + exit(1); +@@ -317,10 +317,10 @@ static void create_fdt(LoongArchMachineState *lams) + qemu_fdt_add_subnode(ms->fdt, "/chosen"); + } + +-static void fdt_add_cpu_nodes(const LoongArchMachineState *lams) ++static void fdt_add_cpu_nodes(const LoongArchVirtMachineState *lvms) + { + int num; +- const MachineState *ms = MACHINE(lams); ++ const MachineState *ms = MACHINE(lvms); + int smp_cpus = ms->smp.cpus; + + qemu_fdt_add_subnode(ms->fdt, "/cpus"); +@@ -374,11 +374,11 @@ static void fdt_add_cpu_nodes(const LoongArchMachineState *lams) + } + } + +-static void fdt_add_fw_cfg_node(const LoongArchMachineState *lams) ++static void fdt_add_fw_cfg_node(const LoongArchVirtMachineState *lvms) + { + char *nodename; + hwaddr base = VIRT_FWCFG_BASE; +- const MachineState *ms = MACHINE(lams); ++ const MachineState *ms = MACHINE(lvms); + + nodename = g_strdup_printf("/fw_cfg@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -390,7 +390,7 @@ static void fdt_add_fw_cfg_node(const LoongArchMachineState *lams) + g_free(nodename); + } + +-static void fdt_add_pcie_irq_map_node(const LoongArchMachineState *lams, ++static void fdt_add_pcie_irq_map_node(const LoongArchVirtMachineState *lvms, + char *nodename, + uint32_t *pch_pic_phandle) + { +@@ -398,7 +398,7 @@ static void fdt_add_pcie_irq_map_node(const LoongArchMachineState *lams, + uint32_t irq_map_stride = 0; + uint32_t full_irq_map[GPEX_NUM_IRQS *GPEX_NUM_IRQS * 10] = {}; + uint32_t *irq_map = full_irq_map; +- const MachineState *ms = MACHINE(lams); ++ const MachineState *ms = MACHINE(lvms); + + /* This code creates a standard swizzle of interrupts such that + * each device's first interrupt is based on it's PCI_SLOT number. 
+@@ -443,7 +443,7 @@ static void fdt_add_pcie_irq_map_node(const LoongArchMachineState *lams, + 0x1800, 0, 0, 0x7); + } + +-static void fdt_add_pcie_node(const LoongArchMachineState *lams, ++static void fdt_add_pcie_node(const LoongArchVirtMachineState *lvms, + uint32_t *pch_pic_phandle, + uint32_t *pch_msi_phandle) + { +@@ -456,7 +456,7 @@ static void fdt_add_pcie_node(const LoongArchMachineState *lams, + hwaddr size_pcie = VIRT_PCI_CFG_SIZE; + hwaddr base = base_pcie; + +- const MachineState *ms = MACHINE(lams); ++ const MachineState *ms = MACHINE(lvms); + + nodename = g_strdup_printf("/pcie@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -479,7 +479,7 @@ static void fdt_add_pcie_node(const LoongArchMachineState *lams, + qemu_fdt_setprop_cells(ms->fdt, nodename, "msi-map", + 0, *pch_msi_phandle, 0, 0x10000); + +- fdt_add_pcie_irq_map_node(lams, nodename, pch_pic_phandle); ++ fdt_add_pcie_irq_map_node(lvms, nodename, pch_pic_phandle); + + g_free(nodename); + } +@@ -501,15 +501,15 @@ static void fdt_add_memory_node(MachineState *ms, + g_free(nodename); + } + +-static void virt_build_smbios(LoongArchMachineState *lams) ++static void virt_build_smbios(LoongArchVirtMachineState *lvms) + { +- MachineState *ms = MACHINE(lams); +- MachineClass *mc = MACHINE_GET_CLASS(lams); ++ MachineState *ms = MACHINE(lvms); ++ MachineClass *mc = MACHINE_GET_CLASS(lvms); + uint8_t *smbios_tables, *smbios_anchor; + size_t smbios_tables_len, smbios_anchor_len; + const char *product = "QEMU Virtual Machine"; + +- if (!lams->fw_cfg) { ++ if (!lvms->fw_cfg) { + return; + } + +@@ -520,26 +520,26 @@ static void virt_build_smbios(LoongArchMachineState *lams) + &smbios_anchor, &smbios_anchor_len, &error_fatal); + + if (smbios_anchor) { +- fw_cfg_add_file(lams->fw_cfg, "etc/smbios/smbios-tables", ++ fw_cfg_add_file(lvms->fw_cfg, "etc/smbios/smbios-tables", + smbios_tables, smbios_tables_len); +- fw_cfg_add_file(lams->fw_cfg, "etc/smbios/smbios-anchor", ++ 
fw_cfg_add_file(lvms->fw_cfg, "etc/smbios/smbios-anchor", + smbios_anchor, smbios_anchor_len); + } + } + +-static void virt_machine_done(Notifier *notifier, void *data) ++static void virt_done(Notifier *notifier, void *data) + { +- LoongArchMachineState *lams = container_of(notifier, +- LoongArchMachineState, machine_done); +- virt_build_smbios(lams); +- loongarch_acpi_setup(lams); ++ LoongArchVirtMachineState *lvms = container_of(notifier, ++ LoongArchVirtMachineState, machine_done); ++ virt_build_smbios(lvms); ++ loongarch_acpi_setup(lvms); + } + + static void virt_powerdown_req(Notifier *notifier, void *opaque) + { +- LoongArchMachineState *s = container_of(notifier, +- LoongArchMachineState, powerdown_notifier); ++ LoongArchVirtMachineState *s; + ++ s = container_of(notifier, LoongArchVirtMachineState, powerdown_notifier); + acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS); + } + +@@ -559,10 +559,11 @@ static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) + memmap_entries++; + } + +-static DeviceState *create_acpi_ged(DeviceState *pch_pic, LoongArchMachineState *lams) ++static DeviceState *create_acpi_ged(DeviceState *pch_pic, ++ LoongArchVirtMachineState *lvms) + { + DeviceState *dev; +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + uint32_t event = ACPI_GED_PWR_DOWN_EVT; + + if (ms->ram_slots) { +@@ -609,12 +610,12 @@ static DeviceState *create_platform_bus(DeviceState *pch_pic) + return dev; + } + +-static void loongarch_devices_init(DeviceState *pch_pic, +- LoongArchMachineState *lams, ++static void virt_devices_init(DeviceState *pch_pic, ++ LoongArchVirtMachineState *lvms, + uint32_t *pch_pic_phandle, + uint32_t *pch_msi_phandle) + { +- MachineClass *mc = MACHINE_GET_CLASS(lams); ++ MachineClass *mc = MACHINE_GET_CLASS(lvms); + DeviceState *gpex_dev; + SysBusDevice *d; + PCIBus *pci_bus; +@@ -626,7 +627,7 @@ static void loongarch_devices_init(DeviceState *pch_pic, + d = SYS_BUS_DEVICE(gpex_dev); + 
sysbus_realize_and_unref(d, &error_fatal); + pci_bus = PCI_HOST_BRIDGE(gpex_dev)->bus; +- lams->pci_bus = pci_bus; ++ lvms->pci_bus = pci_bus; + + /* Map only part size_ecam bytes of ECAM space */ + ecam_alias = g_new0(MemoryRegion, 1); +@@ -659,13 +660,13 @@ static void loongarch_devices_init(DeviceState *pch_pic, + } + + /* Add pcie node */ +- fdt_add_pcie_node(lams, pch_pic_phandle, pch_msi_phandle); ++ fdt_add_pcie_node(lvms, pch_pic_phandle, pch_msi_phandle); + + serial_mm_init(get_system_memory(), VIRT_UART_BASE, 0, + qdev_get_gpio_in(pch_pic, + VIRT_UART_IRQ - VIRT_GSI_BASE), + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); +- fdt_add_uart_node(lams, pch_pic_phandle); ++ fdt_add_uart_node(lvms, pch_pic_phandle); + + /* Network init */ + for (i = 0; i < nb_nics; i++) { +@@ -680,17 +681,17 @@ static void loongarch_devices_init(DeviceState *pch_pic, + sysbus_create_simple("ls7a_rtc", VIRT_RTC_REG_BASE, + qdev_get_gpio_in(pch_pic, + VIRT_RTC_IRQ - VIRT_GSI_BASE)); +- fdt_add_rtc_node(lams, pch_pic_phandle); ++ fdt_add_rtc_node(lvms, pch_pic_phandle); + + /* acpi ged */ +- lams->acpi_ged = create_acpi_ged(pch_pic, lams); ++ lvms->acpi_ged = create_acpi_ged(pch_pic, lvms); + /* platform bus */ +- lams->platform_bus_dev = create_platform_bus(pch_pic); ++ lvms->platform_bus_dev = create_platform_bus(pch_pic); + } + +-static void loongarch_irq_init(LoongArchMachineState *lams) ++static void virt_irq_init(LoongArchVirtMachineState *lvms) + { +- MachineState *ms = MACHINE(lams); ++ MachineState *ms = MACHINE(lvms); + DeviceState *pch_pic, *pch_msi, *cpudev; + DeviceState *ipi, *extioi; + SysBusDevice *d; +@@ -728,20 +729,20 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); + + /* IPI iocsr memory region */ +- memory_region_add_subregion(&lams->system_iocsr, SMP_IPI_MAILBOX, ++ memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX, + sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0)); +- 
memory_region_add_subregion(&lams->system_iocsr, MAIL_SEND_ADDR, ++ memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR, + sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); + + /* Add cpu interrupt-controller */ +- fdt_add_cpuic_node(lams, &cpuintc_phandle); ++ fdt_add_cpuic_node(lvms, &cpuintc_phandle); + + for (cpu = 0; cpu < ms->smp.cpus; cpu++) { + cpu_state = qemu_get_cpu(cpu); + cpudev = DEVICE(cpu_state); + lacpu = LOONGARCH_CPU(cpu_state); + env = &(lacpu->env); +- env->address_space_iocsr = &lams->as_iocsr; ++ env->address_space_iocsr = &lvms->as_iocsr; + + /* connect ipi irq to cpu irq */ + qdev_connect_gpio_out(ipi, cpu, qdev_get_gpio_in(cpudev, IRQ_IPI)); +@@ -751,18 +752,18 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + /* Create EXTIOI device */ + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); + qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus); +- if (virt_is_veiointc_enabled(lams)) { ++ if (virt_is_veiointc_enabled(lvms)) { + qdev_prop_set_bit(extioi, "has-virtualization-extension", true); + } + sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); + +- memory_region_add_subregion(&lams->system_iocsr, APIC_BASE, ++ memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); +- if (virt_is_veiointc_enabled(lams)) { +- memory_region_add_subregion(&lams->system_iocsr, EXTIOI_VIRT_BASE, ++ if (virt_is_veiointc_enabled(lvms)) { ++ memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1)); + } +- lams->extioi = extioi; ++ lvms->extioi = extioi; + + /* + * connect ext irq to the cpu irq +@@ -777,7 +778,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + } + + /* Add Extend I/O Interrupt Controller node */ +- fdt_add_eiointc_node(lams, &cpuintc_phandle, &eiointc_phandle); ++ fdt_add_eiointc_node(lvms, &cpuintc_phandle, &eiointc_phandle); + + pch_pic = qdev_new(TYPE_LOONGARCH_PCH_PIC); + num 
= VIRT_PCH_PIC_IRQ_NUM; +@@ -799,7 +800,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + } + + /* Add PCH PIC node */ +- fdt_add_pch_pic_node(lams, &eiointc_phandle, &pch_pic_phandle); ++ fdt_add_pch_pic_node(lvms, &eiointc_phandle, &pch_pic_phandle); + + pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); + start = num; +@@ -816,30 +817,30 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + } + + /* Add PCH MSI node */ +- fdt_add_pch_msi_node(lams, &eiointc_phandle, &pch_msi_phandle); ++ fdt_add_pch_msi_node(lvms, &eiointc_phandle, &pch_msi_phandle); + +- loongarch_devices_init(pch_pic, lams, &pch_pic_phandle, &pch_msi_phandle); ++ virt_devices_init(pch_pic, lvms, &pch_pic_phandle, &pch_msi_phandle); + } + +-static void loongarch_firmware_init(LoongArchMachineState *lams) ++static void virt_firmware_init(LoongArchVirtMachineState *lvms) + { +- char *filename = MACHINE(lams)->firmware; ++ char *filename = MACHINE(lvms)->firmware; + char *bios_name = NULL; + int bios_size, i; + BlockBackend *pflash_blk0; + MemoryRegion *mr; + +- lams->bios_loaded = false; ++ lvms->bios_loaded = false; + + /* Map legacy -drive if=pflash to machine properties */ +- for (i = 0; i < ARRAY_SIZE(lams->flash); i++) { +- pflash_cfi01_legacy_drive(lams->flash[i], ++ for (i = 0; i < ARRAY_SIZE(lvms->flash); i++) { ++ pflash_cfi01_legacy_drive(lvms->flash[i], + drive_get(IF_PFLASH, 0, i)); + } + +- virt_flash_map(lams, get_system_memory()); ++ virt_flash_map(lvms, get_system_memory()); + +- pflash_blk0 = pflash_cfi01_get_blk(lams->flash[0]); ++ pflash_blk0 = pflash_cfi01_get_blk(lvms->flash[0]); + + if (pflash_blk0) { + if (filename) { +@@ -847,7 +848,7 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + "options at once"); + exit(1); + } +- lams->bios_loaded = true; ++ lvms->bios_loaded = true; + return; + } + +@@ -858,14 +859,14 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + exit(1); + } + +- mr = 
sysbus_mmio_get_region(SYS_BUS_DEVICE(lams->flash[0]), 0); ++ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(lvms->flash[0]), 0); + bios_size = load_image_mr(bios_name, mr); + if (bios_size < 0) { + error_report("Could not load ROM image '%s'", bios_name); + exit(1); + } + g_free(bios_name); +- lams->bios_loaded = true; ++ lvms->bios_loaded = true; + } + } + +@@ -873,16 +874,16 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + static MemTxResult loongarch_qemu_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size, MemTxAttrs attrs) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(opaque); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque); + uint64_t features; + + switch (addr) { + case MISC_FUNC_REG: +- if (!virt_is_veiointc_enabled(lams)) { ++ if (!virt_is_veiointc_enabled(lvms)) { + return MEMTX_OK; + } + +- features = address_space_ldl(&lams->as_iocsr, ++ features = address_space_ldl(&lvms->as_iocsr, + EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, + attrs, NULL); + if (val & BIT_ULL(IOCSRM_EXTIOI_EN)) { +@@ -892,7 +893,7 @@ static MemTxResult loongarch_qemu_write(void *opaque, hwaddr addr, uint64_t val, + features |= BIT(EXTIOI_ENABLE_INT_ENCODE); + } + +- address_space_stl(&lams->as_iocsr, ++ address_space_stl(&lvms->as_iocsr, + EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, + features, attrs, NULL); + } +@@ -904,7 +905,7 @@ static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, + uint64_t *data, + unsigned size, MemTxAttrs attrs) + { +- LoongArchMachineState *lams = LOONGARCH_MACHINE(opaque); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque); + uint64_t ret = 0; + int features; + +@@ -926,12 +927,12 @@ static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, + ret = 0x303030354133ULL; /* "3A5000" */ + break; + case MISC_FUNC_REG: +- if (!virt_is_veiointc_enabled(lams)) { ++ if (!virt_is_veiointc_enabled(lvms)) { + ret |= BIT_ULL(IOCSRM_EXTIOI_EN); + break; + } + +- features = 
address_space_ldl(&lams->as_iocsr, ++ features = address_space_ldl(&lvms->as_iocsr, + EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, + attrs, NULL); + if (features & BIT(EXTIOI_ENABLE)) { +@@ -948,7 +949,7 @@ static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, + return MEMTX_OK; + } + +-static const MemoryRegionOps loongarch_qemu_ops = { ++static const MemoryRegionOps virt_iocsr_misc_ops = { + .read_with_attrs = loongarch_qemu_read, + .write_with_attrs = loongarch_qemu_write, + .endianness = DEVICE_LITTLE_ENDIAN, +@@ -962,7 +963,7 @@ static const MemoryRegionOps loongarch_qemu_ops = { + }, + }; + +-static void loongarch_init(MachineState *machine) ++static void virt_init(MachineState *machine) + { + LoongArchCPU *lacpu; + const char *cpu_model = machine->cpu_type; +@@ -970,7 +971,7 @@ static void loongarch_init(MachineState *machine) + ram_addr_t ram_size = machine->ram_size; + uint64_t highram_size = 0, phyAddr = 0; + MemoryRegion *address_space_mem = get_system_memory(); +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(machine); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); + int nb_numa_nodes = machine->numa_state->num_nodes; + NodeInfo *numa_info = machine->numa_state->nodes; + int i; +@@ -986,16 +987,16 @@ static void loongarch_init(MachineState *machine) + error_report("ram_size must be greater than 1G."); + exit(1); + } +- create_fdt(lams); ++ create_fdt(lvms); + + /* Create IOCSR space */ +- memory_region_init_io(&lams->system_iocsr, OBJECT(machine), NULL, ++ memory_region_init_io(&lvms->system_iocsr, OBJECT(machine), NULL, + machine, "iocsr", UINT64_MAX); +- address_space_init(&lams->as_iocsr, &lams->system_iocsr, "IOCSR"); +- memory_region_init_io(&lams->iocsr_mem, OBJECT(machine), +- &loongarch_qemu_ops, ++ address_space_init(&lvms->as_iocsr, &lvms->system_iocsr, "IOCSR"); ++ memory_region_init_io(&lvms->iocsr_mem, OBJECT(machine), ++ &virt_iocsr_misc_ops, + machine, "iocsr_misc", 0x428); +- 
memory_region_add_subregion(&lams->system_iocsr, 0, &lams->iocsr_mem); ++ memory_region_add_subregion(&lvms->system_iocsr, 0, &lvms->iocsr_mem); + + /* Init CPUs */ + possible_cpus = mc->possible_cpu_arch_ids(machine); +@@ -1006,14 +1007,14 @@ static void loongarch_init(MachineState *machine) + lacpu = LOONGARCH_CPU(cpu); + lacpu->phy_id = machine->possible_cpus->cpus[i].arch_id; + } +- fdt_add_cpu_nodes(lams); ++ fdt_add_cpu_nodes(lvms); + + /* Node0 memory */ + memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1); + fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0); +- memory_region_init_alias(&lams->lowmem, NULL, "loongarch.node0.lowram", ++ memory_region_init_alias(&lvms->lowmem, NULL, "loongarch.node0.lowram", + machine->ram, offset, VIRT_LOWMEM_SIZE); +- memory_region_add_subregion(address_space_mem, phyAddr, &lams->lowmem); ++ memory_region_add_subregion(address_space_mem, phyAddr, &lvms->lowmem); + + offset += VIRT_LOWMEM_SIZE; + if (nb_numa_nodes > 0) { +@@ -1025,9 +1026,9 @@ static void loongarch_init(MachineState *machine) + phyAddr = VIRT_HIGHMEM_BASE; + memmap_add_entry(phyAddr, highram_size, 1); + fdt_add_memory_node(machine, phyAddr, highram_size, 0); +- memory_region_init_alias(&lams->highmem, NULL, "loongarch.node0.highram", ++ memory_region_init_alias(&lvms->highmem, NULL, "loongarch.node0.highram", + machine->ram, offset, highram_size); +- memory_region_add_subregion(address_space_mem, phyAddr, &lams->highmem); ++ memory_region_add_subregion(address_space_mem, phyAddr, &lvms->highmem); + + /* Node1 - Nodemax memory */ + offset += highram_size; +@@ -1068,30 +1069,30 @@ static void loongarch_init(MachineState *machine) + } + + /* load the BIOS image. 
*/ +- loongarch_firmware_init(lams); ++ virt_firmware_init(lvms); + + /* fw_cfg init */ +- lams->fw_cfg = loongarch_fw_cfg_init(ram_size, machine); +- rom_set_fw(lams->fw_cfg); +- if (lams->fw_cfg != NULL) { +- fw_cfg_add_file(lams->fw_cfg, "etc/memmap", ++ lvms->fw_cfg = virt_fw_cfg_init(ram_size, machine); ++ rom_set_fw(lvms->fw_cfg); ++ if (lvms->fw_cfg != NULL) { ++ fw_cfg_add_file(lvms->fw_cfg, "etc/memmap", + memmap_table, + sizeof(struct memmap_entry) * (memmap_entries)); + } +- fdt_add_fw_cfg_node(lams); +- fdt_add_flash_node(lams); ++ fdt_add_fw_cfg_node(lvms); ++ fdt_add_flash_node(lvms); + + /* Initialize the IO interrupt subsystem */ +- loongarch_irq_init(lams); ++ virt_irq_init(lvms); + platform_bus_add_all_fdt_nodes(machine->fdt, "/platic", + VIRT_PLATFORM_BUS_BASEADDRESS, + VIRT_PLATFORM_BUS_SIZE, + VIRT_PLATFORM_BUS_IRQ); +- lams->machine_done.notify = virt_machine_done; +- qemu_add_machine_init_done_notifier(&lams->machine_done); ++ lvms->machine_done.notify = virt_done; ++ qemu_add_machine_init_done_notifier(&lvms->machine_done); + /* connect powerdown request */ +- lams->powerdown_notifier.notify = virt_powerdown_req; +- qemu_register_powerdown_notifier(&lams->powerdown_notifier); ++ lvms->powerdown_notifier.notify = virt_powerdown_req; ++ qemu_register_powerdown_notifier(&lvms->powerdown_notifier); + + /* + * Since lowmem region starts from 0 and Linux kernel legacy start address +@@ -1100,52 +1101,44 @@ static void loongarch_init(MachineState *machine) + * Put the FDT into the memory map as a ROM image: this will ensure + * the FDT is copied again upon reset, even if addr points into RAM. 
+ */ +- qemu_fdt_dumpdtb(machine->fdt, lams->fdt_size); +- rom_add_blob_fixed_as("fdt", machine->fdt, lams->fdt_size, FDT_BASE, ++ qemu_fdt_dumpdtb(machine->fdt, lvms->fdt_size); ++ rom_add_blob_fixed_as("fdt", machine->fdt, lvms->fdt_size, FDT_BASE, + &address_space_memory); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, +- rom_ptr_for_as(&address_space_memory, FDT_BASE, lams->fdt_size)); ++ rom_ptr_for_as(&address_space_memory, FDT_BASE, lvms->fdt_size)); + +- lams->bootinfo.ram_size = ram_size; +- loongarch_load_kernel(machine, &lams->bootinfo); ++ lvms->bootinfo.ram_size = ram_size; ++ loongarch_load_kernel(machine, &lvms->bootinfo); + } + +-bool loongarch_is_acpi_enabled(LoongArchMachineState *lams) ++static void virt_get_acpi(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) + { +- if (lams->acpi == ON_OFF_AUTO_OFF) { +- return false; +- } +- return true; +-} +- +-static void loongarch_get_acpi(Object *obj, Visitor *v, const char *name, +- void *opaque, Error **errp) +-{ +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(obj); +- OnOffAuto acpi = lams->acpi; ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj); ++ OnOffAuto acpi = lvms->acpi; + + visit_type_OnOffAuto(v, name, &acpi, errp); + } + +-static void loongarch_set_acpi(Object *obj, Visitor *v, const char *name, ++static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(obj); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj); + +- visit_type_OnOffAuto(v, name, &lams->acpi, errp); ++ visit_type_OnOffAuto(v, name, &lvms->acpi, errp); + } + +-static void loongarch_machine_initfn(Object *obj) ++static void virt_initfn(Object *obj) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(obj); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj); + + if (tcg_enabled()) { +- lams->veiointc = ON_OFF_AUTO_OFF; ++ 
lvms->veiointc = ON_OFF_AUTO_OFF; + } +- lams->acpi = ON_OFF_AUTO_AUTO; +- lams->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); +- lams->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +- virt_flash_create(lams); ++ lvms->acpi = ON_OFF_AUTO_AUTO; ++ lvms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ lvms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ virt_flash_create(lvms); + } + + static bool memhp_type_supported(DeviceState *dev) +@@ -1161,7 +1154,7 @@ static void virt_mem_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), NULL, errp); + } + +-static void virt_machine_device_pre_plug(HotplugHandler *hotplug_dev, ++static void virt_device_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { + if (memhp_type_supported(dev)) { +@@ -1172,14 +1165,14 @@ static void virt_machine_device_pre_plug(HotplugHandler *hotplug_dev, + static void virt_mem_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev); + + /* the acpi ged is always exist */ +- hotplug_handler_unplug_request(HOTPLUG_HANDLER(lams->acpi_ged), dev, ++ hotplug_handler_unplug_request(HOTPLUG_HANDLER(lvms->acpi_ged), dev, + errp); + } + +-static void virt_machine_device_unplug_request(HotplugHandler *hotplug_dev, ++static void virt_device_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { + if (memhp_type_supported(dev)) { +@@ -1190,14 +1183,14 @@ static void virt_machine_device_unplug_request(HotplugHandler *hotplug_dev, + static void virt_mem_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev); + +- hotplug_handler_unplug(HOTPLUG_HANDLER(lams->acpi_ged), dev, 
errp); +- pc_dimm_unplug(PC_DIMM(dev), MACHINE(lams)); ++ hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, errp); ++ pc_dimm_unplug(PC_DIMM(dev), MACHINE(lvms)); + qdev_unrealize(dev); + } + +-static void virt_machine_device_unplug(HotplugHandler *hotplug_dev, ++static void virt_device_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { + if (memhp_type_supported(dev)) { +@@ -1208,31 +1201,32 @@ static void virt_machine_device_unplug(HotplugHandler *hotplug_dev, + static void virt_mem_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev); + +- pc_dimm_plug(PC_DIMM(dev), MACHINE(lams)); +- hotplug_handler_plug(HOTPLUG_HANDLER(lams->acpi_ged), ++ pc_dimm_plug(PC_DIMM(dev), MACHINE(lvms)); ++ hotplug_handler_plug(HOTPLUG_HANDLER(lvms->acpi_ged), + dev, &error_abort); + } + +-static void loongarch_machine_device_plug_cb(HotplugHandler *hotplug_dev, ++static void virt_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- LoongArchMachineState *lams = LOONGARCH_VIRT_MACHINE(hotplug_dev); +- MachineClass *mc = MACHINE_GET_CLASS(lams); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev); ++ MachineClass *mc = MACHINE_GET_CLASS(lvms); ++ PlatformBusDevice *pbus; + + if (device_is_dynamic_sysbus(mc, dev)) { +- if (lams->platform_bus_dev) { +- platform_bus_link_device(PLATFORM_BUS_DEVICE(lams->platform_bus_dev), +- SYS_BUS_DEVICE(dev)); ++ if (lvms->platform_bus_dev) { ++ pbus = PLATFORM_BUS_DEVICE(lvms->platform_bus_dev); ++ platform_bus_link_device(pbus, SYS_BUS_DEVICE(dev)); + } + } else if (memhp_type_supported(dev)) { + virt_mem_plug(hotplug_dev, dev, errp); + } + } + +-static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, +- DeviceState *dev) ++static HotplugHandler 
*virt_get_hotplug_handler(MachineState *machine, ++ DeviceState *dev) + { + MachineClass *mc = MACHINE_GET_CLASS(machine); + +@@ -1272,8 +1266,8 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + +-static CpuInstanceProperties +-virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) ++static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, ++ unsigned cpu_index) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); +@@ -1295,12 +1289,12 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + return nidx; + } + +-static void loongarch_class_init(ObjectClass *oc, void *data) ++static void virt_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + +- mc->init = loongarch_init; ++ mc->init = virt_init; + mc->default_ram_size = 1 * GiB; + mc->default_cpu_type = LOONGARCH_CPU_TYPE_NAME("la464"); + mc->default_ram_id = "loongarch.ram"; +@@ -1316,15 +1310,15 @@ static void loongarch_class_init(ObjectClass *oc, void *data) + mc->numa_mem_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; +- mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ mc->get_hotplug_handler = virt_get_hotplug_handler; + mc->default_nic = "virtio-net-pci"; +- hc->plug = loongarch_machine_device_plug_cb; +- hc->pre_plug = virt_machine_device_pre_plug; +- hc->unplug_request = virt_machine_device_unplug_request; +- hc->unplug = virt_machine_device_unplug; ++ hc->plug = virt_device_plug_cb; ++ hc->pre_plug = virt_device_pre_plug; ++ hc->unplug_request = virt_device_unplug_request; ++ hc->unplug = virt_device_unplug; + + object_class_property_add(oc, "acpi", "OnOffAuto", +- loongarch_get_acpi, loongarch_set_acpi, ++ virt_get_acpi, virt_set_acpi, + NULL, NULL); + 
object_class_property_set_description(oc, "acpi", + "Enable ACPI"); +@@ -1338,13 +1332,13 @@ static void loongarch_class_init(ObjectClass *oc, void *data) + #endif + } + +-static const TypeInfo loongarch_machine_types[] = { ++static const TypeInfo virt_machine_types[] = { + { + .name = TYPE_LOONGARCH_VIRT_MACHINE, + .parent = TYPE_MACHINE, +- .instance_size = sizeof(LoongArchMachineState), +- .class_init = loongarch_class_init, +- .instance_init = loongarch_machine_initfn, ++ .instance_size = sizeof(LoongArchVirtMachineState), ++ .class_init = virt_class_init, ++ .instance_init = virt_initfn, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } +@@ -1352,4 +1346,4 @@ static const TypeInfo loongarch_machine_types[] = { + } + }; + +-DEFINE_TYPES(loongarch_machine_types) ++DEFINE_TYPES(virt_machine_types) +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 0509b9a9af..0a4d9a25f0 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -37,7 +37,7 @@ + + #define FDT_BASE 0x100000 + +-struct LoongArchMachineState { ++struct LoongArchVirtMachineState { + /*< private >*/ + MachineState parent_obj; + +@@ -67,7 +67,6 @@ struct LoongArchMachineState { + }; + + #define TYPE_LOONGARCH_VIRT_MACHINE MACHINE_TYPE_NAME("virt") +-OBJECT_DECLARE_SIMPLE_TYPE(LoongArchMachineState, LOONGARCH_VIRT_MACHINE) +-bool loongarch_is_acpi_enabled(LoongArchMachineState *lams); +-void loongarch_acpi_setup(LoongArchMachineState *lams); ++OBJECT_DECLARE_SIMPLE_TYPE(LoongArchVirtMachineState, LOONGARCH_VIRT_MACHINE) ++void loongarch_acpi_setup(LoongArchVirtMachineState *lvms); + #endif +-- +2.39.1 + diff --git a/hw-loongarch-boot-Adjust-the-loading-position-of-the.patch b/hw-loongarch-boot-Adjust-the-loading-position-of-the.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a10afd591917cb63988dfc9fe9ef975dacc627d --- /dev/null +++ b/hw-loongarch-boot-Adjust-the-loading-position-of-the.patch @@ -0,0 +1,95 @@ 
+From 16670675cbf7fc4db147a698ba7787d2e2fa675b Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Wed, 26 Mar 2025 17:02:37 +0800 +Subject: [PATCH] hw/loongarch/boot: Adjust the loading position of the initrd + +When only the -kernel parameter is used to load the elf kernel, +the initrd is loaded in the ram. If the initrd size is too large, +the loading fails, resulting in a VM startup failure. +This patch first loads initrd near the kernel. +When the nearby memory space of the kernel is insufficient, +it tries to load it to the starting position of high memory. +If there is still not enough, qemu will report an error +and ask the user to increase the memory space for the +virtual machine to boot. + +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 53 +++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 44 insertions(+), 9 deletions(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 53dcefbb55..39c4a6d8c6 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -171,6 +171,48 @@ static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) + return addr & MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS); + } + ++static void find_initrd_loadoffset(struct loongarch_boot_info *info, ++ uint64_t kernel_high, ssize_t kernel_size) ++{ ++ hwaddr base, size, gap, low_end; ++ ram_addr_t initrd_end, initrd_start; ++ ++ base = VIRT_LOWMEM_BASE; ++ gap = VIRT_LOWMEM_SIZE; ++ initrd_start = ROUND_UP(kernel_high + 4 * kernel_size, 64 * KiB); ++ initrd_end = initrd_start + initrd_size; ++ ++ size = info->ram_size; ++ low_end = base + MIN(size, gap); ++ if (initrd_end <= low_end) { ++ initrd_offset = initrd_start; ++ return; ++ } ++ ++ if (size <= gap) { ++ error_report("The low memory too small for initial ram disk '%s'," ++ "You need to expand the memory space", ++ info->initrd_filename); ++ exit(1); ++ } ++ ++ /* ++ * Try to load initrd in the high memory ++ */ ++ size -= gap; ++ base = VIRT_HIGHMEM_BASE; ++ initrd_start = 
ROUND_UP(base, 64 * KiB); ++ if (initrd_size <= size) { ++ initrd_offset = initrd_start; ++ return; ++ } ++ ++ error_report("The high memory too small for initial ram disk '%s'," ++ "You need to expand the memory space", ++ info->initrd_filename); ++ exit(1); ++} ++ + static int64_t load_kernel_info(struct loongarch_boot_info *info) + { + uint64_t kernel_entry, kernel_low, kernel_high; +@@ -192,16 +234,9 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) + if (info->initrd_filename) { + initrd_size = get_image_size(info->initrd_filename); + if (initrd_size > 0) { +- initrd_offset = ROUND_UP(kernel_high + 4 * kernel_size, 64 * KiB); +- +- if (initrd_offset + initrd_size > info->ram_size) { +- error_report("memory too small for initial ram disk '%s'", +- info->initrd_filename); +- exit(1); +- } +- ++ find_initrd_loadoffset(info, kernel_high, kernel_size); + initrd_size = load_image_targphys(info->initrd_filename, initrd_offset, +- info->ram_size - initrd_offset); ++ initrd_size); + } + + if (initrd_size == (target_ulong)-1) { +-- +2.41.0.windows.1 + diff --git a/hw-loongarch-boot-Use-warn_report-when-no-kernel-fil.patch b/hw-loongarch-boot-Use-warn_report-when-no-kernel-fil.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ac61a24faf92d8331dba704172323b3239f362f --- /dev/null +++ b/hw-loongarch-boot-Use-warn_report-when-no-kernel-fil.patch @@ -0,0 +1,47 @@ +From b7217c8f9b3f1d611485bad1263e109484a743e3 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 30 Oct 2024 09:23:59 +0800 +Subject: [PATCH 77/78] hw/loongarch/boot: Use warn_report when no kernel + filename +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When we run “qemu-system-loongarch64 -qmp stdio -vnc none -S”, +we get an error message “Need kernel filename” and then we can't use qmp cmd to query some information. +So, we just throw a warning and then the cpus starts running from address VIRT_FLASH0_BASE. 
+ +Signed-off-by: Song Gao +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20241030012359.4040817-1-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index cb668703bd..f258eefe9a 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -278,7 +278,7 @@ static void init_boot_rom(struct loongarch_boot_info *info, void *p) + static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + { + void *p, *bp; +- int64_t kernel_addr = 0; ++ int64_t kernel_addr = VIRT_FLASH0_BASE; + LoongArchCPU *lacpu; + CPUState *cs; + +@@ -286,8 +286,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) + kernel_addr = load_kernel_info(info); + } else { + if(!qtest_enabled()) { +- error_report("Need kernel filename\n"); +- exit(1); ++ warn_report("No kernel provided, booting from flash drive."); + } + } + +-- +2.39.1 + diff --git a/hw-loongarch-boot.c-fix-out-of-bound-reading.patch b/hw-loongarch-boot.c-fix-out-of-bound-reading.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e1b0e90b50a7347cf64f5d2764137b6f742d507 --- /dev/null +++ b/hw-loongarch-boot.c-fix-out-of-bound-reading.patch @@ -0,0 +1,35 @@ +From f9cc704bbcf8bb8a06095289921dc88944d0fe94 Mon Sep 17 00:00:00 2001 +From: Dmitry Frolov +Date: Fri, 28 Jun 2024 15:39:10 +0300 +Subject: [PATCH 30/78] hw/loongarch/boot.c: fix out-of-bound reading + +memcpy() is trying to READ 512 bytes from memory, +pointed by info->kernel_cmdline, +which was (presumable) allocated by g_strdup(""); +Found with ASAN, making check with enabled sanitizers. 
+ +Signed-off-by: Dmitry Frolov +Reviewed-by: Song Gao +Message-Id: <20240628123910.577740-1-frolov@swemel.ru> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index b8e1aa18d5..cb668703bd 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -163,7 +163,7 @@ static void init_cmdline(struct loongarch_boot_info *info, void *p, void *start) + info->a0 = 1; + info->a1 = cmdline_addr; + +- memcpy(p, info->kernel_cmdline, COMMAND_LINE_SIZE); ++ g_strlcpy(p, info->kernel_cmdline, COMMAND_LINE_SIZE); + } + + static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) +-- +2.39.1 + diff --git a/hw-loongarch-clean-code.patch b/hw-loongarch-clean-code.patch new file mode 100644 index 0000000000000000000000000000000000000000..45d44f40e665c5780870407720971d1558b78031 --- /dev/null +++ b/hw-loongarch-clean-code.patch @@ -0,0 +1,197 @@ +From 4a74147e1b2e276eb2ad2855bafc3c0136bc18a3 Mon Sep 17 00:00:00 2001 +From: gaosong +Date: Sun, 8 Sep 2024 22:34:57 +0800 +Subject: [PATCH 76/78] hw/loongarch: clean code + +remove some unused code + +Signed-off-by: gaosong +Signed-off-by: Xianglai Li +--- + target/loongarch/kvm/kvm.c | 103 --------------------------- + target/loongarch/kvm/kvm_loongarch.h | 2 - + target/loongarch/machine.c | 20 ------ + 3 files changed, 125 deletions(-) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index ab1ea3d4fd..0acdd5c4c1 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -684,53 +684,6 @@ static int kvm_check_cpucfg2(CPUState *cs) + return ret; + } + +-static int kvm_check_cpucfg6(CPUState *cs) +-{ +- int ret; +- uint64_t val; +- struct kvm_device_attr attr = { +- .group = KVM_LOONGARCH_VCPU_CPUCFG, +- .attr = 6, +- .addr = (uint64_t)&val, +- }; +- LoongArchCPU *cpu = LOONGARCH_CPU(cs); +- CPULoongArchState *env = &cpu->env; +- +- ret 
= kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, &attr); +- if (!ret) { +- kvm_vcpu_ioctl(cs, KVM_GET_DEVICE_ATTR, &attr); +- +- if (FIELD_EX32(env->cpucfg[6], CPUCFG6, PMP)) { +- /* Check PMP */ +- if (!FIELD_EX32(val, CPUCFG6, PMP)) { +- error_report("'pmu' feature not supported by KVM on this host" +- " Please disable 'pmu' with " +- "'... -cpu XXX,pmu=off ...'\n"); +- exit(EXIT_FAILURE); +- } +- /* Check PMNUM */ +- int guest_pmnum = FIELD_EX32(env->cpucfg[6], CPUCFG6, PMNUM); +- int host_pmnum = FIELD_EX32(val, CPUCFG6, PMNUM); +- if (guest_pmnum > host_pmnum){ +- warn_report("The guest pmnum %d larger than KVM support %d\n", +- guest_pmnum, host_pmnum); +- env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, +- PMNUM, host_pmnum); +- } +- /* Check PMBITS */ +- int guest_pmbits = FIELD_EX32(env->cpucfg[6], CPUCFG6, PMBITS); +- int host_pmbits = FIELD_EX32(val, CPUCFG6, PMBITS); +- if (guest_pmbits != host_pmbits) { +- warn_report("The host not support PMBITS %d\n", guest_pmbits); +- env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, +- PMBITS, host_pmbits); +- } +- } +- } +- +- return ret; +-} +- + static int kvm_loongarch_put_cpucfg(CPUState *cs) + { + int i, ret = 0; +@@ -745,12 +698,6 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + return ret; + } + } +- if (i == 6) { +- ret = kvm_check_cpucfg6(cs); +- if (ret) { +- return ret; +- } +- } + val = env->cpucfg[i]; + ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), &val); + if (ret < 0) { +@@ -760,56 +707,6 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + return ret; + } + +-int kvm_loongarch_put_pvtime(LoongArchCPU *cpu) +-{ +- CPULoongArchState *env = &cpu->env; +- int err; +- struct kvm_device_attr attr = { +- .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, +- .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, +- .addr = (uint64_t)&env->st.guest_addr, +- }; +- +- err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, attr); +- if (err != 0) { +- /* It's ok even though kvm has not such attr */ +- return 0; +- } +- +- err = 
kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEVICE_ATTR, attr); +- if (err != 0) { +- error_report("PVTIME IPA: KVM_SET_DEVICE_ATTR: %s", strerror(-err)); +- return err; +- } +- +- return 0; +-} +- +-int kvm_loongarch_get_pvtime(LoongArchCPU *cpu) +-{ +- CPULoongArchState *env = &cpu->env; +- int err; +- struct kvm_device_attr attr = { +- .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, +- .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, +- .addr = (uint64_t)&env->st.guest_addr, +- }; +- +- err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, attr); +- if (err != 0) { +- /* It's ok even though kvm has not such attr */ +- return 0; +- } +- +- err = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEVICE_ATTR, attr); +- if (err != 0) { +- error_report("PVTIME IPA: KVM_GET_DEVICE_ATTR: %s", strerror(-err)); +- return err; +- } +- +- return 0; +-} +- + int kvm_arch_get_registers(CPUState *cs) + { + int ret; +diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h +index 8482f9308d..1051a341ec 100644 +--- a/target/loongarch/kvm/kvm_loongarch.h ++++ b/target/loongarch/kvm/kvm_loongarch.h +@@ -11,8 +11,6 @@ + #define QEMU_KVM_LOONGARCH_H + + int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); +-int kvm_loongarch_put_pvtime(LoongArchCPU *cpu); +-int kvm_loongarch_get_pvtime(LoongArchCPU *cpu); + void kvm_arch_reset_vcpu(CPUState *cs); + + #endif +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index fd69ea05dc..57abdddc09 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -112,24 +112,6 @@ static const VMStateDescription vmstate_lasx = { + }, + }; + +-static int cpu_post_load(void *opaque, int version_id) +-{ +-#ifdef CONFIG_KVM +- LoongArchCPU *cpu = opaque; +- kvm_loongarch_put_pvtime(cpu); +-#endif +- return 0; +-} +- +-static int cpu_pre_save(void *opaque) +-{ +-#ifdef CONFIG_KVM +- LoongArchCPU *cpu = opaque; +- kvm_loongarch_get_pvtime(cpu); +-#endif +- return 0; +-} +- + static bool lbt_needed(void *opaque) + 
{ + LoongArchCPU *cpu = opaque; +@@ -190,8 +172,6 @@ const VMStateDescription vmstate_loongarch_cpu = { + .name = "cpu", + .version_id = 3, + .minimum_version_id = 3, +- .post_load = cpu_post_load, +- .pre_save = cpu_pre_save, + .fields = (const VMStateField[]) { + VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), + VMSTATE_UINTTL(env.pc, LoongArchCPU), +-- +2.39.1 + diff --git a/hw-loongarch-fdt-adds-Extend-I-O-Interrupt-Controlle.patch b/hw-loongarch-fdt-adds-Extend-I-O-Interrupt-Controlle.patch new file mode 100644 index 0000000000000000000000000000000000000000..ebab65007b888cafff0af13bf9942fe1c6e68a79 --- /dev/null +++ b/hw-loongarch-fdt-adds-Extend-I-O-Interrupt-Controlle.patch @@ -0,0 +1,91 @@ +From ed42940a2d943fd0e666e46bbc9b599b9ed1bd75 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:45 +0800 +Subject: [PATCH 10/78] hw/loongarch: fdt adds Extend I/O Interrupt Controller + +fdt adds Extend I/O Interrupt Controller, +we use 'loongson,ls2k2000-eiointc'. + +See: +https://github.com/torvalds/linux/blob/v6.7/drivers/irqchip/irq-loongson-eiointc.c +https://lore.kernel.org/r/764e02d924094580ac0f1d15535f4b98308705c6.1683279769.git.zhoubinbin@loongson.cn + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-12-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 30 +++++++++++++++++++++++++++++- + include/hw/intc/loongarch_extioi.h | 1 + + 2 files changed, 30 insertions(+), 1 deletion(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index fdc4a5d708..820eb52cba 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -150,6 +150,31 @@ static void fdt_add_cpuic_node(LoongArchMachineState *lams, + g_free(nodename); + } + ++static void fdt_add_eiointc_node(LoongArchMachineState *lams, ++ uint32_t *cpuintc_phandle, ++ uint32_t *eiointc_phandle) ++{ ++ MachineState *ms = MACHINE(lams); ++ char *nodename; ++ hwaddr extioi_base = APIC_BASE; ++ hwaddr extioi_size = 
EXTIOI_SIZE; ++ ++ *eiointc_phandle = qemu_fdt_alloc_phandle(ms->fdt); ++ nodename = g_strdup_printf("/eiointc@%" PRIx64, extioi_base); ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", *eiointc_phandle); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", ++ "loongson,ls2k2000-eiointc"); ++ qemu_fdt_setprop(ms->fdt, nodename, "interrupt-controller", NULL, 0); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "#interrupt-cells", 1); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", ++ *cpuintc_phandle); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupts", 3); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0x0, ++ extioi_base, 0x0, extioi_size); ++ g_free(nodename); ++} ++ + static void fdt_add_flash_node(LoongArchMachineState *lams) + { + MachineState *ms = MACHINE(lams); +@@ -574,7 +599,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + CPULoongArchState *env; + CPUState *cpu_state; + int cpu, pin, i, start, num; +- uint32_t cpuintc_phandle; ++ uint32_t cpuintc_phandle, eiointc_phandle; + + /* + * The connection of interrupts: +@@ -652,6 +677,9 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + } + } + ++ /* Add Extend I/O Interrupt Controller node */ ++ fdt_add_eiointc_node(lams, &cpuintc_phandle, &eiointc_phandle); ++ + pch_pic = qdev_new(TYPE_LOONGARCH_PCH_PIC); + num = VIRT_PCH_PIC_IRQ_NUM; + qdev_prop_set_uint32(pch_pic, "pch_pic_irq_num", num); +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index 98f348c49d..722ffee1bc 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -39,6 +39,7 @@ + #define EXTIOI_COREISR_END (0xB20 - APIC_OFFSET) + #define EXTIOI_COREMAP_START (0xC00 - APIC_OFFSET) + #define EXTIOI_COREMAP_END (0xD00 - APIC_OFFSET) ++#define EXTIOI_SIZE 0x800 + + #define EXTIOI_VIRT_BASE (0x40000000) + #define EXTIOI_VIRT_SIZE (0x1000) +-- +2.39.1 + diff --git 
a/hw-loongarch-fdt-adds-cpu-interrupt-controller-node.patch b/hw-loongarch-fdt-adds-cpu-interrupt-controller-node.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c65e60d3ed95b8a5052e7e2fadcdff9672d3a45 --- /dev/null +++ b/hw-loongarch-fdt-adds-cpu-interrupt-controller-node.patch @@ -0,0 +1,69 @@ +From cd506fbf0d9a00aa0f25de1e7bd26ad4335c8257 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:44 +0800 +Subject: [PATCH 09/78] hw/loongarch: fdt adds cpu interrupt controller node + +fdt adds cpu interrupt controller node, +we use 'loongson,cpu-interrupt-controller'. + +See: +https://github.com/torvalds/linux/blob/v6.7/drivers/irqchip/irq-loongarch-cpu.c +https://lore.kernel.org/r/20221114113824.1880-2-liupeibao@loongson.cn + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-11-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 99a3dc8696..fdc4a5d708 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -133,6 +133,23 @@ static void virt_flash_map(LoongArchMachineState *lams, + virt_flash_map1(flash1, VIRT_FLASH1_BASE, VIRT_FLASH1_SIZE, sysmem); + } + ++static void fdt_add_cpuic_node(LoongArchMachineState *lams, ++ uint32_t *cpuintc_phandle) ++{ ++ MachineState *ms = MACHINE(lams); ++ char *nodename; ++ ++ *cpuintc_phandle = qemu_fdt_alloc_phandle(ms->fdt); ++ nodename = g_strdup_printf("/cpuic"); ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", *cpuintc_phandle); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", ++ "loongson,cpu-interrupt-controller"); ++ qemu_fdt_setprop(ms->fdt, nodename, "interrupt-controller", NULL, 0); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "#interrupt-cells", 1); ++ g_free(nodename); ++} ++ + static void 
fdt_add_flash_node(LoongArchMachineState *lams) + { + MachineState *ms = MACHINE(lams); +@@ -557,6 +574,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + CPULoongArchState *env; + CPUState *cpu_state; + int cpu, pin, i, start, num; ++ uint32_t cpuintc_phandle; + + /* + * The connection of interrupts: +@@ -591,6 +609,9 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + memory_region_add_subregion(&lams->system_iocsr, MAIL_SEND_ADDR, + sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); + ++ /* Add cpu interrupt-controller */ ++ fdt_add_cpuic_node(lams, &cpuintc_phandle); ++ + for (cpu = 0; cpu < ms->smp.cpus; cpu++) { + cpu_state = qemu_get_cpu(cpu); + cpudev = DEVICE(cpu_state); +-- +2.39.1 + diff --git a/hw-loongarch-fdt-adds-pch_msi-Controller.patch b/hw-loongarch-fdt-adds-pch_msi-Controller.patch new file mode 100644 index 0000000000000000000000000000000000000000..ce895f52286086d5e4d3f7f98a401b8ac0b7a5a8 --- /dev/null +++ b/hw-loongarch-fdt-adds-pch_msi-Controller.patch @@ -0,0 +1,93 @@ +From ea34d3896abfaf67cdf7fdb3cb205cc5a0e2e708 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:47 +0800 +Subject: [PATCH 12/78] hw/loongarch: fdt adds pch_msi Controller + +fdt adds pch msi controller, we use 'loongson,pch-msi-1.0'. 
+ +See: +https://github.com/torvalds/linux/blob/v6.7/drivers/irqchip/irq-loongson-pch-msi.c +https://lore.kernel.org/r/20200528152757.1028711-6-jiaxun.yang@flygoat.com + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-14-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 33 ++++++++++++++++++++++++++++++++- + include/hw/pci-host/ls7a.h | 1 + + 2 files changed, 33 insertions(+), 1 deletion(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 36fcfd12eb..032106ebad 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -200,6 +200,34 @@ static void fdt_add_pch_pic_node(LoongArchMachineState *lams, + g_free(nodename); + } + ++static void fdt_add_pch_msi_node(LoongArchMachineState *lams, ++ uint32_t *eiointc_phandle, ++ uint32_t *pch_msi_phandle) ++{ ++ MachineState *ms = MACHINE(lams); ++ char *nodename; ++ hwaddr pch_msi_base = VIRT_PCH_MSI_ADDR_LOW; ++ hwaddr pch_msi_size = VIRT_PCH_MSI_SIZE; ++ ++ *pch_msi_phandle = qemu_fdt_alloc_phandle(ms->fdt); ++ nodename = g_strdup_printf("/msi@%" PRIx64, pch_msi_base); ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", *pch_msi_phandle); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", ++ "loongson,pch-msi-1.0"); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", ++ 0, pch_msi_base, ++ 0, pch_msi_size); ++ qemu_fdt_setprop(ms->fdt, nodename, "interrupt-controller", NULL, 0); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", ++ *eiointc_phandle); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "loongson,msi-base-vec", ++ VIRT_PCH_PIC_IRQ_NUM); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "loongson,msi-num-vecs", ++ EXTIOI_IRQS - VIRT_PCH_PIC_IRQ_NUM); ++ g_free(nodename); ++} ++ + static void fdt_add_flash_node(LoongArchMachineState *lams) + { + MachineState *ms = MACHINE(lams); +@@ -624,7 +652,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + 
CPULoongArchState *env; + CPUState *cpu_state; + int cpu, pin, i, start, num; +- uint32_t cpuintc_phandle, eiointc_phandle, pch_pic_phandle; ++ uint32_t cpuintc_phandle, eiointc_phandle, pch_pic_phandle, pch_msi_phandle; + + /* + * The connection of interrupts: +@@ -741,6 +769,9 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + qdev_get_gpio_in(extioi, i + start)); + } + ++ /* Add PCH MSI node */ ++ fdt_add_pch_msi_node(lams, &eiointc_phandle, &pch_msi_phandle); ++ + loongarch_devices_init(pch_pic, lams); + } + +diff --git a/include/hw/pci-host/ls7a.h b/include/hw/pci-host/ls7a.h +index fe260f0183..cd7c9ec7bc 100644 +--- a/include/hw/pci-host/ls7a.h ++++ b/include/hw/pci-host/ls7a.h +@@ -25,6 +25,7 @@ + #define VIRT_IOAPIC_REG_BASE (VIRT_PCH_REG_BASE) + #define VIRT_PCH_MSI_ADDR_LOW 0x2FF00000UL + #define VIRT_PCH_REG_SIZE 0x400 ++#define VIRT_PCH_MSI_SIZE 0x8 + + /* + * GSI_BASE is hard-coded with 64 in linux kernel, else kernel fails to boot +-- +2.39.1 + diff --git a/hw-loongarch-fdt-adds-pch_pic-Controller.patch b/hw-loongarch-fdt-adds-pch_pic-Controller.patch new file mode 100644 index 0000000000000000000000000000000000000000..87ab827d7acff7a2c3663db79b4d7f5aa3a67890 --- /dev/null +++ b/hw-loongarch-fdt-adds-pch_pic-Controller.patch @@ -0,0 +1,90 @@ +From 78222abb3bde044b4520f23c6fc2f0f0bd805d2a Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:46 +0800 +Subject: [PATCH 11/78] hw/loongarch: fdt adds pch_pic Controller + +fdt adds pch pic controller, we use 'loongson,pch-pic-1.0' + +See: +https://github.com/torvalds/linux/blob/v6.7/drivers/irqchip/irq-loongson-pch-pic.c +https://lore.kernel.org/r/20200528152757.1028711-4-jiaxun.yang@flygoat.com + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-13-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 30 +++++++++++++++++++++++++++++- + include/hw/pci-host/ls7a.h | 1 + + 2 files changed, 30 insertions(+), 1 
deletion(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 820eb52cba..36fcfd12eb 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -175,6 +175,31 @@ static void fdt_add_eiointc_node(LoongArchMachineState *lams, + g_free(nodename); + } + ++static void fdt_add_pch_pic_node(LoongArchMachineState *lams, ++ uint32_t *eiointc_phandle, ++ uint32_t *pch_pic_phandle) ++{ ++ MachineState *ms = MACHINE(lams); ++ char *nodename; ++ hwaddr pch_pic_base = VIRT_PCH_REG_BASE; ++ hwaddr pch_pic_size = VIRT_PCH_REG_SIZE; ++ ++ *pch_pic_phandle = qemu_fdt_alloc_phandle(ms->fdt); ++ nodename = g_strdup_printf("/platic@%" PRIx64, pch_pic_base); ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", *pch_pic_phandle); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", ++ "loongson,pch-pic-1.0"); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0, ++ pch_pic_base, 0, pch_pic_size); ++ qemu_fdt_setprop(ms->fdt, nodename, "interrupt-controller", NULL, 0); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "#interrupt-cells", 2); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", ++ *eiointc_phandle); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "loongson,pic-base-vec", 0); ++ g_free(nodename); ++} ++ + static void fdt_add_flash_node(LoongArchMachineState *lams) + { + MachineState *ms = MACHINE(lams); +@@ -599,7 +624,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + CPULoongArchState *env; + CPUState *cpu_state; + int cpu, pin, i, start, num; +- uint32_t cpuintc_phandle, eiointc_phandle; ++ uint32_t cpuintc_phandle, eiointc_phandle, pch_pic_phandle; + + /* + * The connection of interrupts: +@@ -699,6 +724,9 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i)); + } + ++ /* Add PCH PIC node */ ++ fdt_add_pch_pic_node(lams, &eiointc_phandle, &pch_pic_phandle); ++ + pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); 
+ start = num; + num = EXTIOI_IRQS - start; +diff --git a/include/hw/pci-host/ls7a.h b/include/hw/pci-host/ls7a.h +index e753449593..fe260f0183 100644 +--- a/include/hw/pci-host/ls7a.h ++++ b/include/hw/pci-host/ls7a.h +@@ -24,6 +24,7 @@ + #define VIRT_PCH_REG_BASE 0x10000000UL + #define VIRT_IOAPIC_REG_BASE (VIRT_PCH_REG_BASE) + #define VIRT_PCH_MSI_ADDR_LOW 0x2FF00000UL ++#define VIRT_PCH_REG_SIZE 0x400 + + /* + * GSI_BASE is hard-coded with 64 in linux kernel, else kernel fails to boot +-- +2.39.1 + diff --git a/hw-loongarch-fdt-adds-pcie-irq_map-node.patch b/hw-loongarch-fdt-adds-pcie-irq_map-node.patch new file mode 100644 index 0000000000000000000000000000000000000000..459138ed2fd0544d0c4d4622a065c349f355b637 --- /dev/null +++ b/hw-loongarch-fdt-adds-pcie-irq_map-node.patch @@ -0,0 +1,137 @@ +From 1325effbd595781b9ab75dceab9f87944156c606 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:48 +0800 +Subject: [PATCH 13/78] hw/loongarch: fdt adds pcie irq_map node + +This patch adds pcie irq_map node for FDT. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-15-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 73 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 69 insertions(+), 4 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 032106ebad..c32cc3c818 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -379,7 +379,62 @@ static void fdt_add_fw_cfg_node(const LoongArchMachineState *lams) + g_free(nodename); + } + +-static void fdt_add_pcie_node(const LoongArchMachineState *lams) ++static void fdt_add_pcie_irq_map_node(const LoongArchMachineState *lams, ++ char *nodename, ++ uint32_t *pch_pic_phandle) ++{ ++ int pin, dev; ++ uint32_t irq_map_stride = 0; ++ uint32_t full_irq_map[GPEX_NUM_IRQS *GPEX_NUM_IRQS * 10] = {}; ++ uint32_t *irq_map = full_irq_map; ++ const MachineState *ms = MACHINE(lams); ++ ++ /* This code creates a standard swizzle of interrupts such that ++ * each device's first interrupt is based on it's PCI_SLOT number. ++ * (See pci_swizzle_map_irq_fn()) ++ * ++ * We only need one entry per interrupt in the table (not one per ++ * possible slot) seeing the interrupt-map-mask will allow the table ++ * to wrap to any number of devices. 
++ */ ++ ++ for (dev = 0; dev < GPEX_NUM_IRQS; dev++) { ++ int devfn = dev * 0x8; ++ ++ for (pin = 0; pin < GPEX_NUM_IRQS; pin++) { ++ int irq_nr = 16 + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS); ++ int i = 0; ++ ++ /* Fill PCI address cells */ ++ irq_map[i] = cpu_to_be32(devfn << 8); ++ i += 3; ++ ++ /* Fill PCI Interrupt cells */ ++ irq_map[i] = cpu_to_be32(pin + 1); ++ i += 1; ++ ++ /* Fill interrupt controller phandle and cells */ ++ irq_map[i++] = cpu_to_be32(*pch_pic_phandle); ++ irq_map[i++] = cpu_to_be32(irq_nr); ++ ++ if (!irq_map_stride) { ++ irq_map_stride = i; ++ } ++ irq_map += irq_map_stride; ++ } ++ } ++ ++ ++ qemu_fdt_setprop(ms->fdt, nodename, "interrupt-map", full_irq_map, ++ GPEX_NUM_IRQS * GPEX_NUM_IRQS * ++ irq_map_stride * sizeof(uint32_t)); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupt-map-mask", ++ 0x1800, 0, 0, 0x7); ++} ++ ++static void fdt_add_pcie_node(const LoongArchMachineState *lams, ++ uint32_t *pch_pic_phandle, ++ uint32_t *pch_msi_phandle) + { + char *nodename; + hwaddr base_mmio = VIRT_PCI_MEM_BASE; +@@ -410,6 +465,11 @@ static void fdt_add_pcie_node(const LoongArchMachineState *lams) + 2, base_pio, 2, size_pio, + 1, FDT_PCI_RANGE_MMIO, 2, base_mmio, + 2, base_mmio, 2, size_mmio); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "msi-map", ++ 0, *pch_msi_phandle, 0, 0x10000); ++ ++ fdt_add_pcie_irq_map_node(lams, nodename, pch_pic_phandle); ++ + g_free(nodename); + } + +@@ -569,7 +629,10 @@ static DeviceState *create_platform_bus(DeviceState *pch_pic) + return dev; + } + +-static void loongarch_devices_init(DeviceState *pch_pic, LoongArchMachineState *lams) ++static void loongarch_devices_init(DeviceState *pch_pic, ++ LoongArchMachineState *lams, ++ uint32_t *pch_pic_phandle, ++ uint32_t *pch_msi_phandle) + { + MachineClass *mc = MACHINE_GET_CLASS(lams); + DeviceState *gpex_dev; +@@ -615,6 +678,9 @@ static void loongarch_devices_init(DeviceState *pch_pic, LoongArchMachineState * + gpex_set_irq_num(GPEX_HOST(gpex_dev), i, 16 
+ i); + } + ++ /* Add pcie node */ ++ fdt_add_pcie_node(lams, pch_pic_phandle, pch_msi_phandle); ++ + serial_mm_init(get_system_memory(), VIRT_UART_BASE, 0, + qdev_get_gpio_in(pch_pic, + VIRT_UART_IRQ - VIRT_GSI_BASE), +@@ -772,7 +838,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + /* Add PCH MSI node */ + fdt_add_pch_msi_node(lams, &eiointc_phandle, &pch_msi_phandle); + +- loongarch_devices_init(pch_pic, lams); ++ loongarch_devices_init(pch_pic, lams, &pch_pic_phandle, &pch_msi_phandle); + } + + static void loongarch_firmware_init(LoongArchMachineState *lams) +@@ -1048,7 +1114,6 @@ static void loongarch_init(MachineState *machine) + lams->powerdown_notifier.notify = virt_powerdown_req; + qemu_register_powerdown_notifier(&lams->powerdown_notifier); + +- fdt_add_pcie_node(lams); + /* + * Since lowmem region starts from 0 and Linux kernel legacy start address + * at 2 MiB, FDT base address is located at 1 MiB to avoid NULL pointer +-- +2.39.1 + diff --git a/hw-loongarch-fdt-remove-unused-irqchip-node.patch b/hw-loongarch-fdt-remove-unused-irqchip-node.patch new file mode 100644 index 0000000000000000000000000000000000000000..75c7217971b92cc7b22b82097c98218fc4c0927d --- /dev/null +++ b/hw-loongarch-fdt-remove-unused-irqchip-node.patch @@ -0,0 +1,67 @@ +From e87697c72641ab2209d4004f573f47283d118235 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 26 Apr 2024 17:15:49 +0800 +Subject: [PATCH 14/78] hw/loongarch: fdt remove unused irqchip node + +This patch removes the unused fdt irqchip node. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240426091551.2397867-16-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 31 +------------------------------ + 1 file changed, 1 insertion(+), 30 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index c32cc3c818..ff9513034b 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -473,34 +473,6 @@ static void fdt_add_pcie_node(const LoongArchMachineState *lams, + g_free(nodename); + } + +-static void fdt_add_irqchip_node(LoongArchMachineState *lams) +-{ +- MachineState *ms = MACHINE(lams); +- char *nodename; +- uint32_t irqchip_phandle; +- +- irqchip_phandle = qemu_fdt_alloc_phandle(ms->fdt); +- qemu_fdt_setprop_cell(ms->fdt, "/", "interrupt-parent", irqchip_phandle); +- +- nodename = g_strdup_printf("/intc@%lx", VIRT_IOAPIC_REG_BASE); +- qemu_fdt_add_subnode(ms->fdt, nodename); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "#interrupt-cells", 3); +- qemu_fdt_setprop(ms->fdt, nodename, "interrupt-controller", NULL, 0); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "#address-cells", 0x2); +- qemu_fdt_setprop_cell(ms->fdt, nodename, "#size-cells", 0x2); +- qemu_fdt_setprop(ms->fdt, nodename, "ranges", NULL, 0); +- +- qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", +- "loongarch,ls7a"); +- +- qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", +- 2, VIRT_IOAPIC_REG_BASE, +- 2, PCH_PIC_ROUTE_ENTRY_OFFSET); +- +- qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", irqchip_phandle); +- g_free(nodename); +-} +- + static void fdt_add_memory_node(MachineState *ms, + uint64_t base, uint64_t size, int node_id) + { +@@ -1103,8 +1075,7 @@ static void loongarch_init(MachineState *machine) + + /* Initialize the IO interrupt subsystem */ + loongarch_irq_init(lams); +- fdt_add_irqchip_node(lams); +- platform_bus_add_all_fdt_nodes(machine->fdt, "/intc", ++ platform_bus_add_all_fdt_nodes(machine->fdt, "/platic", + VIRT_PLATFORM_BUS_BASEADDRESS, + 
VIRT_PLATFORM_BUS_SIZE, + VIRT_PLATFORM_BUS_IRQ); +-- +2.39.1 + diff --git a/hw-loongarch-fix-cpu-hotplug-reset.patch b/hw-loongarch-fix-cpu-hotplug-reset.patch new file mode 100644 index 0000000000000000000000000000000000000000..d86cc0577b38fde20bfdc9a6301a0d70a7fd902a --- /dev/null +++ b/hw-loongarch-fix-cpu-hotplug-reset.patch @@ -0,0 +1,51 @@ +From f3f7b49a8a323ebfe2be176985336aaf2c97c6c2 Mon Sep 17 00:00:00 2001 +From: gaosong +Date: Mon, 9 Sep 2024 04:14:49 +0800 +Subject: [PATCH 78/78] hw/loongarch: fix cpu hotplug reset + +Signed-off-by: gaosong +Signed-off-by: Xianglai Li +--- + hw/loongarch/boot.c | 2 +- + hw/loongarch/virt.c | 1 + + include/hw/loongarch/virt.h | 1 + + 3 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index f258eefe9a..53dcefbb55 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -216,7 +216,7 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) + return kernel_entry; + } + +-static void reset_load_elf(void *opaque) ++void reset_load_elf(void *opaque) + { + LoongArchCPU *cpu = opaque; + CPULoongArchState *env = &cpu->env; +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 5b0468f6cb..0c24e632bb 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1494,6 +1494,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, + env = &(cpu->env); + env->address_space_iocsr = &lvms->as_iocsr; + ++ qemu_register_reset(reset_load_elf, LOONGARCH_CPU(qemu_get_cpu(cs->cpu_index))); + env->ipistate = lvms->ipi; + if (!(kvm_enabled() && kvm_irqchip_in_kernel())) { + /* connect ipi irq to cpu irq, logic cpu index used here */ +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 168b40c31b..a79ad41663 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -86,4 +86,5 @@ struct LoongArchVirtMachineState { + #define TYPE_LOONGARCH_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + 
OBJECT_DECLARE_SIMPLE_TYPE(LoongArchVirtMachineState, LOONGARCH_VIRT_MACHINE) + void loongarch_acpi_setup(LoongArchVirtMachineState *lvms); ++void reset_load_elf(void *opaque); + #endif +-- +2.39.1 + diff --git a/hw-loongarch-move-memory-map-to-boot.c.patch b/hw-loongarch-move-memory-map-to-boot.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..ac4f660d88891f631c4f82fe3bac59a7f9092517 --- /dev/null +++ b/hw-loongarch-move-memory-map-to-boot.c.patch @@ -0,0 +1,113 @@ +From 5e4d612de23539499b9a22986bebe9a3007edae1 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 7 May 2024 16:51:35 +0200 +Subject: [PATCH 18/78] hw/loongarch: move memory map to boot.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ensure that it can be used even if virt.c is not included in the build, as +is the case for --without-default-devices. + +Signed-off-by: Paolo Bonzini +Acked-by: Richard Henderson +Message-ID: <20240507145135.270803-1-pbonzini@redhat.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Xianglai Li +--- + .gitlab-ci.d/buildtest.yml | 5 +++-- + hw/loongarch/boot.c | 3 +++ + hw/loongarch/virt.c | 3 --- + include/hw/loongarch/boot.h | 10 ++++++++++ + include/hw/loongarch/virt.h | 10 ---------- + 5 files changed, 16 insertions(+), 15 deletions(-) + +diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml +index 3fb99e79e9..983c3c132e 100644 +--- a/.gitlab-ci.d/buildtest.yml ++++ b/.gitlab-ci.d/buildtest.yml +@@ -579,8 +579,9 @@ build-tci: + - make check-tcg + + # Check our reduced build configurations +-# requires libfdt: aarch64, arm, i386, loongarch64, x86_64 +-# does not build without boards: i386, loongarch64, x86_64 ++# requires libfdt: aarch64, arm, i386, loongarch64, microblaze, microblazeel, ++# mips64el, or1k, ppc, ppc64, riscv32, riscv64, rx, x86_64 ++# does not build without boards: i386, s390x, sh4, sh4eb, x86_64 + build-without-defaults: + extends: 
.native_build_job_template + needs: +diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c +index 7d1630b2e7..03f6301a77 100644 +--- a/hw/loongarch/boot.c ++++ b/hw/loongarch/boot.c +@@ -15,6 +15,9 @@ + #include "sysemu/reset.h" + #include "sysemu/qtest.h" + ++struct memmap_entry *memmap_table; ++unsigned memmap_entries; ++ + ram_addr_t initrd_offset; + uint64_t initrd_size; + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 0972ebd150..76b36539e2 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -543,9 +543,6 @@ static void virt_powerdown_req(Notifier *notifier, void *opaque) + acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS); + } + +-struct memmap_entry *memmap_table; +-unsigned memmap_entries; +- + static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) + { + /* Ensure there are no duplicate entries. */ +diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h +index 4ebcc89dcf..b3b870df1f 100644 +--- a/include/hw/loongarch/boot.h ++++ b/include/hw/loongarch/boot.h +@@ -104,6 +104,16 @@ struct loongarch_boot_info { + uint64_t a0, a1, a2; + }; + ++extern struct memmap_entry *memmap_table; ++extern unsigned memmap_entries; ++ ++struct memmap_entry { ++ uint64_t address; ++ uint64_t length; ++ uint32_t type; ++ uint32_t reserved; ++}; ++ + void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info); + + #endif /* HW_LOONGARCH_BOOT_H */ +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 673b57aa2b..36158c758f 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -37,16 +37,6 @@ + + #define FDT_BASE 0x100000 + +-extern struct memmap_entry *memmap_table; +-extern unsigned memmap_entries; +- +-struct memmap_entry { +- uint64_t address; +- uint64_t length; +- uint32_t type; +- uint32_t reserved; +-}; +- + struct LoongArchMachineState { + /*< private >*/ + MachineState parent_obj; +-- +2.39.1 + diff --git 
a/hw-loongarch-virt-Add-CPU-topology-support.patch b/hw-loongarch-virt-Add-CPU-topology-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..acdcf11d7443c21fc75ad54cf918ef1b1bd89e5c --- /dev/null +++ b/hw-loongarch-virt-Add-CPU-topology-support.patch @@ -0,0 +1,278 @@ +From 8d440efd992fd6be0aca55118a9b60c224f6eade Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 23 Oct 2024 15:13:10 +0800 +Subject: [PATCH 69/78] hw/loongarch/virt: Add CPU topology support + +Add topological relationships for Loongarch VCPU and initialize +topology member variables. Also physical cpu id calculation +method comes from its topo information. + +Co-developed-by: Xianglai Li +Signed-off-by: Bibo Mao +Message-ID: <20241023071312.881866-2-maobibo@loongson.cn> +Signed-off-by: Xianglai Li +--- + docs/system/loongarch/virt.rst | 31 +++++++++++++ + hw/loongarch/virt.c | 82 ++++++++++++++++++++++++++++------ + target/loongarch/cpu.c | 12 +++++ + target/loongarch/cpu.h | 11 +++++ + 4 files changed, 122 insertions(+), 14 deletions(-) + +diff --git a/docs/system/loongarch/virt.rst b/docs/system/loongarch/virt.rst +index c37268b404..aa4719d4bd 100644 +--- a/docs/system/loongarch/virt.rst ++++ b/docs/system/loongarch/virt.rst +@@ -28,6 +28,37 @@ The ``qemu-system-loongarch64`` provides emulation for virt + machine. You can specify the machine type ``virt`` and + cpu type ``la464``. + ++CPU Topology ++------------ ++ ++The ``LA464`` type CPUs have the concept of Socket Core and Thread. ++ ++For example: ++ ++``-smp 1,maxcpus=M,sockets=S,cores=C,threads=T`` ++ ++The above parameters indicate that the machine has a maximum of ``M`` vCPUs and ++``S`` sockets, each socket has ``C`` cores, each core has ``T`` threads, ++and each thread corresponds to a vCPU. 
++ ++Then ``M`` ``S`` ``C`` ``T`` have the following relationship: ++ ++``M = S * C * T`` ++ ++In the CPU topology relationship, when we know the ``socket_id`` ``core_id`` ++and ``thread_id`` of the CPU, we can calculate its ``arch_id``: ++ ++``arch_id = (socket_id * C * T) + (core_id * T) + thread_id`` ++ ++Similarly, when we know the ``arch_id`` of the CPU, ++we can also get its ``socket_id`` ``core_id`` and ``thread_id``: ++ ++``socket_id = arch_id / (C * T)`` ++ ++``core_id = (arch_id / T) % C`` ++ ++``thread_id = arch_id % T`` ++ + Boot options + ------------ + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 9510aa7a7e..8d1e53ff62 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1123,9 +1123,7 @@ static void virt_init(MachineState *machine) + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); + int i; + hwaddr base, size, ram_size = machine->ram_size; +- const CPUArchIdList *possible_cpus; + MachineClass *mc = MACHINE_GET_CLASS(machine); +- CPUState *cpu; + + if (!cpu_model) { + cpu_model = LOONGARCH_CPU_TYPE_NAME("la464"); +@@ -1143,14 +1141,39 @@ static void virt_init(MachineState *machine) + memory_region_add_subregion(&lvms->system_iocsr, 0, &lvms->iocsr_mem); + + /* Init CPUs */ +- possible_cpus = mc->possible_cpu_arch_ids(machine); +- for (i = 0; i < possible_cpus->len; i++) { +- cpu = cpu_create(machine->cpu_type); +- cpu->cpu_index = i; +- machine->possible_cpus->cpus[i].cpu = OBJECT(cpu); +- lacpu = LOONGARCH_CPU(cpu); ++ mc->possible_cpu_arch_ids(machine); ++ for (i = 0; i < machine->smp.cpus; i++) { ++ Object *cpuobj; ++ cpuobj = object_new(machine->cpu_type); ++ lacpu = LOONGARCH_CPU(cpuobj); ++ + lacpu->phy_id = machine->possible_cpus->cpus[i].arch_id; ++ object_property_set_int(cpuobj, "socket-id", ++ machine->possible_cpus->cpus[i].props.socket_id, ++ NULL); ++ object_property_set_int(cpuobj, "core-id", ++ machine->possible_cpus->cpus[i].props.core_id, ++ NULL); ++ object_property_set_int(cpuobj, 
"thread-id", ++ machine->possible_cpus->cpus[i].props.thread_id, ++ NULL); ++ /* ++ * The CPU in place at the time of machine startup will also enter ++ * the CPU hot-plug process when it is created, but at this time, ++ * the GED device has not been created, resulting in exit in the CPU ++ * hot-plug process, which can avoid the incumbent CPU repeatedly ++ * applying for resources. ++ * ++ * The interrupt resource of the in-place CPU will be requested at ++ * the current function call loongarch_irq_init(). ++ * ++ * The interrupt resource of the subsequently inserted CPU will be ++ * requested in the CPU hot-plug process. ++ */ ++ qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); ++ object_unref(cpuobj); + } ++ + fdt_add_cpu_nodes(lvms); + fdt_add_memory_nodes(machine); + fw_cfg_add_memory(machine); +@@ -1266,6 +1289,27 @@ static void virt_initfn(Object *obj) + virt_flash_create(lvms); + } + ++static int virt_get_arch_id_from_topo(MachineState *ms, LoongArchCPUTopo *topo) ++{ ++ int arch_id, sock_vcpu_num, core_vcpu_num; ++ ++ /* ++ * calculate total logical cpus across socket/core/thread. ++ * For more information on how to calculate the arch_id, ++ * you can refer to the CPU Topology chapter of the ++ * docs/system/loongarch/virt.rst document. 
++ */ ++ sock_vcpu_num = topo->socket_id * (ms->smp.threads * ms->smp.cores); ++ core_vcpu_num = topo->core_id * ms->smp.threads; ++ ++ /* get vcpu-id(logical cpu index) for this vcpu from this topology */ ++ arch_id = (sock_vcpu_num + core_vcpu_num) + topo->thread_id; ++ ++ assert(arch_id >= 0 && arch_id < ms->possible_cpus->len); ++ ++ return arch_id; ++} ++ + static bool memhp_type_supported(DeviceState *dev) + { + /* we only support pc dimm now */ +@@ -1363,10 +1407,19 @@ static HotplugHandler *virt_get_hotplug_handler(MachineState *machine, + return NULL; + } + ++static void virt_get_cpu_topo_from_index(MachineState *ms, ++ LoongArchCPUTopo *topo, int index) ++{ ++ topo->socket_id = index / (ms->smp.cores * ms->smp.threads); ++ topo->core_id = index / ms->smp.threads % ms->smp.cores; ++ topo->thread_id = index % ms->smp.threads; ++} ++ + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + { + int n; + unsigned int max_cpus = ms->smp.max_cpus; ++ LoongArchCPUTopo topo; + + if (ms->possible_cpus) { + assert(ms->possible_cpus->len == max_cpus); +@@ -1377,17 +1430,18 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + sizeof(CPUArchId) * max_cpus); + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { ++ ms->possible_cpus->cpus[n].vcpus_count = ms->smp.threads; + ms->possible_cpus->cpus[n].type = ms->cpu_type; +- ms->possible_cpus->cpus[n].arch_id = n; ++ virt_get_cpu_topo_from_index(ms, &topo, n); + + ms->possible_cpus->cpus[n].props.has_socket_id = true; +- ms->possible_cpus->cpus[n].props.socket_id = +- n / (ms->smp.cores * ms->smp.threads); ++ ms->possible_cpus->cpus[n].props.socket_id = topo.socket_id; + ms->possible_cpus->cpus[n].props.has_core_id = true; +- ms->possible_cpus->cpus[n].props.core_id = +- n / ms->smp.threads % ms->smp.cores; ++ ms->possible_cpus->cpus[n].props.core_id = topo.core_id; + ms->possible_cpus->cpus[n].props.has_thread_id = true; +- 
ms->possible_cpus->cpus[n].props.thread_id = n % ms->smp.threads; ++ ms->possible_cpus->cpus[n].props.thread_id = topo.thread_id; ++ ms->possible_cpus->cpus[n].arch_id = ++ virt_get_arch_id_from_topo(ms, &topo); + } + return ms->possible_cpus; + } +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 2ee1d63989..673ed8ea18 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -17,6 +17,7 @@ + #include "kvm/kvm_loongarch.h" + #include "exec/exec-all.h" + #include "cpu.h" ++#include "hw/qdev-properties.h" + #include "internals.h" + #include "fpu/softfloat-helpers.h" + #include "cpu-csr.h" +@@ -860,6 +861,15 @@ static int64_t loongarch_cpu_get_arch_id(CPUState *cs) + } + #endif + ++static Property loongarch_cpu_properties[] = { ++ DEFINE_PROP_INT32("socket-id", LoongArchCPU, socket_id, 0), ++ DEFINE_PROP_INT32("core-id", LoongArchCPU, core_id, 0), ++ DEFINE_PROP_INT32("thread-id", LoongArchCPU, thread_id, 0), ++ DEFINE_PROP_INT32("node-id", LoongArchCPU, node_id, CPU_UNSET_NUMA_NODE_ID), ++ ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void loongarch_cpu_class_init(ObjectClass *c, void *data) + { + LoongArchCPUClass *lacc = LOONGARCH_CPU_CLASS(c); +@@ -867,6 +877,7 @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data) + DeviceClass *dc = DEVICE_CLASS(c); + ResettableClass *rc = RESETTABLE_CLASS(c); + ++ device_class_set_props(dc, loongarch_cpu_properties); + device_class_set_parent_realize(dc, loongarch_cpu_realizefn, + &lacc->parent_realize); + resettable_class_set_parent_phases(rc, NULL, loongarch_cpu_reset_hold, NULL, +@@ -890,6 +901,7 @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data) + #ifdef CONFIG_TCG + cc->tcg_ops = &loongarch_tcg_ops; + #endif ++ dc->user_creatable = true; + } + + static const gchar *loongarch32_gdb_arch_name(CPUState *cs) +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 4c90cf9ef3..9af622aba5 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h 
+@@ -398,6 +398,12 @@ typedef struct CPUArchState { + } st; + } CPULoongArchState; + ++typedef struct LoongArchCPUTopo { ++ int32_t socket_id; /* socket-id of this VCPU */ ++ int32_t core_id; /* core-id of this VCPU */ ++ int32_t thread_id; /* thread-id of this VCPU */ ++} LoongArchCPUTopo; ++ + /** + * LoongArchCPU: + * @env: #CPULoongArchState +@@ -412,6 +418,10 @@ struct ArchCPU { + uint32_t phy_id; + OnOffAuto lbt; + OnOffAuto pmu; ++ int32_t socket_id; /* socket-id of this VCPU */ ++ int32_t core_id; /* core-id of this VCPU */ ++ int32_t thread_id; /* thread-id of this VCPU */ ++ int32_t node_id; /* NUMA node this CPU belongs to */ + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; +@@ -430,6 +440,7 @@ struct LoongArchCPUClass { + CPUClass parent_class; + + DeviceRealize parent_realize; ++ DeviceUnrealize parent_unrealize; + ResettablePhases parent_phases; + }; + +-- +2.39.1 + diff --git a/hw-loongarch-virt-Add-FDT-table-support-with-acpi-ge.patch b/hw-loongarch-virt-Add-FDT-table-support-with-acpi-ge.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2f0b2b19d2f352725ca87390c9ee1d38f8d4da6 --- /dev/null +++ b/hw-loongarch-virt-Add-FDT-table-support-with-acpi-ge.patch @@ -0,0 +1,84 @@ +From fa276847efb3fd47a730d279f1b14705fe3991b1 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 18 Sep 2024 09:42:06 +0800 +Subject: [PATCH 39/78] hw/loongarch/virt: Add FDT table support with acpi ged + pm register + +ACPI ged is used for power management on LoongArch virt platform, in +general it is parsed from acpi table. However if the system boots directly from +elf kernel, no UEFI bios is provided and acpi table cannot be used also. + +Here acpi ged pm register is exposed with FDT table, it is compatible +with syscon method in FDT table, only that acpi ged pm register is accessed +with 8-bit mode, rather than with 32-bit mode. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Tested-by: Song Gao +Message-Id: <20240918014206.2165821-3-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 39 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 39 insertions(+) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 9f47107379..9510aa7a7e 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -281,6 +281,44 @@ static void fdt_add_rtc_node(LoongArchVirtMachineState *lvms, + g_free(nodename); + } + ++static void fdt_add_ged_reset(LoongArchVirtMachineState *lvms) ++{ ++ char *name; ++ uint32_t ged_handle; ++ MachineState *ms = MACHINE(lvms); ++ hwaddr base = VIRT_GED_REG_ADDR; ++ hwaddr size = ACPI_GED_REG_COUNT; ++ ++ ged_handle = qemu_fdt_alloc_phandle(ms->fdt); ++ name = g_strdup_printf("/ged@%" PRIx64, base); ++ qemu_fdt_add_subnode(ms->fdt, name); ++ qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon"); ++ qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0x0, base, 0x0, size); ++ /* 8 bit registers */ ++ qemu_fdt_setprop_cell(ms->fdt, name, "reg-shift", 0); ++ qemu_fdt_setprop_cell(ms->fdt, name, "reg-io-width", 1); ++ qemu_fdt_setprop_cell(ms->fdt, name, "phandle", ged_handle); ++ ged_handle = qemu_fdt_get_phandle(ms->fdt, name); ++ g_free(name); ++ ++ name = g_strdup_printf("/reboot"); ++ qemu_fdt_add_subnode(ms->fdt, name); ++ qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-reboot"); ++ qemu_fdt_setprop_cell(ms->fdt, name, "regmap", ged_handle); ++ qemu_fdt_setprop_cell(ms->fdt, name, "offset", ACPI_GED_REG_RESET); ++ qemu_fdt_setprop_cell(ms->fdt, name, "value", ACPI_GED_RESET_VALUE); ++ g_free(name); ++ ++ name = g_strdup_printf("/poweroff"); ++ qemu_fdt_add_subnode(ms->fdt, name); ++ qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-poweroff"); ++ qemu_fdt_setprop_cell(ms->fdt, name, "regmap", ged_handle); ++ qemu_fdt_setprop_cell(ms->fdt, name, "offset", ACPI_GED_REG_SLEEP_CTL); 
++ qemu_fdt_setprop_cell(ms->fdt, name, "value", ACPI_GED_SLP_EN | ++ (ACPI_GED_SLP_TYP_S5 << ACPI_GED_SLP_TYP_POS)); ++ g_free(name); ++} ++ + static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, + uint32_t *pch_pic_phandle, hwaddr base, + int irq, bool chosen) +@@ -739,6 +777,7 @@ static void virt_devices_init(DeviceState *pch_pic, + qdev_get_gpio_in(pch_pic, + VIRT_RTC_IRQ - VIRT_GSI_BASE)); + fdt_add_rtc_node(lvms, pch_pic_phandle); ++ fdt_add_ged_reset(lvms); + + /* acpi ged */ + lvms->acpi_ged = create_acpi_ged(pch_pic, lvms); +-- +2.39.1 + diff --git a/hw-loongarch-virt-Add-basic-CPU-plug-support.patch b/hw-loongarch-virt-Add-basic-CPU-plug-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..76015bfd9b05841afef58648f6f297acf573a77b --- /dev/null +++ b/hw-loongarch-virt-Add-basic-CPU-plug-support.patch @@ -0,0 +1,346 @@ +From 212ea93178ad1e65e625ec6942ee9aff93dd5321 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 23 Oct 2024 15:13:11 +0800 +Subject: [PATCH 70/78] hw/loongarch/virt: Add basic CPU plug support + +Implement interface for cpu hotplug function, and enable cpu hotplug +feature on virt machine. 
+ +Co-developed-by: Xianglai Li +Signed-off-by: Bibo Mao +Message-ID: <20241023071312.881866-3-maobibo@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/Kconfig | 1 + + hw/loongarch/virt.c | 193 +++++++++++++++++++++++++++++++++++- + include/hw/loongarch/virt.h | 1 + + target/loongarch/cpu.c | 13 +++ + 4 files changed, 206 insertions(+), 2 deletions(-) + +diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig +index 40944a8365..b42a8573d4 100644 +--- a/hw/loongarch/Kconfig ++++ b/hw/loongarch/Kconfig +@@ -16,6 +16,7 @@ config LOONGARCH_VIRT + select LOONGARCH_EXTIOI + select LS7A_RTC + select SMBIOS ++ select ACPI_CPU_HOTPLUG + select ACPI_PCI + select ACPI_HW_REDUCED + select FW_CFG_DMA +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 8d1e53ff62..e7734ed3c0 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -821,7 +821,7 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + + /* Create IPI device */ + ipi = qdev_new(TYPE_LOONGARCH_IPI); +- qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.cpus); ++ qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.max_cpus); + sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); + + /* IPI iocsr memory region */ +@@ -845,9 +845,11 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + env->ipistate = ipi; + } + ++ lvms->ipi = ipi; ++ + /* Create EXTIOI device */ + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); +- qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus); ++ qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.max_cpus); + if (virt_is_veiointc_enabled(lvms)) { + qdev_prop_set_bit(extioi, "has-virtualization-extension", true); + } +@@ -873,6 +875,8 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) + } + } + ++ lvms->extioi = extioi; ++ + /* Add Extend I/O Interrupt Controller node */ + fdt_add_eiointc_node(lvms, &cpuintc_phandle, &eiointc_phandle); + +@@ -1310,6 +1314,181 @@ static int virt_get_arch_id_from_topo(MachineState *ms, LoongArchCPUTopo *topo) + 
return arch_id; + } + ++/* find cpu slot in machine->possible_cpus by arch_id */ ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int arch_id, int *index) ++{ ++ int n; ++ for (n = 0; n < ms->possible_cpus->len; n++) { ++ if (ms->possible_cpus->cpus[n].arch_id == arch_id) { ++ if (index) { ++ *index = n; ++ } ++ return &ms->possible_cpus->cpus[n]; ++ } ++ } ++ ++ return NULL; ++} ++ ++static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ MachineState *ms = MACHINE(OBJECT(hotplug_dev)); ++ MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); ++ LoongArchCPU *cpu = LOONGARCH_CPU(dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ Error *local_err = NULL; ++ LoongArchCPUTopo topo; ++ int arch_id, index; ++ ++ if (dev->hotplugged && !mc->has_hotpluggable_cpus) { ++ error_setg(&local_err, "CPU hotplug not supported for this machine"); ++ goto out; ++ } ++ ++ /* sanity check the cpu */ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(&local_err, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ goto out; ++ } ++ ++ if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) { ++ error_setg(&local_err, ++ "Invalid thread-id %u specified, must be in range 1:%u", ++ cpu->thread_id, ms->smp.threads - 1); ++ goto out; ++ } ++ ++ if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) { ++ error_setg(&local_err, ++ "Invalid core-id %u specified, must be in range 1:%u", ++ cpu->core_id, ms->smp.cores - 1); ++ goto out; ++ } ++ ++ if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) { ++ error_setg(&local_err, ++ "Invalid socket-id %u specified, must be in range 1:%u", ++ cpu->socket_id, ms->smp.sockets - 1); ++ goto out; ++ } ++ ++ topo.socket_id = cpu->socket_id; ++ topo.core_id = cpu->core_id; ++ topo.thread_id = cpu->thread_id; ++ arch_id = virt_get_arch_id_from_topo(ms, &topo); ++ cpu_slot = virt_find_cpu_slot(ms, arch_id, &index); ++ if 
(CPU(cpu_slot->cpu)) { ++ error_setg(&local_err, ++ "cpu(id%d=%d:%d:%d) with arch-id %" PRIu64 " exists", ++ cs->cpu_index, cpu->socket_id, cpu->core_id, ++ cpu->thread_id, cpu_slot->arch_id); ++ goto out; ++ } ++ cpu->phy_id = arch_id; ++ /* ++ * update cpu_index calculation method since it is easily used as index ++ * with possible_cpus array by function virt_cpu_index_to_props ++ */ ++ cs->cpu_index = index; ++ numa_cpu_pre_plug(cpu_slot, dev, &local_err); ++ return ; ++ ++out: ++ error_propagate(errp, local_err); ++} ++ ++static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev); ++ Error *local_err = NULL; ++ HotplugHandlerClass *hhc; ++ LoongArchCPU *cpu = LOONGARCH_CPU(dev); ++ CPUState *cs = CPU(dev); ++ ++ if (!lvms->acpi_ged) { ++ error_setg(&local_err, "CPU hot unplug not supported without ACPI"); ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ if (cs->cpu_index == 0) { ++ error_setg(&local_err, ++ "hot-unplug of boot cpu(id%d=%d:%d:%d) not supported", ++ cs->cpu_index, cpu->socket_id, ++ cpu->core_id, cpu->thread_id); ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(lvms->acpi_ged); ++ hhc->unplug_request(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &local_err); ++} ++ ++static void virt_cpu_unplug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *cpu_slot; ++ HotplugHandlerClass *hhc; ++ Error *local_err = NULL; ++ LoongArchCPU *cpu = LOONGARCH_CPU(dev); ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev); ++ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(lvms->acpi_ged); ++ hhc->unplug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id, NULL); ++ cpu_slot->cpu = NULL; ++ return; ++} ++ ++static void 
virt_cpu_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *cpu_slot; ++ HotplugHandlerClass *hhc; ++ Error *local_err = NULL; ++ LoongArchCPU *cpu = LOONGARCH_CPU(dev); ++ CPUState *cs = CPU(cpu); ++ CPULoongArchState *env; ++ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(hotplug_dev); ++ int pin; ++ ++ if (lvms->acpi_ged) { ++ env = &(cpu->env); ++ env->address_space_iocsr = &lvms->as_iocsr; ++ ++ env->ipistate = lvms->ipi; ++ if (!(kvm_enabled() && kvm_irqchip_in_kernel())) { ++ /* connect ipi irq to cpu irq, logic cpu index used here */ ++ qdev_connect_gpio_out(lvms->ipi, cs->cpu_index, ++ qdev_get_gpio_in(dev, IRQ_IPI)); ++ ++ for (pin = 0; pin < LS3A_INTC_IP; pin++) { ++ qdev_connect_gpio_out(lvms->extioi, (cs->cpu_index * 8 + pin), ++ qdev_get_gpio_in(dev, pin + 2)); ++ } ++ } ++ hhc = HOTPLUG_HANDLER_GET_CLASS(lvms->acpi_ged); ++ hhc->plug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } ++ ++ cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id, NULL); ++ cpu_slot->cpu = OBJECT(dev); ++ return; ++} ++ + static bool memhp_type_supported(DeviceState *dev) + { + /* we only support pc dimm now */ +@@ -1328,6 +1507,8 @@ static void virt_device_pre_plug(HotplugHandler *hotplug_dev, + { + if (memhp_type_supported(dev)) { + virt_mem_pre_plug(hotplug_dev, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) { ++ virt_cpu_pre_plug(hotplug_dev, dev, errp); + } + } + +@@ -1346,6 +1527,8 @@ static void virt_device_unplug_request(HotplugHandler *hotplug_dev, + { + if (memhp_type_supported(dev)) { + virt_mem_unplug_request(hotplug_dev, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) { ++ virt_cpu_unplug_request(hotplug_dev, dev, errp); + } + } + +@@ -1364,6 +1547,8 @@ static void virt_device_unplug(HotplugHandler *hotplug_dev, + { + if (memhp_type_supported(dev)) { + 
virt_mem_unplug(hotplug_dev, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) { ++ virt_cpu_unplug(hotplug_dev, dev, errp); + } + } + +@@ -1391,6 +1576,8 @@ static void virt_device_plug_cb(HotplugHandler *hotplug_dev, + } + } else if (memhp_type_supported(dev)) { + virt_mem_plug(hotplug_dev, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU)) { ++ virt_cpu_plug(hotplug_dev, dev, errp); + } + } + +@@ -1400,6 +1587,7 @@ static HotplugHandler *virt_get_hotplug_handler(MachineState *machine, + MachineClass *mc = MACHINE_GET_CLASS(machine); + + if (device_is_dynamic_sysbus(mc, dev) || ++ object_dynamic_cast(OBJECT(dev), TYPE_LOONGARCH_CPU) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || + memhp_type_supported(dev)) { + return HOTPLUG_HANDLER(machine); +@@ -1489,6 +1677,7 @@ static void virt_class_init(ObjectClass *oc, void *data) + mc->numa_mem_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ mc->has_hotpluggable_cpus = true; + mc->get_hotplug_handler = virt_get_hotplug_handler; + mc->default_nic = "virtio-net-pci"; + hc->plug = virt_device_plug_cb; +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 0a4d9a25f0..27c52af9f3 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -64,6 +64,7 @@ struct LoongArchVirtMachineState { + AddressSpace as_iocsr; + int features; + struct loongarch_boot_info bootinfo; ++ DeviceState *ipi; + }; + + #define TYPE_LOONGARCH_VIRT_MACHINE MACHINE_TYPE_NAME("virt") +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 673ed8ea18..ee764f0bc7 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -644,6 +644,17 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error **errp) + lacc->parent_realize(dev, errp); + } + ++static void loongarch_cpu_unrealizefn(DeviceState *dev) ++{ ++ LoongArchCPUClass *mcc = 
LOONGARCH_CPU_GET_CLASS(dev); ++ ++#ifndef CONFIG_USER_ONLY ++ cpu_remove_sync(CPU(dev)); ++#endif ++ ++ mcc->parent_unrealize(dev); ++} ++ + static bool loongarch_get_lsx(Object *obj, Error **errp) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +@@ -880,6 +891,8 @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data) + device_class_set_props(dc, loongarch_cpu_properties); + device_class_set_parent_realize(dc, loongarch_cpu_realizefn, + &lacc->parent_realize); ++ device_class_set_parent_unrealize(dc, loongarch_cpu_unrealizefn, ++ &lacc->parent_unrealize); + resettable_class_set_parent_phases(rc, NULL, loongarch_cpu_reset_hold, NULL, + &lacc->parent_phases); + +-- +2.39.1 + diff --git a/hw-loongarch-virt-Add-description-for-virt-machine-t.patch b/hw-loongarch-virt-Add-description-for-virt-machine-t.patch new file mode 100644 index 0000000000000000000000000000000000000000..37d210c7cf8eb7e665ea468122747737f5351f71 --- /dev/null +++ b/hw-loongarch-virt-Add-description-for-virt-machine-t.patch @@ -0,0 +1,46 @@ +From 080ca7865257d70b6be671cbc17a97c5ebffbd68 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Fri, 13 Sep 2024 17:52:02 +0800 +Subject: [PATCH 38/78] hw/loongarch/virt: Add description for virt machine + type + +The description about virt machine type is removed by mistake, add +new description here. 
Here is output result with command +"./qemu-system-loongarch64 -M help" + +Supported machines are: +none empty machine +virt QEMU LoongArch Virtual Machine (default) +x-remote Experimental remote machine + +Without the patch, it shows as follows: +Supported machines are: +none empty machine +virt (null) (default) +x-remote Experimental remote machine + +Fixes: ef2f11454c(hw/loongarch/virt: Replace Loongson IPI with LoongArch IPI) +Signed-off-by: Bibo Mao +Reviewed-by: Thomas Huth +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 79b16953d2..9f47107379 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1383,6 +1383,7 @@ static void virt_class_init(ObjectClass *oc, void *data) + mc->init = virt_init; + mc->default_cpu_type = LOONGARCH_CPU_TYPE_NAME("la464"); + mc->default_ram_id = "loongarch.ram"; ++ mc->desc = "QEMU LoongArch Virtual Machine"; + mc->max_cpus = LOONGARCH_MAX_CPUS; + mc->is_default = 1; + mc->default_kernel_irqchip_split = false; +-- +2.39.1 + diff --git a/hw-loongarch-virt-Align-high-memory-base-address-wit.patch b/hw-loongarch-virt-Align-high-memory-base-address-wit.patch new file mode 100644 index 0000000000000000000000000000000000000000..5dc5ce306046135f7a644e3c26ab88ded6a14320 --- /dev/null +++ b/hw-loongarch-virt-Align-high-memory-base-address-wit.patch @@ -0,0 +1,39 @@ +From 70e7ffec16e91138309ad3f76588cbd10c084394 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 27 Nov 2023 12:02:31 +0800 +Subject: [PATCH] hw/loongarch/virt: Align high memory base address with super + page size + +With LoongArch virt machine, there is low memory space with region +0--0x10000000, and high memory space with started from 0x90000000. +High memory space is aligned with 256M, it will be better if it is +aligned with 1G, which is super page aligned for 4K page size. 
+ +Currently linux kernel and uefi bios have no limitation on the high +memory base address, it is ok to set high memory base address +with 0x80000000. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231127040231.4123715-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + include/hw/loongarch/virt.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 674f4655e0..db0831b471 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -25,7 +25,7 @@ + + #define VIRT_LOWMEM_BASE 0 + #define VIRT_LOWMEM_SIZE 0x10000000 +-#define VIRT_HIGHMEM_BASE 0x90000000 ++#define VIRT_HIGHMEM_BASE 0x80000000 + #define VIRT_GED_EVT_ADDR 0x100e0000 + #define VIRT_GED_MEM_ADDR (VIRT_GED_EVT_ADDR + ACPI_GED_EVT_SEL_LEN) + #define VIRT_GED_REG_ADDR (VIRT_GED_MEM_ADDR + MEMORY_HOTPLUG_IO_LEN) +-- +2.27.0 + diff --git a/hw-loongarch-virt-Fix-FDT-memory-node-address-width.patch b/hw-loongarch-virt-Fix-FDT-memory-node-address-width.patch new file mode 100644 index 0000000000000000000000000000000000000000..f7d5ce8faddfbae68329b3c217a9ece3589b133e --- /dev/null +++ b/hw-loongarch-virt-Fix-FDT-memory-node-address-width.patch @@ -0,0 +1,36 @@ +From b9e94d97025251cfd13b3ad859b97002504285ce Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Fri, 13 Sep 2024 18:57:20 +0800 +Subject: [PATCH] hw/loongarch/virt: Fix FDT memory node address width + +cherry picked from commit 6204af704a071ea68d3af55c0502b112a7af9546 + +Higher bits for memory nodes were omitted at qemu_fdt_setprop_cells. 
+ +Cc: mailto:qemu-stable@nongnu.org +Signed-off-by: Jiaxun Yang jiaxun.yang@flygoat.com +Reviewed-by: Song Gao gaosong@loongson.cn +Message-Id: 20240520-loongarch-fdt-memnode-v1-1-5ea9be93911e@flygoat.com +Signed-off-by: Song Gao gaosong@loongson.cn +Signed-off-by: Gao Jiazhen gaojiazhen_yewu@cmss.chinamobile.com +--- + hw/loongarch/virt.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index fc7b70ed4e..5d4fcb7a55 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -360,7 +360,8 @@ static void fdt_add_memory_node(MachineState *ms, + char *nodename = g_strdup_printf("/memory@%" PRIx64, base); + + qemu_fdt_add_subnode(ms->fdt, nodename); +- qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0, base, 0, size); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", base >> 32, base, ++ size >> 32, size); + qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory"); + + if (ms->numa_state && ms->numa_state->num_nodes) { +-- +2.41.0.windows.1 + diff --git a/hw-loongarch-virt-Fix-memory-leak.patch b/hw-loongarch-virt-Fix-memory-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7f240a9112684ec104ac0fcb31e522a86bbb5ce --- /dev/null +++ b/hw-loongarch-virt-Fix-memory-leak.patch @@ -0,0 +1,49 @@ +From 7d0006839846bef68fa3d96886b8e5d8f8ec52f1 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 7 May 2024 10:22:39 +0800 +Subject: [PATCH] hw/loongarch/virt: Fix memory leak +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The char pointer 'ramName' point to a block of memory, +but never free it. Use 'g_autofree' to automatically free it. 
+ +Resolves: Coverity CID 1544773 + +Fixes: 0cf1478d6 ("hw/loongarch: Add numa support") +Signed-off-by: Song Gao +Reviewed-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240507022239.3113987-1-gaosong@loongson.cn> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 54c52ec719fb8c83bbde54cb87b58688ab27c166) +Signed-off-by: zhujun2 +--- + hw/loongarch/virt.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 5d4fcb7a55..eca3b94581 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -925,7 +925,6 @@ static void loongarch_init(MachineState *machine) + const CPUArchIdList *possible_cpus; + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUState *cpu; +- char *ramName = NULL; + struct loaderparams loaderparams = { }; + + if (!cpu_model) { +@@ -985,7 +984,7 @@ static void loongarch_init(MachineState *machine) + + for (i = 1; i < nb_numa_nodes; i++) { + MemoryRegion *nodemem = g_new(MemoryRegion, 1); +- ramName = g_strdup_printf("loongarch.node%d.ram", i); ++ g_autofree char *ramName = g_strdup_printf("loongarch.node%d.ram", i); + memory_region_init_alias(nodemem, NULL, ramName, machine->ram, + offset, numa_info[i].node_mem); + memory_region_add_subregion(address_space_mem, phyAddr, nodemem); +-- +2.41.0.windows.1 + diff --git a/hw-loongarch-virt-Remove-unnecessary-cpu.h-inclusion.patch b/hw-loongarch-virt-Remove-unnecessary-cpu.h-inclusion.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b15c9470d14f356ecce1f174bbc1b1969dd9844 --- /dev/null +++ b/hw-loongarch-virt-Remove-unnecessary-cpu.h-inclusion.patch @@ -0,0 +1,33 @@ +From cbd62b91ecdd0ec5f4ccb4c726e0adcdd2808270 Mon Sep 17 00:00:00 2001 +From: Susanooo +Date: Thu, 24 Oct 2024 10:34:17 +0800 +Subject: [PATCH] hw/loongarch/virt: Remove unnecessary 'cpu.h' inclusion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Song Gao +Reviewed-by: Thomas Huth +Message-Id: <20240927213254.17552-2-philmd@linaro.org> +Signed-off-by: Song Gao +Signed-off-by: zhangchujun +--- + include/hw/loongarch/virt.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 99447fd1d6..17a792e596 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -8,7 +8,6 @@ + #ifndef HW_LOONGARCH_H + #define HW_LOONGARCH_H + +-#include "target/loongarch/cpu.h" + #include "hw/boards.h" + #include "qemu/queue.h" + #include "hw/intc/loongarch_ipi.h" +-- +2.41.0.windows.1 + diff --git a/hw-loongarch-virt-Remove-unused-assignment.patch b/hw-loongarch-virt-Remove-unused-assignment.patch new file mode 100644 index 0000000000000000000000000000000000000000..37f7cd13f22d1f70fcfb076b6abe4a0a418657fd --- /dev/null +++ b/hw-loongarch-virt-Remove-unused-assignment.patch @@ -0,0 +1,59 @@ +From 5e0ec61ac98a025124912fc47552550b471ab638 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 12 Jun 2024 11:36:37 +0800 +Subject: [PATCH 32/78] hw/loongarch/virt: Remove unused assignment + +There is abuse usage about local variable gap. Remove +duplicated assignment and solve Coverity reported error. 
+ +Resolves: Coverity CID 1546441 +Fixes: 3cc451cbce ("hw/loongarch: Refine fwcfg memory map") +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240612033637.167787-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 12816c6023..a7283e6755 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -1034,7 +1034,6 @@ static void fw_cfg_add_memory(MachineState *ms) + memmap_add_entry(base, gap, 1); + size -= gap; + base = VIRT_HIGHMEM_BASE; +- gap = ram_size - VIRT_LOWMEM_SIZE; + } + + if (size) { +@@ -1047,17 +1046,17 @@ static void fw_cfg_add_memory(MachineState *ms) + } + + /* add fw_cfg memory map of other nodes */ +- size = ram_size - numa_info[0].node_mem; +- gap = VIRT_LOWMEM_BASE + VIRT_LOWMEM_SIZE; +- if (base < gap && (base + size) > gap) { ++ if (numa_info[0].node_mem < gap && ram_size > gap) { + /* + * memory map for the maining nodes splited into two part +- * lowram: [base, +(gap - base)) +- * highram: [VIRT_HIGHMEM_BASE, +(size - (gap - base))) ++ * lowram: [base, +(gap - numa_info[0].node_mem)) ++ * highram: [VIRT_HIGHMEM_BASE, +(ram_size - gap)) + */ +- memmap_add_entry(base, gap - base, 1); +- size -= gap - base; ++ memmap_add_entry(base, gap - numa_info[0].node_mem, 1); ++ size = ram_size - gap; + base = VIRT_HIGHMEM_BASE; ++ } else { ++ size = ram_size - numa_info[0].node_mem; + } + + if (size) +-- +2.39.1 + diff --git a/hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch b/hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch new file mode 100644 index 0000000000000000000000000000000000000000..cd8ec42fdca10e12f964d76dbadf25294ac364b2 --- /dev/null +++ b/hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch @@ -0,0 +1,558 @@ +From 43100bba2bfd9de0c3bab7c3e815b02faa69242d Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 13 
Dec 2023 12:13:14 +0800 +Subject: [PATCH] hw/loongarch/virt: Set iocsr address space per-board rather + than percpu + +LoongArch system has iocsr address space, most iocsr registers are +per-board, however some iocsr register spaces are banked for percpu such +as ipi mailbox and extioi interrupt status. For banked iocsr space, +each cpu has the same iocsr space, but separate data. + +This patch changes iocsr address space to per-board rather than percpu, +for iocsr registers specified for cpu, MemTxAttrs.requester_id +can be parsed for the cpu. With this patch, the total address space +on board will be simple, only iocsr address space and system memory, +rather than the number of cpu and system memory. + +conflict: + ++<<<<<<< HEAD + + .version_id = 1, + + .minimum_version_id = 1, + + .fields = (VMStateField[]) { + + VMSTATE_STRUCT(ipi_core, LoongArchIPI, 0, vmstate_ipi_core, IPICore), +++======= ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { ++ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchIPI, num_cpu, ++ vmstate_ipi_core, IPICore), +++>>>>>>> hw/loongarch/virt: Set iocsr address space per-board rather than percpu + +solve: +save: hw/loongarch/virt: Set iocsr address space per-board rather than percpu + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-3-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_extioi.c | 3 - + hw/intc/loongarch_ipi.c | 63 +++++++++++++++----- + hw/loongarch/virt.c | 91 +++++++++++++++++++++-------- + include/hw/intc/loongarch_extioi.h | 1 - + include/hw/intc/loongarch_ipi.h | 3 +- + include/hw/loongarch/virt.h | 3 + + target/loongarch/cpu.c | 48 --------------- + target/loongarch/cpu.h | 4 +- + target/loongarch/kvm/kvm.c | 2 +- + target/loongarch/tcg/iocsr_helper.c | 16 ++--- + 10 files changed, 129 insertions(+), 105 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index 24fb3af8cc..77b4776958 100644 +--- 
a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -282,9 +282,6 @@ static void loongarch_extioi_instance_init(Object *obj) + qdev_init_gpio_in(DEVICE(obj), extioi_setirq, EXTIOI_IRQS); + + for (cpu = 0; cpu < EXTIOI_CPUS; cpu++) { +- memory_region_init_io(&s->extioi_iocsr_mem[cpu], OBJECT(s), &extioi_ops, +- s, "extioi_iocsr", 0x900); +- sysbus_init_mmio(dev, &s->extioi_iocsr_mem[cpu]); + for (pin = 0; pin < LS3A_INTC_IP; pin++) { + qdev_init_gpio_out(DEVICE(obj), &s->parent_irq[cpu][pin], 1); + } +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index 221246c5cb..e228669aa5 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -9,6 +9,7 @@ + #include "hw/sysbus.h" + #include "hw/intc/loongarch_ipi.h" + #include "hw/irq.h" ++#include "hw/qdev-properties.h" + #include "qapi/error.h" + #include "qemu/log.h" + #include "exec/address-spaces.h" +@@ -26,7 +27,7 @@ static MemTxResult loongarch_ipi_readl(void *opaque, hwaddr addr, + uint64_t ret = 0; + int index = 0; + +- s = &ipi->ipi_core; ++ s = &ipi->cpu[attrs.requester_id]; + addr &= 0xff; + switch (addr) { + case CORE_STATUS_OFF: +@@ -65,7 +66,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr, + * if the mask is 0, we need not to do anything. 
+ */ + if ((val >> 27) & 0xf) { +- data = address_space_ldl(&env->address_space_iocsr, addr, ++ data = address_space_ldl(env->address_space_iocsr, addr, + attrs, NULL); + for (i = 0; i < 4; i++) { + /* get mask for byte writing */ +@@ -77,7 +78,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr, + + data &= mask; + data |= (val >> 32) & ~mask; +- address_space_stl(&env->address_space_iocsr, addr, ++ address_space_stl(env->address_space_iocsr, addr, + data, attrs, NULL); + } + +@@ -172,7 +173,7 @@ static MemTxResult loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, + uint8_t vector; + CPUState *cs; + +- s = &ipi->ipi_core; ++ s = &ipi->cpu[attrs.requester_id]; + addr &= 0xff; + trace_loongarch_ipi_write(size, (uint64_t)addr, val); + switch (addr) { +@@ -214,7 +215,6 @@ static MemTxResult loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, + + /* override requester_id */ + attrs.requester_id = cs->cpu_index; +- ipi = LOONGARCH_IPI(LOONGARCH_CPU(cs)->env.ipistate); + loongarch_ipi_writel(ipi, CORE_SET_OFF, BIT(vector), 4, attrs); + break; + default: +@@ -265,12 +265,18 @@ static const MemoryRegionOps loongarch_ipi64_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + +-static void loongarch_ipi_init(Object *obj) ++static void loongarch_ipi_realize(DeviceState *dev, Error **errp) + { +- LoongArchIPI *s = LOONGARCH_IPI(obj); +- SysBusDevice *sbd = SYS_BUS_DEVICE(obj); ++ LoongArchIPI *s = LOONGARCH_IPI(dev); ++ SysBusDevice *sbd = SYS_BUS_DEVICE(dev); ++ int i; ++ ++ if (s->num_cpu == 0) { ++ error_setg(errp, "num-cpu must be at least 1"); ++ return; ++ } + +- memory_region_init_io(&s->ipi_iocsr_mem, obj, &loongarch_ipi_ops, ++ memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev), &loongarch_ipi_ops, + s, "loongarch_ipi_iocsr", 0x48); + + /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ +@@ -278,10 +284,20 @@ static void loongarch_ipi_init(Object *obj) + + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem); + 
+- memory_region_init_io(&s->ipi64_iocsr_mem, obj, &loongarch_ipi64_ops, ++ memory_region_init_io(&s->ipi64_iocsr_mem, OBJECT(dev), ++ &loongarch_ipi64_ops, + s, "loongarch_ipi64_iocsr", 0x118); + sysbus_init_mmio(sbd, &s->ipi64_iocsr_mem); +- qdev_init_gpio_out(DEVICE(obj), &s->ipi_core.irq, 1); ++ ++ s->cpu = g_new0(IPICore, s->num_cpu); ++ if (s->cpu == NULL) { ++ error_setg(errp, "Memory allocation for ExtIOICore faile"); ++ return; ++ } ++ ++ for (i = 0; i < s->num_cpu; i++) { ++ qdev_init_gpio_out(dev, &s->cpu[i].irq, 1); ++ } + } + + static const VMStateDescription vmstate_ipi_core = { +@@ -300,27 +316,42 @@ static const VMStateDescription vmstate_ipi_core = { + + static const VMStateDescription vmstate_loongarch_ipi = { + .name = TYPE_LOONGARCH_IPI, +- .version_id = 1, +- .minimum_version_id = 1, +- .fields = (VMStateField[]) { +- VMSTATE_STRUCT(ipi_core, LoongArchIPI, 0, vmstate_ipi_core, IPICore), ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { ++ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchIPI, num_cpu, ++ vmstate_ipi_core, IPICore), + VMSTATE_END_OF_LIST() + } + }; + ++static Property ipi_properties[] = { ++ DEFINE_PROP_UINT32("num-cpu", LoongArchIPI, num_cpu, 1), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ + static void loongarch_ipi_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + ++ dc->realize = loongarch_ipi_realize; ++ device_class_set_props(dc, ipi_properties); + dc->vmsd = &vmstate_loongarch_ipi; + } + ++static void loongarch_ipi_finalize(Object *obj) ++{ ++ LoongArchIPI *s = LOONGARCH_IPI(obj); ++ ++ g_free(s->cpu); ++} ++ + static const TypeInfo loongarch_ipi_info = { + .name = TYPE_LOONGARCH_IPI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(LoongArchIPI), +- .instance_init = loongarch_ipi_init, + .class_init = loongarch_ipi_class_init, ++ .instance_finalize = loongarch_ipi_finalize, + }; + + static void loongarch_ipi_register_types(void) +diff --git 
a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 4b7dc67a2d..13d19b6da3 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -535,9 +535,6 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + CPUState *cpu_state; + int cpu, pin, i, start, num; + +- extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); +- sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); +- + /* + * The connection of interrupts: + * +-----+ +---------+ +-------+ +@@ -559,36 +556,36 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + * | UARTs | | Devices | | Devices | + * +--------+ +---------+ +---------+ + */ ++ ++ /* Create IPI device */ ++ ipi = qdev_new(TYPE_LOONGARCH_IPI); ++ qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.cpus); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); ++ ++ /* IPI iocsr memory region */ ++ memory_region_add_subregion(&lams->system_iocsr, SMP_IPI_MAILBOX, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0)); ++ memory_region_add_subregion(&lams->system_iocsr, MAIL_SEND_ADDR, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); ++ + for (cpu = 0; cpu < ms->smp.cpus; cpu++) { + cpu_state = qemu_get_cpu(cpu); + cpudev = DEVICE(cpu_state); + lacpu = LOONGARCH_CPU(cpu_state); + env = &(lacpu->env); +- +- ipi = qdev_new(TYPE_LOONGARCH_IPI); +- sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); ++ env->address_space_iocsr = &lams->as_iocsr; + + /* connect ipi irq to cpu irq */ +- qdev_connect_gpio_out(ipi, 0, qdev_get_gpio_in(cpudev, IRQ_IPI)); +- /* IPI iocsr memory region */ +- memory_region_add_subregion(&env->system_iocsr, SMP_IPI_MAILBOX, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), +- 0)); +- memory_region_add_subregion(&env->system_iocsr, MAIL_SEND_ADDR, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), +- 1)); +- /* +- * extioi iocsr memory region +- * only one extioi is added on loongarch virt machine +- * external device interrupt can only be routed to cpu 0-3 +- */ +- if (cpu < EXTIOI_CPUS) +- 
memory_region_add_subregion(&env->system_iocsr, APIC_BASE, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), +- cpu)); ++ qdev_connect_gpio_out(ipi, cpu, qdev_get_gpio_in(cpudev, IRQ_IPI)); + env->ipistate = ipi; + } + ++ /* Create EXTIOI device */ ++ extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); ++ memory_region_add_subregion(&lams->system_iocsr, APIC_BASE, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); ++ + /* + * connect ext irq to the cpu irq + * cpu_pin[9:2] <= intc_pin[7:0] +@@ -733,6 +730,43 @@ static void loongarch_direct_kernel_boot(LoongArchMachineState *lams, + } + } + ++static void loongarch_qemu_write(void *opaque, hwaddr addr, ++ uint64_t val, unsigned size) ++{ ++} ++ ++static uint64_t loongarch_qemu_read(void *opaque, hwaddr addr, unsigned size) ++{ ++ switch (addr) { ++ case VERSION_REG: ++ return 0x11ULL; ++ case FEATURE_REG: ++ return 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | ++ 1ULL << IOCSRF_CSRIPI; ++ case VENDOR_REG: ++ return 0x6e6f73676e6f6f4cULL; /* "Loongson" */ ++ case CPUNAME_REG: ++ return 0x303030354133ULL; /* "3A5000" */ ++ case MISC_FUNC_REG: ++ return 1ULL << IOCSRM_EXTIOI_EN; ++ } ++ return 0ULL; ++} ++ ++static const MemoryRegionOps loongarch_qemu_ops = { ++ .read = loongarch_qemu_read, ++ .write = loongarch_qemu_write, ++ .endianness = DEVICE_LITTLE_ENDIAN, ++ .valid = { ++ .min_access_size = 4, ++ .max_access_size = 8, ++ }, ++ .impl = { ++ .min_access_size = 8, ++ .max_access_size = 8, ++ }, ++}; ++ + static void loongarch_init(MachineState *machine) + { + LoongArchCPU *lacpu; +@@ -761,8 +795,17 @@ static void loongarch_init(MachineState *machine) + exit(1); + } + create_fdt(lams); +- /* Init CPUs */ + ++ /* Create IOCSR space */ ++ memory_region_init_io(&lams->system_iocsr, OBJECT(machine), NULL, ++ machine, "iocsr", UINT64_MAX); ++ address_space_init(&lams->as_iocsr, &lams->system_iocsr, "IOCSR"); ++ memory_region_init_io(&lams->iocsr_mem, 
OBJECT(machine), ++ &loongarch_qemu_ops, ++ machine, "iocsr_misc", 0x428); ++ memory_region_add_subregion(&lams->system_iocsr, 0, &lams->iocsr_mem); ++ ++ /* Init CPUs */ + possible_cpus = mc->possible_cpu_arch_ids(machine); + for (i = 0; i < possible_cpus->len; i++) { + cpu = cpu_create(machine->cpu_type); +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index fbdef9a7b3..110e5e8873 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -58,7 +58,6 @@ struct LoongArchExtIOI { + uint8_t sw_coremap[EXTIOI_IRQS]; + qemu_irq parent_irq[EXTIOI_CPUS][LS3A_INTC_IP]; + qemu_irq irq[EXTIOI_IRQS]; +- MemoryRegion extioi_iocsr_mem[EXTIOI_CPUS]; + MemoryRegion extioi_system_mem; + }; + #endif /* LOONGARCH_EXTIOI_H */ +diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h +index 6c6194786e..1c1e834849 100644 +--- a/include/hw/intc/loongarch_ipi.h ++++ b/include/hw/intc/loongarch_ipi.h +@@ -47,7 +47,8 @@ struct LoongArchIPI { + SysBusDevice parent_obj; + MemoryRegion ipi_iocsr_mem; + MemoryRegion ipi64_iocsr_mem; +- IPICore ipi_core; ++ uint32_t num_cpu; ++ IPICore *cpu; + }; + + #endif +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index db0831b471..6ef9a92394 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -50,6 +50,9 @@ struct LoongArchMachineState { + DeviceState *platform_bus_dev; + PCIBus *pci_bus; + PFlashCFI01 *flash; ++ MemoryRegion system_iocsr; ++ MemoryRegion iocsr_mem; ++ AddressSpace as_iocsr; + }; + + #define TYPE_LOONGARCH_MACHINE MACHINE_TYPE_NAME("virt") +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 413414392b..6611d137a1 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -602,47 +602,6 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error **errp) + lacc->parent_realize(dev, errp); + } + +-#ifndef CONFIG_USER_ONLY +-static void 
loongarch_qemu_write(void *opaque, hwaddr addr, +- uint64_t val, unsigned size) +-{ +- qemu_log_mask(LOG_UNIMP, "[%s]: Unimplemented reg 0x%" HWADDR_PRIx "\n", +- __func__, addr); +-} +- +-static uint64_t loongarch_qemu_read(void *opaque, hwaddr addr, unsigned size) +-{ +- switch (addr) { +- case VERSION_REG: +- return 0x11ULL; +- case FEATURE_REG: +- return 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | +- 1ULL << IOCSRF_CSRIPI; +- case VENDOR_REG: +- return 0x6e6f73676e6f6f4cULL; /* "Loongson" */ +- case CPUNAME_REG: +- return 0x303030354133ULL; /* "3A5000" */ +- case MISC_FUNC_REG: +- return 1ULL << IOCSRM_EXTIOI_EN; +- } +- return 0ULL; +-} +- +-static const MemoryRegionOps loongarch_qemu_ops = { +- .read = loongarch_qemu_read, +- .write = loongarch_qemu_write, +- .endianness = DEVICE_LITTLE_ENDIAN, +- .valid = { +- .min_access_size = 4, +- .max_access_size = 8, +- }, +- .impl = { +- .min_access_size = 8, +- .max_access_size = 8, +- }, +-}; +-#endif +- + static bool loongarch_get_lsx(Object *obj, Error **errp) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +@@ -713,19 +672,12 @@ static void loongarch_cpu_init(Object *obj) + { + #ifndef CONFIG_USER_ONLY + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +- CPULoongArchState *env = &cpu->env; + + qdev_init_gpio_in(DEVICE(cpu), loongarch_cpu_set_irq, N_IRQS); + #ifdef CONFIG_TCG + timer_init_ns(&cpu->timer, QEMU_CLOCK_VIRTUAL, + &loongarch_constant_timer_cb, cpu); + #endif +- memory_region_init_io(&env->system_iocsr, OBJECT(cpu), NULL, +- env, "iocsr", UINT64_MAX); +- address_space_init(&env->address_space_iocsr, &env->system_iocsr, "IOCSR"); +- memory_region_init_io(&env->iocsr_mem, OBJECT(cpu), &loongarch_qemu_ops, +- NULL, "iocsr_misc", 0x428); +- memory_region_add_subregion(&env->system_iocsr, 0, &env->iocsr_mem); + #endif + } + +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 8ebd6fa1a7..4aba8aba4c 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -355,9 +355,7 @@ typedef struct 
CPUArchState { + #ifndef CONFIG_USER_ONLY + LoongArchTLB tlb[LOONGARCH_TLB_MAX]; + +- AddressSpace address_space_iocsr; +- MemoryRegion system_iocsr; +- MemoryRegion iocsr_mem; ++ AddressSpace *address_space_iocsr; + bool load_elf; + uint64_t elf_address; + uint32_t mp_state; +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index bd33ec2114..84bcdf5f86 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -733,7 +733,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + trace_kvm_arch_handle_exit(run->exit_reason); + switch (run->exit_reason) { + case KVM_EXIT_LOONGARCH_IOCSR: +- address_space_rw(&env->address_space_iocsr, ++ address_space_rw(env->address_space_iocsr, + run->iocsr_io.phys_addr, + attrs, + run->iocsr_io.data, +diff --git a/target/loongarch/tcg/iocsr_helper.c b/target/loongarch/tcg/iocsr_helper.c +index 6cd01d5f09..b6916f53d2 100644 +--- a/target/loongarch/tcg/iocsr_helper.c ++++ b/target/loongarch/tcg/iocsr_helper.c +@@ -17,52 +17,52 @@ + + uint64_t helper_iocsrrd_b(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_ldub(&env->address_space_iocsr, r_addr, ++ return address_space_ldub(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + uint64_t helper_iocsrrd_h(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_lduw(&env->address_space_iocsr, r_addr, ++ return address_space_lduw(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + uint64_t helper_iocsrrd_w(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_ldl(&env->address_space_iocsr, r_addr, ++ return address_space_ldl(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + uint64_t helper_iocsrrd_d(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_ldq(&env->address_space_iocsr, r_addr, ++ return address_space_ldq(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + void 
helper_iocsrwr_b(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stb(&env->address_space_iocsr, w_addr, ++ address_space_stb(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } + + void helper_iocsrwr_h(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stw(&env->address_space_iocsr, w_addr, ++ address_space_stw(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } + + void helper_iocsrwr_w(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stl(&env->address_space_iocsr, w_addr, ++ address_space_stl(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } + + void helper_iocsrwr_d(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stq(&env->address_space_iocsr, w_addr, ++ address_space_stq(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } +-- +2.27.0 + diff --git a/hw-loongarch-virt-Update-the-ACPI-table-for-hotplug-.patch b/hw-loongarch-virt-Update-the-ACPI-table-for-hotplug-.patch new file mode 100644 index 0000000000000000000000000000000000000000..d765070c761981175ad9c4fb9f5592c0ad816391 --- /dev/null +++ b/hw-loongarch-virt-Update-the-ACPI-table-for-hotplug-.patch @@ -0,0 +1,143 @@ +From a3728999125cd9fc9e3e841b66a1677663933c27 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 23 Oct 2024 15:13:12 +0800 +Subject: [PATCH 71/78] hw/loongarch/virt: Update the ACPI table for hotplug + cpu + +On LoongArch virt machine, ACPI GED hardware is used for cpu +hotplug, here cpu hotplug support feature is added on GED device, +also cpu scan and reject method is added about CPU device in +DSDT table. 
+ +Co-developed-by: Xianglai Li +Signed-off-by: Bibo Mao +Message-ID: <20241023071312.881866-4-maobibo@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/acpi-build.c | 35 +++++++++++++++++++++++++++++++++-- + hw/loongarch/virt.c | 10 ++++++++++ + include/hw/loongarch/virt.h | 1 + + 3 files changed, 44 insertions(+), 2 deletions(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index bcdec2e1cb..a54c5e0e70 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -47,6 +47,22 @@ + #define ACPI_BUILD_DPRINTF(fmt, ...) + #endif + ++static void virt_madt_cpu_entry(int uid, ++ const CPUArchIdList *apic_ids, ++ GArray *entry, bool force_enabled) ++{ ++ uint32_t flags, apic_id = apic_ids->cpus[uid].arch_id; ++ ++ flags = apic_ids->cpus[uid].cpu || force_enabled ? 1 /* Enabled */ : 0; ++ ++ /* Rev 1.0b, Table 5-13 Processor Local APIC Structure */ ++ build_append_int_noprefix(entry, 0, 1); /* Type */ ++ build_append_int_noprefix(entry, 8, 1); /* Length */ ++ build_append_int_noprefix(entry, uid, 1); /* ACPI Processor ID */ ++ build_append_int_noprefix(entry, apic_id, 1); /* APIC ID */ ++ build_append_int_noprefix(entry, flags, 4); /* Flags */ ++} ++ + /* build FADT */ + static void init_common_fadt_data(AcpiFadtData *data) + { +@@ -123,15 +139,17 @@ build_madt(GArray *table_data, BIOSLinker *linker, + build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ + + for (i = 0; i < arch_ids->len; i++) { ++ uint32_t flags; ++ + /* Processor Core Interrupt Controller Structure */ + arch_id = arch_ids->cpus[i].arch_id; +- ++ flags = arch_ids->cpus[i].cpu ? 
1 : 0; + build_append_int_noprefix(table_data, 17, 1); /* Type */ + build_append_int_noprefix(table_data, 15, 1); /* Length */ + build_append_int_noprefix(table_data, 1, 1); /* Version */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor ID */ + build_append_int_noprefix(table_data, arch_id, 4); /* Core ID */ +- build_append_int_noprefix(table_data, 1, 4); /* Flags */ ++ build_append_int_noprefix(table_data, flags, 4); /* Flags */ + } + + /* Extend I/O Interrupt Controller Structure */ +@@ -334,6 +352,7 @@ build_la_ged_aml(Aml *dsdt, MachineState *machine) + { + uint32_t event; + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); ++ CPUHotplugFeatures opts; + + build_ged_aml(dsdt, "\\_SB."GED_DEVICE, + HOTPLUG_HANDLER(lvms->acpi_ged), +@@ -346,6 +365,18 @@ build_la_ged_aml(Aml *dsdt, MachineState *machine) + AML_SYSTEM_MEMORY, + VIRT_GED_MEM_ADDR); + } ++ ++ if (event & ACPI_GED_CPU_HOTPLUG_EVT) { ++ opts.acpi_1_compatible = false; ++ opts.has_legacy_cphp = false; ++ opts.fw_unplugs_cpu = false; ++ opts.smi_path = NULL; ++ ++ build_cpus_aml(dsdt, machine, opts, virt_madt_cpu_entry, NULL, ++ VIRT_GED_CPUHP_ADDR, "\\_SB", ++ NULL, AML_SYSTEM_MEMORY); ++ } ++ + acpi_dsdt_add_power_button(dsdt); + } + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index e7734ed3c0..6159fd9470 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -652,11 +652,17 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic, + { + DeviceState *dev; + MachineState *ms = MACHINE(lvms); ++ MachineClass *mc = MACHINE_GET_CLASS(lvms); + uint32_t event = ACPI_GED_PWR_DOWN_EVT; + + if (ms->ram_slots) { + event |= ACPI_GED_MEM_HOTPLUG_EVT; + } ++ ++ if (mc->has_hotpluggable_cpus) { ++ event |= ACPI_GED_CPU_HOTPLUG_EVT; ++ } ++ + dev = qdev_new(TYPE_ACPI_GED); + qdev_prop_set_uint32(dev, "ged-event", event); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); +@@ -668,6 +674,10 @@ static DeviceState *create_acpi_ged(DeviceState 
*pch_pic, + /* ged regs used for reset and power down */ + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, VIRT_GED_REG_ADDR); + ++ if (mc->has_hotpluggable_cpus) { ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, VIRT_GED_CPUHP_ADDR); ++ } ++ + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, + qdev_get_gpio_in(pch_pic, VIRT_SCI_IRQ - VIRT_GSI_BASE)); + return dev; +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 27c52af9f3..98c990327b 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -32,6 +32,7 @@ + #define VIRT_GED_EVT_ADDR 0x100e0000 + #define VIRT_GED_MEM_ADDR (VIRT_GED_EVT_ADDR + ACPI_GED_EVT_SEL_LEN) + #define VIRT_GED_REG_ADDR (VIRT_GED_MEM_ADDR + MEMORY_HOTPLUG_IO_LEN) ++#define VIRT_GED_CPUHP_ADDR (VIRT_GED_REG_ADDR + ACPI_GED_REG_COUNT) + + #define COMMAND_LINE_SIZE 512 + +-- +2.39.1 + diff --git a/hw-loongarch-virt-Use-MemTxAttrs-interface-for-misc-.patch b/hw-loongarch-virt-Use-MemTxAttrs-interface-for-misc-.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6ccf48617de2e5fd0f31333f36f1e73fb1573c0 --- /dev/null +++ b/hw-loongarch-virt-Use-MemTxAttrs-interface-for-misc-.patch @@ -0,0 +1,77 @@ +From b63b7b0b6c9bed8e1a316f3838aab7db2e8f2037 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 28 May 2024 16:38:54 +0800 +Subject: [PATCH 29/78] hw/loongarch/virt: Use MemTxAttrs interface for misc + ops + +Use MemTxAttrs interface read_with_attrs/write_with_attrs +for virt_iocsr_misc_ops. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240528083855.1912757-3-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index f7874bccf9..12816c6023 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -915,8 +915,8 @@ static void virt_firmware_init(LoongArchVirtMachineState *lvms) + } + + +-static MemTxResult loongarch_qemu_write(void *opaque, hwaddr addr, uint64_t val, +- unsigned size, MemTxAttrs attrs) ++static MemTxResult virt_iocsr_misc_write(void *opaque, hwaddr addr, uint64_t val, ++ unsigned size, MemTxAttrs attrs) + { + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque); + uint64_t features; +@@ -945,9 +945,9 @@ static MemTxResult loongarch_qemu_write(void *opaque, hwaddr addr, uint64_t val, + return MEMTX_OK; + } + +-static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, +- uint64_t *data, +- unsigned size, MemTxAttrs attrs) ++static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr, ++ uint64_t *data, ++ unsigned size, MemTxAttrs attrs) + { + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque); + uint64_t ret = 0; +@@ -962,7 +962,7 @@ static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, + if (kvm_enabled()) { + ret |= BIT(IOCSRF_VM); + } +- return ret; ++ break; + case VENDOR_REG: + ret = 0x6e6f73676e6f6f4cULL; /* "Loongson" */ + break; +@@ -986,6 +986,8 @@ static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, + ret |= BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE); + } + break; ++ default: ++ g_assert_not_reached(); + } + + *data = ret; +@@ -993,8 +995,8 @@ static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, + } + + static const MemoryRegionOps virt_iocsr_misc_ops = { +- .read_with_attrs = loongarch_qemu_read, +- .write_with_attrs = loongarch_qemu_write, ++ .read_with_attrs = 
virt_iocsr_misc_read, ++ .write_with_attrs = virt_iocsr_misc_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, +-- +2.39.1 + diff --git a/hw-loongarch-virt-pass-random-seed-to-fdt.patch b/hw-loongarch-virt-pass-random-seed-to-fdt.patch new file mode 100644 index 0000000000000000000000000000000000000000..acc6323f0ecde2714b6b82fe38cc789a69c779f4 --- /dev/null +++ b/hw-loongarch-virt-pass-random-seed-to-fdt.patch @@ -0,0 +1,66 @@ +From 573f3bec8137caf829457620380d794165c96a92 Mon Sep 17 00:00:00 2001 +From: "Jason A. Donenfeld" +Date: Thu, 5 Sep 2024 17:33:16 +0200 +Subject: [PATCH 36/78] hw/loongarch: virt: pass random seed to fdt + +If the FDT contains /chosen/rng-seed, then the Linux RNG will use it to +initialize early. Set this using the usual guest random number +generation function. + +This is the same procedure that's done in b91b6b5a2c ("hw/microblaze: +pass random seed to fdt"), e4b4f0b71c ("hw/riscv: virt: pass random seed +to fdt"), c6fe3e6b4c ("hw/openrisc: virt: pass random seed to fdt"), +67f7e426e5 ("hw/i386: pass RNG seed via setup_data entry"), c287941a4d +("hw/rx: pass random seed to fdt"), 5e19cc68fb ("hw/mips: boston: pass +random seed to fdt"), 6b23a67916 ("hw/nios2: virt: pass random seed to fdt") +c4b075318e ("hw/ppc: pass random seed to fdt"), and 5242876f37 +("hw/arm/virt: dt: add rng-seed property"). + +These earlier commits later were amended to rerandomize the RNG seed on +snapshot load, but the LoongArch code somehow already does that, despite +not having this patch here, presumably due to some lucky copy and +pasting. + +Signed-off-by: Jason A. 
Donenfeld +Reviewed-by: Song Gao +Message-Id: <20240905153316.2038769-1-Jason@zx2c4.com> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index a6e9309064..79b16953d2 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -49,6 +49,7 @@ + #include "hw/block/flash.h" + #include "hw/virtio/virtio-iommu.h" + #include "qemu/error-report.h" ++#include "qemu/guest-random.h" + + static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms) + { +@@ -304,6 +305,7 @@ static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, + static void create_fdt(LoongArchVirtMachineState *lvms) + { + MachineState *ms = MACHINE(lvms); ++ uint8_t rng_seed[32]; + + ms->fdt = create_device_tree(&lvms->fdt_size); + if (!ms->fdt) { +@@ -317,6 +319,10 @@ static void create_fdt(LoongArchVirtMachineState *lvms) + qemu_fdt_setprop_cell(ms->fdt, "/", "#address-cells", 0x2); + qemu_fdt_setprop_cell(ms->fdt, "/", "#size-cells", 0x2); + qemu_fdt_add_subnode(ms->fdt, "/chosen"); ++ ++ /* Pass seed to RNG */ ++ qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); ++ qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed", rng_seed, sizeof(rng_seed)); + } + + static void fdt_add_cpu_nodes(const LoongArchVirtMachineState *lvms) +-- +2.39.1 + diff --git a/hw-loongarch-virt-support-up-to-4-serial-ports.patch b/hw-loongarch-virt-support-up-to-4-serial-ports.patch new file mode 100644 index 0000000000000000000000000000000000000000..665c9ffbb7a7e2a71da54b8ca18d7bbb10ecef3f --- /dev/null +++ b/hw-loongarch-virt-support-up-to-4-serial-ports.patch @@ -0,0 +1,172 @@ +From 04895c794652c5da1ece0cad82741bed9aa8ad02 Mon Sep 17 00:00:00 2001 +From: "Jason A. 
Donenfeld" +Date: Sat, 7 Sep 2024 16:34:39 +0200 +Subject: [PATCH 35/78] hw/loongarch: virt: support up to 4 serial ports + +In order to support additional channels of communication using +`-serial`, add several serial ports, up to the standard 4 generally +supported by the 8250 driver. + +Fixed: https://lore.kernel.org/all/20240907143439.2792924-1-Jason@zx2c4.com/ + +Signed-off-by: Jason A. Donenfeld +Tested-by: Bibo Mao +[gaosong: ACPI uart need't reverse order] +Signed-off-by: Song Gao +Message-Id: <20240907143439.2792924-1-Jason@zx2c4.com> +Signed-off-by: Xianglai Li +--- + hw/loongarch/acpi-build.c | 23 +++++++++++++++-------- + hw/loongarch/virt.c | 27 +++++++++++++++++---------- + include/hw/pci-host/ls7a.h | 9 +++++---- + 3 files changed, 37 insertions(+), 22 deletions(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index 1a9d25fc51..33a92223d8 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -31,6 +31,7 @@ + + #include "hw/acpi/generic_event_device.h" + #include "hw/pci-host/gpex.h" ++#include "sysemu/sysemu.h" + #include "sysemu/tpm.h" + #include "hw/platform-bus.h" + #include "hw/acpi/aml-build.h" +@@ -252,23 +253,27 @@ struct AcpiBuildState { + MemoryRegion *linker_mr; + } AcpiBuildState; + +-static void build_uart_device_aml(Aml *table) ++static void build_uart_device_aml(Aml *table, int index) + { + Aml *dev; + Aml *crs; + Aml *pkg0, *pkg1, *pkg2; +- uint32_t uart_irq = VIRT_UART_IRQ; +- +- Aml *scope = aml_scope("_SB"); +- dev = aml_device("COMA"); ++ Aml *scope; ++ uint32_t uart_irq; ++ uint64_t base; ++ ++ uart_irq = VIRT_UART_IRQ + index; ++ base = VIRT_UART_BASE + index * VIRT_UART_SIZE; ++ scope = aml_scope("_SB"); ++ dev = aml_device("COM%d", index); + aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501"))); +- aml_append(dev, aml_name_decl("_UID", aml_int(0))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(index))); + aml_append(dev, aml_name_decl("_CCA", aml_int(1))); + crs = 
aml_resource_template(); + aml_append(crs, + aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, + AML_NON_CACHEABLE, AML_READ_WRITE, +- 0, VIRT_UART_BASE, VIRT_UART_BASE + VIRT_UART_SIZE - 1, ++ 0, base, base + VIRT_UART_SIZE - 1, + 0, VIRT_UART_SIZE)); + aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH, + AML_SHARED, &uart_irq, 1)); +@@ -401,6 +406,7 @@ static void acpi_dsdt_add_tpm(Aml *scope, LoongArchVirtMachineState *vms) + static void + build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine) + { ++ int i; + Aml *dsdt, *scope, *pkg; + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine); + AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = lvms->oem_id, +@@ -408,7 +414,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine) + + acpi_table_begin(&table, table_data); + dsdt = init_aml_allocator(); +- build_uart_device_aml(dsdt); ++ for (i = 0; i < VIRT_UART_COUNT; i++) ++ build_uart_device_aml(dsdt, i); + build_pci_device_aml(dsdt, lvms); + build_la_ged_aml(dsdt, machine); + build_flash_aml(dsdt, lvms); +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index a7283e6755..a6e9309064 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -281,10 +281,10 @@ static void fdt_add_rtc_node(LoongArchVirtMachineState *lvms, + } + + static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, +- uint32_t *pch_pic_phandle) ++ uint32_t *pch_pic_phandle, hwaddr base, ++ int irq, bool chosen) + { + char *nodename; +- hwaddr base = VIRT_UART_BASE; + hwaddr size = VIRT_UART_SIZE; + MachineState *ms = MACHINE(lvms); + +@@ -293,9 +293,9 @@ static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, + qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "ns16550a"); + qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0x0, base, 0x0, size); + qemu_fdt_setprop_cell(ms->fdt, nodename, "clock-frequency", 100000000); +- qemu_fdt_setprop_string(ms->fdt, "/chosen", 
"stdout-path", nodename); +- qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", +- VIRT_UART_IRQ - VIRT_GSI_BASE, 0x4); ++ if (chosen) ++ qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, 0x4); + qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", + *pch_pic_phandle); + g_free(nodename); +@@ -706,11 +706,18 @@ static void virt_devices_init(DeviceState *pch_pic, + /* Add pcie node */ + fdt_add_pcie_node(lvms, pch_pic_phandle, pch_msi_phandle); + +- serial_mm_init(get_system_memory(), VIRT_UART_BASE, 0, +- qdev_get_gpio_in(pch_pic, +- VIRT_UART_IRQ - VIRT_GSI_BASE), +- 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); +- fdt_add_uart_node(lvms, pch_pic_phandle); ++ /* ++ * Create uart fdt node in reverse order so that they appear ++ * in the finished device tree lowest address first ++ */ ++ for (i = VIRT_UART_COUNT; i --> 0;) { ++ hwaddr base = VIRT_UART_BASE + i * VIRT_UART_SIZE; ++ int irq = VIRT_UART_IRQ + i - VIRT_GSI_BASE; ++ serial_mm_init(get_system_memory(), base, 0, ++ qdev_get_gpio_in(pch_pic, irq), ++ 115200, serial_hd(i), DEVICE_LITTLE_ENDIAN); ++ fdt_add_uart_node(lvms, pch_pic_phandle, base, irq, i == 0); ++ } + + /* Network init */ + for (i = 0; i < nb_nics; i++) { +diff --git a/include/hw/pci-host/ls7a.h b/include/hw/pci-host/ls7a.h +index cd7c9ec7bc..79d4ea8501 100644 +--- a/include/hw/pci-host/ls7a.h ++++ b/include/hw/pci-host/ls7a.h +@@ -36,17 +36,18 @@ + #define VIRT_PCH_PIC_IRQ_NUM 32 + #define VIRT_GSI_BASE 64 + #define VIRT_DEVICE_IRQS 16 ++#define VIRT_UART_COUNT 4 + #define VIRT_UART_IRQ (VIRT_GSI_BASE + 2) + #define VIRT_UART_BASE 0x1fe001e0 +-#define VIRT_UART_SIZE 0X100 +-#define VIRT_RTC_IRQ (VIRT_GSI_BASE + 3) ++#define VIRT_UART_SIZE 0x100 ++#define VIRT_RTC_IRQ (VIRT_GSI_BASE + 6) + #define VIRT_MISC_REG_BASE (VIRT_PCH_REG_BASE + 0x00080000) + #define VIRT_RTC_REG_BASE (VIRT_MISC_REG_BASE + 0x00050100) + #define VIRT_RTC_LEN 0x100 +-#define 
VIRT_SCI_IRQ (VIRT_GSI_BASE + 4) +#define VIRT_SCI_IRQ (VIRT_GSI_BASE + 7) + + #define VIRT_PLATFORM_BUS_BASEADDRESS 0x16000000 + #define VIRT_PLATFORM_BUS_SIZE 0x2000000 + #define VIRT_PLATFORM_BUS_NUM_IRQS 2 +-#define VIRT_PLATFORM_BUS_IRQ (VIRT_GSI_BASE + 5) ++#define VIRT_PLATFORM_BUS_IRQ (VIRT_GSI_BASE + 8) + #endif +-- +2.39.1 + diff --git a/hw-misc-aspeed_hace-Fix-buffer-overflow-in-has_paddi.patch b/hw-misc-aspeed_hace-Fix-buffer-overflow-in-has_paddi.patch new file mode 100644 index 0000000000000000000000000000000000000000..379be17fe7897ccc8d737622e486d4a466349d8a --- /dev/null +++ b/hw-misc-aspeed_hace-Fix-buffer-overflow-in-has_paddi.patch @@ -0,0 +1,50 @@ +From 7e1bd6e7e109c6228bc4c40ea6f2af2d7f281fca Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Tue, 8 Apr 2025 05:59:29 -0400 +Subject: [PATCH] hw/misc/aspeed_hace: Fix buffer overflow in has_padding + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 78877b2e06464f49f777e086845e094ea7bc82ef + +The maximum padding size is either 64 or 128 bytes and should always be smaller +than "req_len". If "padding_size" exceeds "req_len", then +"req_len - padding_size" underflows due to "uint32_t" data type, leading to a +large incorrect value (e.g., `0xFFXXXXXX`). This causes an out-of-bounds memory +access, potentially leading to a buffer overflow. + +Added a check to ensure "padding_size" does not exceed "req_len" before +computing "pad_offset". This prevents "req_len - padding_size" from underflowing +and avoids accessing invalid memory.
+ +Signed-off-by: Jamin Lin +Reviewed-by: Cédric Le Goater +Fixes: 5cd7d8564a8b563da724b9e6264c967f0a091afa ("aspeed/hace: Support AST2600 HACE ") +Link: https://lore.kernel.org/qemu-devel/20250321092623.2097234-3-jamin_lin@aspeedtech.com +Signed-off-by: Cédric Le Goater +Signed-off-by: qihao_yewu +--- + hw/misc/aspeed_hace.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c +index b07506ec04..8706e3d376 100644 +--- a/hw/misc/aspeed_hace.c ++++ b/hw/misc/aspeed_hace.c +@@ -123,6 +123,11 @@ static bool has_padding(AspeedHACEState *s, struct iovec *iov, + if (*total_msg_len <= s->total_req_len) { + uint32_t padding_size = s->total_req_len - *total_msg_len; + uint8_t *padding = iov->iov_base; ++ ++ if (padding_size > req_len) { ++ return false; ++ } ++ + *pad_offset = req_len - padding_size; + if (padding[*pad_offset] == 0x80) { + return true; +-- +2.41.0.windows.1 + diff --git a/hw-misc-bcm2835_property-Fix-handling-of-FRAMEBUFFER.patch b/hw-misc-bcm2835_property-Fix-handling-of-FRAMEBUFFER.patch new file mode 100644 index 0000000000000000000000000000000000000000..941afc2872dc36978e264c1b129f9c219fc76878 --- /dev/null +++ b/hw-misc-bcm2835_property-Fix-handling-of-FRAMEBUFFER.patch @@ -0,0 +1,92 @@ +From 93959a5378f57190fb79dd1ccdefb8d8cd095b58 Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 10:29:32 +0800 +Subject: [PATCH] hw/misc/bcm2835_property: Fix handling of + FRAMEBUFFER_SET_PALETTE +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry picked from commit 0892fffc2abaadfb5d8b79bb0250ae1794862560 + +The documentation of the "Set palette" mailbox property at +https://github.com/raspberrypi/firmware/wiki/Mailbox-property-interface#set-palette +says it has the form: + + Length: 24..1032 + Value: + u32: offset: first palette index to set (0-255) + u32: length: number of palette entries to set (1-256) + u32...: RGBA palette values (offset to 
offset+length-1) + +We get this wrong in a couple of ways: + * we aren't checking the offset and length are in range, so the guest + can make us spin for a long time by providing a large length + * the bounds check on our loop is wrong: we should iterate through + 'length' palette entries, not 'length - offset' entries + +Fix the loop to implement the bounds checks and get the loop +condition right. In the process, make the variables local to +this switch case, rather than function-global, so it's clearer +what type they are when reading the code. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20240723131029.1159908-2-peter.maydell@linaro.org +Signed-off-by: Gao Jiazhen +--- + hw/misc/bcm2835_property.c | 27 ++++++++++++++++----------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c +index ff55a4e2cd..12a1bc558a 100644 +--- a/hw/misc/bcm2835_property.c ++++ b/hw/misc/bcm2835_property.c +@@ -28,8 +28,6 @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) + uint32_t tot_len; + size_t resplen; + uint32_t tmp; +- int n; +- uint32_t offset, length, color; + + /* + * Copy the current state of the framebuffer config; we will update +@@ -264,18 +262,25 @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) + resplen = 16; + break; + case RPI_FWREQ_FRAMEBUFFER_SET_PALETTE: +- offset = ldl_le_phys(&s->dma_as, value + 12); +- length = ldl_le_phys(&s->dma_as, value + 16); +- n = 0; +- while (n < length - offset) { +- color = ldl_le_phys(&s->dma_as, value + 20 + (n << 2)); +- stl_le_phys(&s->dma_as, +- s->fbdev->vcram_base + ((offset + n) << 2), color); +- n++; ++ { ++ uint32_t offset = ldl_le_phys(&s->dma_as, value + 12); ++ uint32_t length = ldl_le_phys(&s->dma_as, value + 16); ++ int resp; ++ ++ if (offset > 255 || length < 1 || length > 256) { ++ resp = 1; /* invalid request */ ++ 
} else { ++ for (uint32_t e = 0; e < length; e++) { ++ uint32_t color = ldl_le_phys(&s->dma_as, value + 20 + (e << 2)); ++ stl_le_phys(&s->dma_as, ++ s->fbdev->vcram_base + ((offset + e) << 2), color); ++ } ++ resp = 0; + } +- stl_le_phys(&s->dma_as, value + 12, 0); ++ stl_le_phys(&s->dma_as, value + 12, resp); + resplen = 4; + break; ++ } + case RPI_FWREQ_FRAMEBUFFER_GET_NUM_DISPLAYS: + stl_le_phys(&s->dma_as, value + 12, 1); + resplen = 4; +-- +2.41.0.windows.1 + diff --git a/hw-misc-mos6522-Fix-bad-class-definition-of-the-MOS6.patch b/hw-misc-mos6522-Fix-bad-class-definition-of-the-MOS6.patch new file mode 100644 index 0000000000000000000000000000000000000000..6177a5c4132b9cfa01ef5d27a023ece9fad34892 --- /dev/null +++ b/hw-misc-mos6522-Fix-bad-class-definition-of-the-MOS6.patch @@ -0,0 +1,49 @@ +From f0be5a2c99d2f893a27839cd5eb5fa74f3ff5564 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 18 Nov 2024 21:03:55 -0500 +Subject: [PATCH] hw/misc/mos6522: Fix bad class definition of the MOS6522 + device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from c3d7c18b0d616cf7fb3c1f325503e1462307209d + +When compiling QEMU with --enable-cfi, the "q800" m68k machine +currently crashes very early, when the q800_machine_init() function +tries to wire the interrupts of the "via1" device. +This happens because TYPE_MOS6522_Q800_VIA1 is supposed to be a +proper SysBus device, but its parent (TYPE_MOS6522) has a mistake +in its class definition where it is only derived from DeviceClass, +and not from SysBusDeviceClass, so we end up in funny memory access +issues here. Using the right class hierarchy for the MOS6522 device +fixes the problem.
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2675 +Signed-off-by: Thomas Huth +Fixes: 51f233ec92 ("misc: introduce new mos6522 VIA device") +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Mark Cave-Ayland +Message-ID: <20241114104653.963812-1-thuth@redhat.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + include/hw/misc/mos6522.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/misc/mos6522.h b/include/hw/misc/mos6522.h +index fba45668ab..920871a598 100644 +--- a/include/hw/misc/mos6522.h ++++ b/include/hw/misc/mos6522.h +@@ -154,7 +154,7 @@ struct MOS6522State { + OBJECT_DECLARE_TYPE(MOS6522State, MOS6522DeviceClass, MOS6522) + + struct MOS6522DeviceClass { +- DeviceClass parent_class; ++ SysBusDeviceClass parent_class; + + ResettablePhases parent_phases; + void (*portB_write)(MOS6522State *dev); +-- +2.41.0.windows.1 + diff --git a/hw-misc-nrf51_rng-Don-t-use-BIT_MASK-when-we-mean-BI.patch b/hw-misc-nrf51_rng-Don-t-use-BIT_MASK-when-we-mean-BI.patch new file mode 100644 index 0000000000000000000000000000000000000000..49205716cd2f160707fb047ef164441db51c79ca --- /dev/null +++ b/hw-misc-nrf51_rng-Don-t-use-BIT_MASK-when-we-mean-BI.patch @@ -0,0 +1,70 @@ +From e6b4460566522f1a9d608217bcb1534bf6709cab Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 12 Dec 2024 12:16:01 +0800 +Subject: [PATCH] hw/misc/nrf51_rng: Don't use BIT_MASK() when we mean BIT() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from a29a9776407e68c5560687e07828925bda710150 + +The BIT_MASK() macro from bitops.h provides the mask of a bit +within a particular word of a multi-word bit array; it is intended +to be used with its counterpart BIT_WORD() that gives the index +of the word in the array.
+ +In nrf51_rng we are using it for cases where we have a bit number +that we know is the index of a bit within a single word (in fact, it +happens that all the bit numbers we pass to it are zero). This +happens to give the right answer, but the macro that actually +does the job we want here is BIT(). + +Use BIT() instead of BIT_MASK(). + +Signed-off-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20241108135644.4007151-1-peter.maydell@linaro.org> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Jiao +--- + hw/misc/nrf51_rng.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/hw/misc/nrf51_rng.c b/hw/misc/nrf51_rng.c +index fc86e1b697..e911b3a3a3 100644 +--- a/hw/misc/nrf51_rng.c ++++ b/hw/misc/nrf51_rng.c +@@ -107,25 +107,25 @@ static void rng_write(void *opaque, hwaddr offset, + break; + case NRF51_RNG_REG_SHORTS: + s->shortcut_stop_on_valrdy = +- (value & BIT_MASK(NRF51_RNG_REG_SHORTS_VALRDY_STOP)) ? 1 : 0; ++ (value & BIT(NRF51_RNG_REG_SHORTS_VALRDY_STOP)) ? 1 : 0; + break; + case NRF51_RNG_REG_INTEN: + s->interrupt_enabled = +- (value & BIT_MASK(NRF51_RNG_REG_INTEN_VALRDY)) ? 1 : 0; ++ (value & BIT(NRF51_RNG_REG_INTEN_VALRDY)) ? 1 : 0; + break; + case NRF51_RNG_REG_INTENSET: +- if (value & BIT_MASK(NRF51_RNG_REG_INTEN_VALRDY)) { ++ if (value & BIT(NRF51_RNG_REG_INTEN_VALRDY)) { + s->interrupt_enabled = 1; + } + break; + case NRF51_RNG_REG_INTENCLR: +- if (value & BIT_MASK(NRF51_RNG_REG_INTEN_VALRDY)) { ++ if (value & BIT(NRF51_RNG_REG_INTEN_VALRDY)) { + s->interrupt_enabled = 0; + } + break; + case NRF51_RNG_REG_CONFIG: + s->filter_enabled = +- (value & BIT_MASK(NRF51_RNG_REG_CONFIG_DECEN)) ? 1 : 0; ++ (value & BIT(NRF51_RNG_REG_CONFIG_DECEN)) ? 
1 : 0; + break; + + default: +-- +2.41.0.windows.1 + diff --git a/hw-misc-psp-Pin-the-hugepage-memory-specified-by-mem.patch b/hw-misc-psp-Pin-the-hugepage-memory-specified-by-mem.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf29af81203e7e0c9bb6a198cbd9ed14eaf036ca --- /dev/null +++ b/hw-misc-psp-Pin-the-hugepage-memory-specified-by-mem.patch @@ -0,0 +1,212 @@ +From ddaa38853d386e5b9f9fa1c3813048872c8ad687 Mon Sep 17 00:00:00 2001 +From: niuyongwen +Date: Sun, 29 Sep 2024 09:45:15 +0800 +Subject: [PATCH] hw/misc/psp: Pin the hugepage memory specified by mem2 during + use for psp + +Signed-off-by: niuyongwen +--- + hw/misc/psp.c | 138 +++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 121 insertions(+), 17 deletions(-) + +diff --git a/hw/misc/psp.c b/hw/misc/psp.c +index 4eb5ca0e0b..03e8663027 100644 +--- a/hw/misc/psp.c ++++ b/hw/misc/psp.c +@@ -17,6 +17,7 @@ + #include "sysemu/runstate.h" + #include "exec/memory.h" + #include "exec/address-spaces.h" ++#include "exec/ramblock.h" + #include "hw/i386/e820_memory_layout.h" + #include + +@@ -38,6 +39,8 @@ struct PSPDevState { + * the TKM module uses different key spaces based on different vids. 
+ */ + uint32_t vid; ++ /* pinned hugepage numbers */ ++ int hp_num; + }; + + #define PSP_DEV_PATH "/dev/hygon_psp_config" +@@ -45,6 +48,8 @@ struct PSPDevState { + #define PSP_IOC_MUTEX_ENABLE _IOWR(HYGON_PSP_IOC_TYPE, 1, NULL) + #define PSP_IOC_MUTEX_DISABLE _IOWR(HYGON_PSP_IOC_TYPE, 2, NULL) + #define PSP_IOC_VPSP_OPT _IOWR(HYGON_PSP_IOC_TYPE, 3, NULL) ++#define PSP_IOC_PIN_USER_PAGE _IOWR(HYGON_PSP_IOC_TYPE, 4, NULL) ++#define PSP_IOC_UNPIN_USER_PAGE _IOWR(HYGON_PSP_IOC_TYPE, 5, NULL) + + enum VPSP_DEV_CTRL_OPCODE { + VPSP_OP_VID_ADD, +@@ -69,6 +74,109 @@ struct psp_dev_ctrl { + } __attribute__ ((packed)) data; + }; + ++static MemoryRegion *find_memory_region_by_name(MemoryRegion *root, const char *name) { ++ MemoryRegion *subregion; ++ MemoryRegion *result; ++ ++ if (strcmp(root->name, name) == 0) ++ return root; ++ ++ QTAILQ_FOREACH(subregion, &root->subregions, subregions_link) { ++ result = find_memory_region_by_name(subregion, name); ++ if (result) { ++ return result; ++ } ++ } ++ ++ return NULL; ++} ++ ++static int pin_user_hugepage(int fd, uint64_t vaddr) ++{ ++ int ret; ++ ++ ret = ioctl(fd, PSP_IOC_PIN_USER_PAGE, vaddr); ++ /* 22: Invalid argument, some old kernel doesn't support this ioctl command */ ++ if (ret != 0 && errno == EINVAL) { ++ ret = 0; ++ } ++ return ret; ++} ++ ++static int unpin_user_hugepage(int fd, uint64_t vaddr) ++{ ++ int ret; ++ ++ ret = ioctl(fd, PSP_IOC_UNPIN_USER_PAGE, vaddr); ++ /* 22: Invalid argument, some old kernel doesn't support this ioctl command */ ++ if (ret != 0 && errno == EINVAL) { ++ ret = 0; ++ } ++ return ret; ++} ++ ++static int pin_psp_user_hugepages(struct PSPDevState *state, MemoryRegion *root) ++{ ++ int ret = 0; ++ char mr_name[128] = {0}; ++ int i, pinned_num; ++ MemoryRegion *find_mr = NULL; ++ ++ for (i = 0 ; i < state->hp_num; ++i) { ++ sprintf(mr_name, "mem2-%d", i); ++ find_mr = find_memory_region_by_name(root, mr_name); ++ if (!find_mr) { ++ error_report("fail to find memory region by name %s.", 
mr_name); ++ ret = -ENOMEM; ++ goto end; ++ } ++ ++ ret = pin_user_hugepage(state->dev_fd, (uint64_t)find_mr->ram_block->host); ++ if (ret) { ++ error_report("fail to pin_user_hugepage, ret: %d.", ret); ++ goto end; ++ } ++ } ++end: ++ if (ret) { ++ pinned_num = i; ++ for (i = 0 ; i < pinned_num; ++i) { ++ sprintf(mr_name, "mem2-%d", i); ++ find_mr = find_memory_region_by_name(root, mr_name); ++ if (!find_mr) { ++ continue; ++ } ++ unpin_user_hugepage(state->dev_fd, (uint64_t)find_mr->ram_block->host); ++ } ++ ++ } ++ return ret; ++} ++ ++static int unpin_psp_user_hugepages(struct PSPDevState *state, MemoryRegion *root) ++{ ++ int ret = 0; ++ char mr_name[128] = {0}; ++ int i; ++ MemoryRegion *find_mr = NULL; ++ ++ for (i = 0 ; i < state->hp_num; ++i) { ++ sprintf(mr_name, "mem2-%d", i); ++ find_mr = find_memory_region_by_name(root, mr_name); ++ if (!find_mr) { ++ continue; ++ } ++ ++ ret = unpin_user_hugepage(state->dev_fd, (uint64_t)find_mr->ram_block->host); ++ if (ret) { ++ error_report("fail to unpin_user_hugepage, ret: %d.", ret); ++ goto end; ++ } ++ } ++end: ++ return ret; ++} ++ + static void psp_dev_destroy(PSPDevState *state) + { + struct psp_dev_ctrl ctrl = { 0 }; +@@ -77,6 +185,11 @@ static void psp_dev_destroy(PSPDevState *state) + ctrl.op = VPSP_OP_VID_DEL; + if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) { + error_report("VPSP_OP_VID_DEL: %d", -errno); ++ } ++ ++ /* Unpin hugepage memory */ ++ if (unpin_psp_user_hugepages(state, get_system_memory())) { ++ error_report("unpin_psp_user_hugepages failed"); + } else { + state->enabled = false; + } +@@ -99,23 +212,6 @@ static void psp_dev_shutdown_notify(Notifier *notifier, void *data) + psp_dev_destroy(state); + } + +-static MemoryRegion *find_memory_region_by_name(MemoryRegion *root, const char *name) { +- MemoryRegion *subregion; +- MemoryRegion *result; +- +- if (strcmp(root->name, name) == 0) +- return root; +- +- QTAILQ_FOREACH(subregion, &root->subregions, subregions_link) { +- result = 
find_memory_region_by_name(subregion, name); +- if (result) { +- return result; +- } +- } +- +- return NULL; +-} +- + static void psp_dev_realize(DeviceState *dev, Error **errp) + { + int i; +@@ -150,6 +246,8 @@ static void psp_dev_realize(DeviceState *dev, Error **errp) + ram2_end = find_mr->addr + find_mr->size - 1; + } + ++ state->hp_num = i; ++ + if (ram2_start != ram2_end) { + ctrl.op = VPSP_OP_SET_GPA; + ctrl.data.gpa.gpa_start = ram2_start; +@@ -159,6 +257,12 @@ static void psp_dev_realize(DeviceState *dev, Error **errp) + ram2_start, ram2_end, -errno); + goto del_vid; + } ++ ++ /* Pin hugepage memory */ ++ if(pin_psp_user_hugepages(state, root_mr)) { ++ error_setg(errp, "pin_psp_user_hugepages failed."); ++ goto del_vid; ++ } + } + + state->enabled = true; +-- +2.41.0.windows.1 + diff --git a/hw-misc-support-tkm-use-mem2-memory.patch b/hw-misc-support-tkm-use-mem2-memory.patch new file mode 100644 index 0000000000000000000000000000000000000000..eb9329b69776e54184de0f9a809385d1ada32afa --- /dev/null +++ b/hw-misc-support-tkm-use-mem2-memory.patch @@ -0,0 +1,123 @@ +From 884c4d6bc101454f0e0f3c779bc1155024b056c3 Mon Sep 17 00:00:00 2001 +From: xiongmengbiao +Date: Wed, 29 May 2024 15:18:55 +0800 +Subject: [PATCH] hw/misc: support tkm use mem2 memory + +Signed-off-by: xiongmengbiao +--- + hw/misc/psp.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 63 insertions(+), 1 deletion(-) + +diff --git a/hw/misc/psp.c b/hw/misc/psp.c +index 6ff2ceec10..4eb5ca0e0b 100644 +--- a/hw/misc/psp.c ++++ b/hw/misc/psp.c +@@ -15,6 +15,9 @@ + #include "migration/vmstate.h" + #include "hw/qdev-properties.h" + #include "sysemu/runstate.h" ++#include "exec/memory.h" ++#include "exec/address-spaces.h" ++#include "hw/i386/e820_memory_layout.h" + #include + + #define TYPE_PSP_DEV "psp" +@@ -46,14 +49,24 @@ struct PSPDevState { + enum VPSP_DEV_CTRL_OPCODE { + VPSP_OP_VID_ADD, + VPSP_OP_VID_DEL, ++ VPSP_OP_SET_DEFAULT_VID_PERMISSION, ++ 
VPSP_OP_GET_DEFAULT_VID_PERMISSION, ++ VPSP_OP_SET_GPA, + }; + + struct psp_dev_ctrl { + unsigned char op; ++ unsigned char resv[3]; + union { + unsigned int vid; ++ // Set or check the permissions for the default VID ++ unsigned int def_vid_perm; ++ struct { ++ uint64_t gpa_start; ++ uint64_t gpa_end; ++ } gpa; + unsigned char reserved[128]; +- } data; ++ } __attribute__ ((packed)) data; + }; + + static void psp_dev_destroy(PSPDevState *state) +@@ -86,10 +99,32 @@ static void psp_dev_shutdown_notify(Notifier *notifier, void *data) + psp_dev_destroy(state); + } + ++static MemoryRegion *find_memory_region_by_name(MemoryRegion *root, const char *name) { ++ MemoryRegion *subregion; ++ MemoryRegion *result; ++ ++ if (strcmp(root->name, name) == 0) ++ return root; ++ ++ QTAILQ_FOREACH(subregion, &root->subregions, subregions_link) { ++ result = find_memory_region_by_name(subregion, name); ++ if (result) { ++ return result; ++ } ++ } ++ ++ return NULL; ++} ++ + static void psp_dev_realize(DeviceState *dev, Error **errp) + { ++ int i; ++ char mr_name[128] = {0}; + struct psp_dev_ctrl ctrl = { 0 }; + PSPDevState *state = PSP_DEV(dev); ++ MemoryRegion *root_mr = get_system_memory(); ++ MemoryRegion *find_mr = NULL; ++ uint64_t ram2_start = 0, ram2_end = 0; + + state->dev_fd = qemu_open_old(PSP_DEV_PATH, O_RDWR); + if (state->dev_fd < 0) { +@@ -104,9 +139,36 @@ static void psp_dev_realize(DeviceState *dev, Error **errp) + goto end; + } + ++ for (i = 0 ;; ++i) { ++ sprintf(mr_name, "mem2-%d", i); ++ find_mr = find_memory_region_by_name(root_mr, mr_name); ++ if (!find_mr) ++ break; ++ ++ if (!ram2_start) ++ ram2_start = find_mr->addr; ++ ram2_end = find_mr->addr + find_mr->size - 1; ++ } ++ ++ if (ram2_start != ram2_end) { ++ ctrl.op = VPSP_OP_SET_GPA; ++ ctrl.data.gpa.gpa_start = ram2_start; ++ ctrl.data.gpa.gpa_end = ram2_end; ++ if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) { ++ error_setg(errp, "psp_dev_realize VPSP_OP_SET_GPA (start 0x%lx, end 0x%lx), return %d", 
++ ram2_start, ram2_end, -errno); ++ goto del_vid; ++ } ++ } ++ + state->enabled = true; + state->shutdown_notifier.notify = psp_dev_shutdown_notify; + qemu_register_shutdown_notifier(&state->shutdown_notifier); ++ ++ return; ++del_vid: ++ ctrl.op = VPSP_OP_VID_DEL; ++ ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl); + end: + return; + } +-- +2.41.0.windows.1 + diff --git a/hw-misc-support-vpsp.patch b/hw-misc-support-vpsp.patch new file mode 100644 index 0000000000000000000000000000000000000000..21984f5d2523c2d013f78c5ab0bf9cb21f754241 --- /dev/null +++ b/hw-misc-support-vpsp.patch @@ -0,0 +1,190 @@ +From f74cee44cd57da213a790f7711a68da0f4de061a Mon Sep 17 00:00:00 2001 +From: xiongmengbiao +Date: Thu, 30 Nov 2023 13:47:21 +0800 +Subject: [PATCH] hw/misc: support vpsp + +simulate a psp misc device for support tkm's key isolation + +Signed-off-by: xiongmengbiao +--- + hw/misc/Kconfig | 4 ++ + hw/misc/meson.build | 1 + + hw/misc/psp.c | 141 ++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 146 insertions(+) + create mode 100644 hw/misc/psp.c + +diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig +index cc8a8c1418..2ea5c68eb5 100644 +--- a/hw/misc/Kconfig ++++ b/hw/misc/Kconfig +@@ -200,4 +200,8 @@ config IOSB + config XLNX_VERSAL_TRNG + bool + ++config PSP_DEV ++ bool ++ default y ++ + source macio/Kconfig +diff --git a/hw/misc/meson.build b/hw/misc/meson.build +index 36c20d5637..28cba0ac28 100644 +--- a/hw/misc/meson.build ++++ b/hw/misc/meson.build +@@ -9,6 +9,7 @@ system_ss.add(when: 'CONFIG_UNIMP', if_true: files('unimp.c')) + system_ss.add(when: 'CONFIG_EMPTY_SLOT', if_true: files('empty_slot.c')) + system_ss.add(when: 'CONFIG_LED', if_true: files('led.c')) + system_ss.add(when: 'CONFIG_PVPANIC_COMMON', if_true: files('pvpanic.c')) ++system_ss.add(when: 'CONFIG_PSP_DEV', if_true: files('psp.c')) + + # ARM devices + system_ss.add(when: 'CONFIG_PL310', if_true: files('arm_l2x0.c')) +diff --git a/hw/misc/psp.c b/hw/misc/psp.c +new file mode 100644 +index 
0000000000..6ff2ceec10 +--- /dev/null ++++ b/hw/misc/psp.c +@@ -0,0 +1,141 @@ ++/* ++ * hygon psp device emulation ++ * ++ * Copyright 2024 HYGON Corp. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/compiler.h" ++#include "qemu/error-report.h" ++#include "qapi/error.h" ++#include "migration/vmstate.h" ++#include "hw/qdev-properties.h" ++#include "sysemu/runstate.h" ++#include ++ ++#define TYPE_PSP_DEV "psp" ++OBJECT_DECLARE_SIMPLE_TYPE(PSPDevState, PSP_DEV) ++ ++struct PSPDevState { ++ /* Private */ ++ DeviceState pdev; ++ ++ /* Public */ ++ Notifier shutdown_notifier; ++ int dev_fd; ++ bool enabled; ++ ++ /** ++ * vid is used to identify a virtual machine in qemu. ++ * When a virtual machine accesses a tkm key, ++ * the TKM module uses different key spaces based on different vids. ++ */ ++ uint32_t vid; ++}; ++ ++#define PSP_DEV_PATH "/dev/hygon_psp_config" ++#define HYGON_PSP_IOC_TYPE 'H' ++#define PSP_IOC_MUTEX_ENABLE _IOWR(HYGON_PSP_IOC_TYPE, 1, NULL) ++#define PSP_IOC_MUTEX_DISABLE _IOWR(HYGON_PSP_IOC_TYPE, 2, NULL) ++#define PSP_IOC_VPSP_OPT _IOWR(HYGON_PSP_IOC_TYPE, 3, NULL) ++ ++enum VPSP_DEV_CTRL_OPCODE { ++ VPSP_OP_VID_ADD, ++ VPSP_OP_VID_DEL, ++}; ++ ++struct psp_dev_ctrl { ++ unsigned char op; ++ union { ++ unsigned int vid; ++ unsigned char reserved[128]; ++ } data; ++}; ++ ++static void psp_dev_destroy(PSPDevState *state) ++{ ++ struct psp_dev_ctrl ctrl = { 0 }; ++ if (state && state->dev_fd) { ++ if (state->enabled) { ++ ctrl.op = VPSP_OP_VID_DEL; ++ if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) { ++ error_report("VPSP_OP_VID_DEL: %d", -errno); ++ } else { ++ state->enabled = false; ++ } ++ } ++ qemu_close(state->dev_fd); ++ state->dev_fd = 0; ++ } ++} ++ ++/** ++ * Guest OS performs shut down operations through 'shutdown' and 'powerdown' event. 
++ * The 'powerdown' event will also trigger 'shutdown' in the end, ++ * so only attention to the 'shutdown' event. ++ * ++ * When Guest OS trigger 'reboot' or 'reset' event, to do nothing. ++*/ ++static void psp_dev_shutdown_notify(Notifier *notifier, void *data) ++{ ++ PSPDevState *state = container_of(notifier, PSPDevState, shutdown_notifier); ++ psp_dev_destroy(state); ++} ++ ++static void psp_dev_realize(DeviceState *dev, Error **errp) ++{ ++ struct psp_dev_ctrl ctrl = { 0 }; ++ PSPDevState *state = PSP_DEV(dev); ++ ++ state->dev_fd = qemu_open_old(PSP_DEV_PATH, O_RDWR); ++ if (state->dev_fd < 0) { ++ error_setg(errp, "fail to open %s, errno %d.", PSP_DEV_PATH, errno); ++ goto end; ++ } ++ ++ ctrl.op = VPSP_OP_VID_ADD; ++ ctrl.data.vid = state->vid; ++ if (ioctl(state->dev_fd, PSP_IOC_VPSP_OPT, &ctrl) < 0) { ++ error_setg(errp, "psp_dev_realize VPSP_OP_VID_ADD vid %d, return %d", ctrl.data.vid, -errno); ++ goto end; ++ } ++ ++ state->enabled = true; ++ state->shutdown_notifier.notify = psp_dev_shutdown_notify; ++ qemu_register_shutdown_notifier(&state->shutdown_notifier); ++end: ++ return; ++} ++ ++static struct Property psp_dev_properties[] = { ++ DEFINE_PROP_UINT32("vid", PSPDevState, vid, 0), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void psp_dev_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ ++ dc->desc = "PSP Device"; ++ dc->realize = psp_dev_realize; ++ set_bit(DEVICE_CATEGORY_MISC, dc->categories); ++ device_class_set_props(dc, psp_dev_properties); ++} ++ ++static const TypeInfo psp_dev_info = { ++ .name = TYPE_PSP_DEV, ++ .parent = TYPE_DEVICE, ++ .instance_size = sizeof(PSPDevState), ++ .class_init = psp_dev_class_init, ++}; ++ ++static void psp_dev_register_types(void) ++{ ++ type_register_static(&psp_dev_info); ++} ++ ++type_init(psp_dev_register_types) +-- +2.41.0.windows.1 + diff --git a/hw-net-cadence_gem-fix-register-mask-initialization.patch 
b/hw-net-cadence_gem-fix-register-mask-initialization.patch new file mode 100644 index 0000000000000000000000000000000000000000..b77da2b0a571ef559cbb8a7d69395e0e0544a165 --- /dev/null +++ b/hw-net-cadence_gem-fix-register-mask-initialization.patch @@ -0,0 +1,49 @@ +From 7fe1c9d57bf60feadaadabe6ada9ddee378ab244 Mon Sep 17 00:00:00 2001 +From: guping +Date: Fri, 1 Aug 2025 02:38:52 +0000 +Subject: [PATCH] hw/net/cadence_gem: fix register mask initialization + cherry-pick from 2bfcd27e00a49da2efa5d703121b94cd9cd4948b +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The gem_init_register_masks function was called at init time but it +relies on the num-priority-queues property. Call it at realize time +instead. + +Cc: qemu-stable@nongnu.org +Fixes: 4c70e32f05f ("net: cadence_gem: Define access permission for interrupt registers") +Signed-off-by: Luc Michel +Reviewed-by: Francisco Iglesias +Reviewed-by: Sai Pavan Boddu +Message-ID: <20250716095432.81923-2-luc.michel@amd.com> +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: guping +--- + hw/net/cadence_gem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c +index 296bba238e..c7f793c560 100644 +--- a/hw/net/cadence_gem.c ++++ b/hw/net/cadence_gem.c +@@ -1740,6 +1740,7 @@ static void gem_realize(DeviceState *dev, Error **errp) + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]); + } + ++ gem_init_register_masks(s); + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + s->nic = qemu_new_nic(&net_gem_info, &s->conf, +@@ -1760,7 +1761,6 @@ static void gem_init(Object *obj) + + DB_PRINT("\n"); + +- gem_init_register_masks(s); + memory_region_init_io(&s->iomem, OBJECT(s), &gem_ops, s, + "enet", sizeof(s->regs)); + +-- +2.33.0 + diff --git a/hw-net-can-sja1000-fix-bug-for-single-acceptance-fil.patch b/hw-net-can-sja1000-fix-bug-for-single-acceptance-fil.patch new file mode 100644 index 
0000000000000000000000000000000000000000..57b12ad43152797954c64cc9e3682377ee2d9a10 --- /dev/null +++ b/hw-net-can-sja1000-fix-bug-for-single-acceptance-fil.patch @@ -0,0 +1,43 @@ +From 0b89dd1ae05d17f0bacbd34218799f00d04c8174 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 17 Oct 2024 11:13:06 +0800 +Subject: [PATCH] hw/net/can/sja1000: fix bug for single acceptance filter and + standard frame + +cherry-pick from 25145a7d7735344a469551946fc2a7f19eb4aa3d + +A CAN sja1000 standard frame filter mask has been computed and applied +incorrectly for standard frames when single Acceptance Filter Mode +(MOD_AFM = 1) has been selected. The problem has not been found +by Linux kernel testing because it uses dual filter mode (MOD_AFM = 0) +and leaves filters fully open. + +The problem has been noticed by Grant Ramsay when testing with Zephyr +RTOS which uses single filter mode. + +Signed-off-by: Pavel Pisa +Reported-by: Grant Ramsay +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2028 +Fixes: 733210e754 ("hw/net/can: SJA1000 chip register level emulation") +Message-ID: <20240103231426.5685-1-pisa@fel.cvut.cz> +Signed-off-by: Zhang Jiao +--- + hw/net/can/can_sja1000.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c +index 73201f9139..575df7d2f8 100644 +--- a/hw/net/can/can_sja1000.c ++++ b/hw/net/can/can_sja1000.c +@@ -108,7 +108,7 @@ void can_sja_single_filter(struct qemu_can_filter *filter, + } + + filter->can_mask = (uint32_t)amr[0] << 3; +- filter->can_mask |= (uint32_t)amr[1] << 5; ++ filter->can_mask |= (uint32_t)amr[1] >> 5; + filter->can_mask = ~filter->can_mask & QEMU_CAN_SFF_MASK; + if (!(amr[1] & 0x10)) { + filter->can_mask |= QEMU_CAN_RTR_FLAG; +-- +2.41.0.windows.1 + diff --git a/hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch b/hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch deleted file mode 100644 index
3bffc1cf188e0ca6167e1ffb3cd138f14073dc8b..0000000000000000000000000000000000000000 --- a/hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 2b157688d19da5ce4fca6b5f3c78d2e309ecec9a Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Wed, 11 Nov 2020 18:36:36 +0530 -Subject: [PATCH] hw/net/e1000e: advance desc_offset in case of null descriptor - -While receiving packets via e1000e_write_packet_to_guest() routine, -'desc_offset' is advanced only when RX descriptor is processed. And -RX descriptor is not processed if it has NULL buffer address. -This may lead to an infinite loop condition. Increament 'desc_offset' -to process next descriptor in the ring to avoid infinite loop. - -Reported-by: Cheol-woo Myung <330cjfdn@gmail.com> -Signed-off-by: Prasad J Pandit -Signed-off-by: Jason Wang -(cherry-picked from c2cb5116) -Fix CVE-2020-28916 -Signed-off-by: Alex Chen ---- - hw/net/e1000e_core.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c -index 2a221c2ef9..e45d47f584 100644 ---- a/hw/net/e1000e_core.c -+++ b/hw/net/e1000e_core.c -@@ -1595,13 +1595,13 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); - } - } -- desc_offset += desc_size; -- if (desc_offset >= total_size) { -- is_last = true; -- } - } else { /* as per intel docs; skip descriptors with null buf addr */ - trace_e1000e_rx_null_descriptor(); - } -+ desc_offset += desc_size; -+ if (desc_offset >= total_size) { -+ is_last = true; -+ } - - e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, - rss_info, do_ps ? 
ps_hdr_len : 0, &bastate.written); --- -2.27.0 - diff --git a/hw-net-fix-vmxnet3-live-migration.patch b/hw-net-fix-vmxnet3-live-migration.patch deleted file mode 100644 index be97b3ac0dc76d839f646078151cddc0861ab094..0000000000000000000000000000000000000000 --- a/hw-net-fix-vmxnet3-live-migration.patch +++ /dev/null @@ -1,136 +0,0 @@ -From b8b9f58ee5d3cff0a1e7cca770fe632043efb728 Mon Sep 17 00:00:00 2001 -From: Marcel Apfelbaum -Date: Fri, 5 Jul 2019 04:07:11 +0300 -Subject: [PATCH] hw/net: fix vmxnet3 live migration - -At some point vmxnet3 live migration stopped working and git-bisect -didn't help finding a working version. -The issue is the PCI configuration space is not being migrated -successfully and MSIX remains masked at destination. - -Remove the migration differentiation between PCI and PCIe since -the logic resides now inside VMSTATE_PCI_DEVICE. -Remove also the VMXNET3_COMPAT_FLAG_DISABLE_PCIE based differentiation -since at 'realize' time is decided if the device is PCI or PCIe, -then the above macro is enough. - -Use the opportunity to move to the standard VMSTATE_MSIX -instead of the deprecated SaveVMHandlers. - -Signed-off-by: Marcel Apfelbaum -Message-Id: <20190705010711.23277-1-marcel.apfelbaum@gmail.com> -Tested-by: Sukrit Bhatnagar -Reviewed-by: Dmitry Fleytman -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/net/vmxnet3.c | 52 ++---------------------------------------------- - 1 file changed, 2 insertions(+), 50 deletions(-) - -diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c -index ecc4f5bcf0..bf8e6ca4c9 100644 ---- a/hw/net/vmxnet3.c -+++ b/hw/net/vmxnet3.c -@@ -2153,21 +2153,6 @@ vmxnet3_cleanup_msi(VMXNET3State *s) - msi_uninit(d); - } - --static void --vmxnet3_msix_save(QEMUFile *f, void *opaque) --{ -- PCIDevice *d = PCI_DEVICE(opaque); -- msix_save(d, f); --} -- --static int --vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id) --{ -- PCIDevice *d = PCI_DEVICE(opaque); -- msix_load(d, f); -- return 0; --} -- - static const MemoryRegionOps b0_ops = { - .read = vmxnet3_io_bar0_read, - .write = vmxnet3_io_bar0_write, -@@ -2188,11 +2173,6 @@ static const MemoryRegionOps b1_ops = { - }, - }; - --static SaveVMHandlers savevm_vmxnet3_msix = { -- .save_state = vmxnet3_msix_save, -- .load_state = vmxnet3_msix_load, --}; -- - static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) - { - uint64_t dsn_payload; -@@ -2215,7 +2195,6 @@ static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) - - static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) - { -- DeviceState *dev = DEVICE(pci_dev); - VMXNET3State *s = VMXNET3(pci_dev); - int ret; - -@@ -2261,8 +2240,6 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) - pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET, - vmxnet3_device_serial_num(s)); - } -- -- register_savevm_live(dev, "vmxnet3-msix", -1, 1, &savevm_vmxnet3_msix, s); - } - - static void vmxnet3_instance_init(Object *obj) -@@ -2452,29 +2429,6 @@ static const VMStateDescription vmstate_vmxnet3_int_state = { - } - }; - --static bool vmxnet3_vmstate_need_pcie_device(void *opaque) --{ -- VMXNET3State *s = VMXNET3(opaque); -- -- return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE); --} -- --static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id) --{ -- return 
!vmxnet3_vmstate_need_pcie_device(opaque); --} -- --static const VMStateDescription vmstate_vmxnet3_pcie_device = { -- .name = "vmxnet3/pcie", -- .version_id = 1, -- .minimum_version_id = 1, -- .needed = vmxnet3_vmstate_need_pcie_device, -- .fields = (VMStateField[]) { -- VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), -- VMSTATE_END_OF_LIST() -- } --}; -- - static const VMStateDescription vmstate_vmxnet3 = { - .name = "vmxnet3", - .version_id = 1, -@@ -2482,9 +2436,8 @@ static const VMStateDescription vmstate_vmxnet3 = { - .pre_save = vmxnet3_pre_save, - .post_load = vmxnet3_post_load, - .fields = (VMStateField[]) { -- VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State, -- vmxnet3_vmstate_test_pci_device, 0, -- vmstate_pci_device, PCIDevice), -+ VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), -+ VMSTATE_MSIX(parent_obj, VMXNET3State), - VMSTATE_BOOL(rx_packets_compound, VMXNET3State), - VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State), - VMSTATE_BOOL(lro_supported, VMXNET3State), -@@ -2520,7 +2473,6 @@ static const VMStateDescription vmstate_vmxnet3 = { - }, - .subsections = (const VMStateDescription*[]) { - &vmxstate_vmxnet3_mcast_list, -- &vmstate_vmxnet3_pcie_device, - NULL - } - }; --- -2.27.0 - diff --git a/hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch b/hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0659827143f2b0d4c8651ed7c1e6161f6fba139 --- /dev/null +++ b/hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch @@ -0,0 +1,71 @@ +From c23034c79ad8632388bc00dd4268e429638eee9e Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 18 Apr 2024 14:45:15 +0800 +Subject: [PATCH] hw/net/net_tx_pkt: Fix overrun in update_sctp_checksum() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 83ddb3dbba2ee0f1767442ae6ee665058aeb1093 + +If a fragmented packet size is too short, do not try to +calculate its checksum. 
+ +Reproduced using: + + $ cat << EOF | qemu-system-i386 -display none -nodefaults \ + -machine q35,accel=qtest -m 32M \ + -device igb,netdev=net0 \ + -netdev user,id=net0 \ + -qtest stdio + outl 0xcf8 0x80000810 + outl 0xcfc 0xe0000000 + outl 0xcf8 0x80000804 + outw 0xcfc 0x06 + write 0xe0000403 0x1 0x02 + writel 0xe0003808 0xffffffff + write 0xe000381a 0x1 0x5b + write 0xe000381b 0x1 0x00 + EOF + Assertion failed: (offset == 0), function iov_from_buf_full, file util/iov.c, line 39. + #1 0x5575e81e952a in iov_from_buf_full qemu/util/iov.c:39:5 + #2 0x5575e6500768 in net_tx_pkt_update_sctp_checksum qemu/hw/net/net_tx_pkt.c:144:9 + #3 0x5575e659f3e1 in igb_setup_tx_offloads qemu/hw/net/igb_core.c:478:11 + #4 0x5575e659f3e1 in igb_tx_pkt_send qemu/hw/net/igb_core.c:552:10 + #5 0x5575e659f3e1 in igb_process_tx_desc qemu/hw/net/igb_core.c:671:17 + #6 0x5575e659f3e1 in igb_start_xmit qemu/hw/net/igb_core.c:903:9 + #7 0x5575e659f3e1 in igb_set_tdt qemu/hw/net/igb_core.c:2812:5 + #8 0x5575e657d6a4 in igb_core_write qemu/hw/net/igb_core.c:4248:9 + +Fixes: CVE-2024-3567 +Cc: qemu-stable@nongnu.org +Reported-by: Zheyu Ma +Fixes: f199b13bc1 ("igb: Implement Tx SCTP CSO") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2273 +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Message-Id: <20240410070459.49112-1-philmd@linaro.org> +Signed-off-by: qihao_yewu +--- + hw/net/net_tx_pkt.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c +index 2e5f58b3c9..d40d508a11 100644 +--- a/hw/net/net_tx_pkt.c ++++ b/hw/net/net_tx_pkt.c +@@ -141,6 +141,10 @@ bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt) + uint32_t csum = 0; + struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG; + ++ if (iov_size(pl_start_frag, pkt->payload_frags) < 8 + sizeof(csum)) { ++ return false; ++ } ++ + if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) { 
+ return false; + } +-- +2.27.0 + diff --git a/hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch b/hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch deleted file mode 100644 index a763f93bc4859f6f38fa8f6d83da6d84f1ab01f2..0000000000000000000000000000000000000000 --- a/hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 596e7e8908b742f727d02ec9ab747116573f67e0 Mon Sep 17 00:00:00 2001 -From: Mauro Matteo Cascella -Date: Sat, 1 Aug 2020 18:42:38 +0200 -Subject: [PATCH] hw/net/net_tx_pkt: fix assertion failure in - net_tx_pkt_add_raw_fragment() - -An assertion failure issue was found in the code that processes network packets -while adding data fragments into the packet context. It could be abused by a -malicious guest to abort the QEMU process on the host. This patch replaces the -affected assert() with a conditional statement, returning false if the current -data fragment exceeds max_raw_frags. - -Reported-by: Alexander Bulekov -Reported-by: Ziming Zhang -Reviewed-by: Dmitry Fleytman -Signed-off-by: Mauro Matteo Cascella -Signed-off-by: Jason Wang ---- - hw/net/net_tx_pkt.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c -index 162f802dd7..54d4c3bbd0 100644 ---- a/hw/net/net_tx_pkt.c -+++ b/hw/net/net_tx_pkt.c -@@ -379,7 +379,10 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, - hwaddr mapped_len = 0; - struct iovec *ventry; - assert(pkt); -- assert(pkt->max_raw_frags > pkt->raw_frags); -+ -+ if (pkt->raw_frags >= pkt->max_raw_frags) { -+ return false; -+ } - - if (!len) { - return true; --- -2.23.0 - diff --git a/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch index dd3a972936049057760b5819669beec96cbeb48c..052c6cbe2e167250833171c371bc737f6351ddf8 100644 --- a/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch +++ 
b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch @@ -1,4 +1,4 @@ -From e921d308845a0249126c59655d985007acf58ed7 Mon Sep 17 00:00:00 2001 +From c3f204e02eacdd3e9ec6ac55396ccc7f115ad63e Mon Sep 17 00:00:00 2001 From: Qiang Ning Date: Mon, 12 Jul 2021 17:30:45 +0800 Subject: [PATCH] hw/net/rocker_of_dpa: fix double free bug of rocker device @@ -18,12 +18,13 @@ Fix that by setting group->l2_flood.group_ids to NULL after free. Signed-off-by: Jiajie Li Signed-off-by: Qiang Ning +Signed-off-by: Yan Wang --- hw/net/rocker/rocker_of_dpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c -index 8e347d1ee4..0c9de5f014 100644 +index 5e16056be6..c25438cccc 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -2070,6 +2070,7 @@ static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group, diff --git a/hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch b/hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..8789267286651d8dbfdc94fe845ec0eb198cf421 --- /dev/null +++ b/hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch @@ -0,0 +1,73 @@ +From 7e18fd22e9c0b5b28462455f60c508d5341e0230 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Wed, 3 Apr 2024 16:34:39 +0800 +Subject: [PATCH] hw/net/virtio-net: fix qemu set used ring flag even vhost + started +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 4c54f5bc8e1d38f15cc35b6a6932d8fbe219c692 + +When vhost-user or vhost-kernel is handling virtio net datapath, +QEMU should not touch used ring. + +But with vhost-user socket reconnect scenario, in a very rare case +(has pending kick event). 
VRING_USED_F_NO_NOTIFY is set by QEMU in +following code path: + + #0 virtio_queue_split_set_notification (vq=0x7ff5f4c920a8, enable=0) at ../hw/virtio/virtio.c:511 + #1 0x0000559d6dbf033b in virtio_queue_set_notification (vq=0x7ff5f4c920a8, enable=0) at ../hw/virtio/virtio.c:576 + #2 0x0000559d6dbbbdbc in virtio_net_handle_tx_bh (vdev=0x559d703a6aa0, vq=0x7ff5f4c920a8) at ../hw/net/virtio-net.c:2801 + #3 0x0000559d6dbf4791 in virtio_queue_notify_vq (vq=0x7ff5f4c920a8) at ../hw/virtio/virtio.c:2248 + #4 0x0000559d6dbf79da in virtio_queue_host_notifier_read (n=0x7ff5f4c9211c) at ../hw/virtio/virtio.c:3525 + #5 0x0000559d6d9a5814 in virtio_bus_cleanup_host_notifier (bus=0x559d703a6a20, n=1) at ../hw/virtio/virtio-bus.c:321 + #6 0x0000559d6dbf83c9 in virtio_device_stop_ioeventfd_impl (vdev=0x559d703a6aa0) at ../hw/virtio/virtio.c:3774 + #7 0x0000559d6d9a55c8 in virtio_bus_stop_ioeventfd (bus=0x559d703a6a20) at ../hw/virtio/virtio-bus.c:259 + #8 0x0000559d6d9a53e8 in virtio_bus_grab_ioeventfd (bus=0x559d703a6a20) at ../hw/virtio/virtio-bus.c:199 + #9 0x0000559d6dbf841c in virtio_device_grab_ioeventfd (vdev=0x559d703a6aa0) at ../hw/virtio/virtio.c:3783 + #10 0x0000559d6d9bde18 in vhost_dev_enable_notifiers (hdev=0x559d707edd70, vdev=0x559d703a6aa0) at ../hw/virtio/vhost.c:1592 + #11 0x0000559d6d89a0b8 in vhost_net_start_one (net=0x559d707edd70, dev=0x559d703a6aa0) at ../hw/net/vhost_net.c:266 + #12 0x0000559d6d89a6df in vhost_net_start (dev=0x559d703a6aa0, ncs=0x559d7048d890, data_queue_pairs=31, cvq=0) at ../hw/net/vhost_net.c:412 + #13 0x0000559d6dbb5b89 in virtio_net_vhost_status (n=0x559d703a6aa0, status=15 '\017') at ../hw/net/virtio-net.c:311 + #14 0x0000559d6dbb5e34 in virtio_net_set_status (vdev=0x559d703a6aa0, status=15 '\017') at ../hw/net/virtio-net.c:392 + #15 0x0000559d6dbb60d8 in virtio_net_set_link_status (nc=0x559d7048d890) at ../hw/net/virtio-net.c:455 + #16 0x0000559d6da64863 in qmp_set_link (name=0x559d6f0b83d0 "hostnet1", up=true, 
errp=0x7ffdd76569f0) at ../net/net.c:1459 + #17 0x0000559d6da7226e in net_vhost_user_event (opaque=0x559d6f0b83d0, event=CHR_EVENT_OPENED) at ../net/vhost-user.c:301 + #18 0x0000559d6ddc7f63 in chr_be_event (s=0x559d6f2ffea0, event=CHR_EVENT_OPENED) at ../chardev/char.c:62 + #19 0x0000559d6ddc7fdc in qemu_chr_be_event (s=0x559d6f2ffea0, event=CHR_EVENT_OPENED) at ../chardev/char.c:82 + +This issue causes guest kernel stop kicking device and traffic stop. + +Add vhost_started check in virtio_net_handle_tx_bh to fix this wrong +VRING_USED_F_NO_NOTIFY set. + +Signed-off-by: Yajun Wu +Reviewed-by: Jiri Pirko +Acked-by: Michael S. Tsirkin +Message-ID: <20240402045109.97729-1-yajunw@nvidia.com> +[PMD: Use unlikely()] +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/net/virtio-net.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 9559b3386a..c0a54f2d61 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2831,6 +2831,10 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) + VirtIONet *n = VIRTIO_NET(vdev); + VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; + ++ if (unlikely(n->vhost_started)) { ++ return; ++ } ++ + if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { + virtio_net_drop_tx_queue_data(vdev, vq); + return; +-- +2.27.0 + diff --git a/hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch b/hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch deleted file mode 100644 index 62be98b6f147c098f57efbad27cbc6a5831d5ea2..0000000000000000000000000000000000000000 --- a/hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 2d18434c1ca66d68f80954be6828a3770176dab4 Mon Sep 17 00:00:00 2001 -From: Mauro Matteo Cascella -Date: Fri, 10 Jul 2020 11:19:41 +0200 -Subject: [PATCH] hw/net/xgmac: Fix buffer overflow in xgmac_enet_send() - -A buffer overflow issue was reported by Mr. 
Ziming Zhang, CC'd here. It -occurs while sending an Ethernet frame due to missing break statements -and improper checking of the buffer size. - -Reported-by: Ziming Zhang -Signed-off-by: Mauro Matteo Cascella -Reviewed-by: Peter Maydell -Signed-off-by: Jason Wang ---- - hw/net/xgmac.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c -index f49df95b07..f496f7ed4c 100644 ---- a/hw/net/xgmac.c -+++ b/hw/net/xgmac.c -@@ -217,21 +217,31 @@ static void xgmac_enet_send(XgmacState *s) - } - len = (bd.buffer1_size & 0xfff) + (bd.buffer2_size & 0xfff); - -+ /* -+ * FIXME: these cases of malformed tx descriptors (bad sizes) -+ * should probably be reported back to the guest somehow -+ * rather than simply silently stopping processing, but we -+ * don't know what the hardware does in this situation. -+ * This will only happen for buggy guests anyway. -+ */ - if ((bd.buffer1_size & 0xfff) > 2048) { - DEBUGF_BRK("qemu:%s:ERROR...ERROR...ERROR... -- " - "xgmac buffer 1 len on send > 2048 (0x%x)\n", - __func__, bd.buffer1_size & 0xfff); -+ break; - } - if ((bd.buffer2_size & 0xfff) != 0) { - DEBUGF_BRK("qemu:%s:ERROR...ERROR...ERROR... 
-- " - "xgmac buffer 2 len on send != 0 (0x%x)\n", - __func__, bd.buffer2_size & 0xfff); -+ break; - } -- if (len >= sizeof(frame)) { -+ if (frame_size + len >= sizeof(frame)) { - DEBUGF_BRK("qemu:%s: buffer overflow %d read into %zu " -- "buffer\n" , __func__, len, sizeof(frame)); -+ "buffer\n" , __func__, frame_size + len, sizeof(frame)); - DEBUGF_BRK("qemu:%s: buffer1.size=%d; buffer2.size=%d\n", - __func__, bd.buffer1_size, bd.buffer2_size); -+ break; - } - - cpu_physical_memory_read(bd.buffer1_addr, ptr, len); --- -2.23.0 - diff --git a/hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch b/hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f546cd40b083c26f25eb974614a24e3c989ab35 --- /dev/null +++ b/hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch @@ -0,0 +1,85 @@ +From 6a32c9764439093fe4b53f87059c35761d711e39 Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki +Date: Wed, 28 Feb 2024 20:33:12 +0900 +Subject: [PATCH] hw/nvme: Use pcie_sriov_num_vfs() (CVE-2024-26328) + +nvme_sriov_pre_write_ctrl() used to directly inspect SR-IOV +configurations to know the number of VFs being disabled due to SR-IOV +configuration writes, but the logic was flawed and resulted in +out-of-bound memory access. + +It assumed PCI_SRIOV_NUM_VF always has the number of currently enabled +VFs, but it actually doesn't in the following cases: +- PCI_SRIOV_NUM_VF has been set but PCI_SRIOV_CTRL_VFE has never been. +- PCI_SRIOV_NUM_VF was written after PCI_SRIOV_CTRL_VFE was set. +- VFs were only partially enabled because of realization failure. + +It is a responsibility of pcie_sriov to interpret SR-IOV configurations +and pcie_sriov does it correctly, so use pcie_sriov_num_vfs(), which it +provides, to get the number of enabled VFs before and after SR-IOV +configuration writes. 
+ +Cc: qemu-stable@nongnu.org +Fixes: CVE-2024-26328 +Fixes: 11871f53ef8e ("hw/nvme: Add support for the Virtualization Management command") +Suggested-by: Michael S. Tsirkin +Signed-off-by: Akihiko Odaki +Message-Id: <20240228-reuse-v8-1-282660281e60@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + hw/nvme/ctrl.c | 26 ++++++++------------------ + 1 file changed, 8 insertions(+), 18 deletions(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index f026245d1e..7a56e7b79b 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -8466,36 +8466,26 @@ static void nvme_pci_reset(DeviceState *qdev) + nvme_ctrl_reset(n, NVME_RESET_FUNCTION); + } + +-static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address, +- uint32_t val, int len) ++static void nvme_sriov_post_write_config(PCIDevice *dev, uint16_t old_num_vfs) + { + NvmeCtrl *n = NVME(dev); + NvmeSecCtrlEntry *sctrl; +- uint16_t sriov_cap = dev->exp.sriov_cap; +- uint32_t off = address - sriov_cap; +- int i, num_vfs; ++ int i; + +- if (!sriov_cap) { +- return; +- } +- +- if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { +- if (!(val & PCI_SRIOV_CTRL_VFE)) { +- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); +- for (i = 0; i < num_vfs; i++) { +- sctrl = &n->sec_ctrl_list.sec[i]; +- nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false); +- } +- } ++ for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) { ++ sctrl = &n->sec_ctrl_list.sec[i]; ++ nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false); + } + } + + static void nvme_pci_write_config(PCIDevice *dev, uint32_t address, + uint32_t val, int len) + { +- nvme_sriov_pre_write_ctrl(dev, address, val, len); ++ uint16_t old_num_vfs = pcie_sriov_num_vfs(dev); ++ + pci_default_write_config(dev, address, val, len); + pcie_cap_flr_write_config(dev, address, val, len); ++ nvme_sriov_post_write_config(dev, old_num_vfs); + } + + static const VMStateDescription nvme_vmstate = { +-- +2.27.0 + diff --git 
a/hw-nvme-fix-Werror-maybe-uninitialized.patch b/hw-nvme-fix-Werror-maybe-uninitialized.patch new file mode 100644 index 0000000000000000000000000000000000000000..29b8449f0ab4a8c37a778b45ccfb5040f50e2f3d --- /dev/null +++ b/hw-nvme-fix-Werror-maybe-uninitialized.patch @@ -0,0 +1,38 @@ +From 2fc8029b9e274a0dbedc55b6b114b29e003b32ab Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E5=88=98=E5=A9=A720201110?= + +Date: Mon, 8 Apr 2024 04:32:11 -0400 +Subject: [PATCH] hw/nvme: fix -Werror=maybe-uninitialized +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +../hw/nvme/ctrl.c:6081:21: error: ‘result’ may be used uninitialized [-Werror=maybe-uninitialized] + +It's not obvious that 'result' is set in all code paths. When &result is +a returned argument, it's even less clear. + +Looking at various assignments, 0 seems to be a suitable default value. + +Signed-off-by: Marc-André Lureau +Signed-off-by: Liu Jing +--- + hw/nvme/ctrl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 7a56e7b79b..237b5c8871 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -5882,7 +5882,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t nsid = le32_to_cpu(cmd->nsid); +- uint32_t result; ++ uint32_t result = 0; + uint8_t fid = NVME_GETSETFEAT_FID(dw10); + NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10); + uint16_t iv; +-- +2.27.0 + diff --git a/hw-nvme-fix-handling-of-over-committed-queues.patch b/hw-nvme-fix-handling-of-over-committed-queues.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4db2e75e1dfda31c74779369ff68ddc35a78319 --- /dev/null +++ b/hw-nvme-fix-handling-of-over-committed-queues.patch @@ -0,0 +1,102 @@ +From c4423b70160eb7ae91dac9f2cf61513758ee017d Mon Sep 17 00:00:00 2001 +From: Klaus Jensen +Date: Tue, 29 Oct 2024 13:15:19 +0100 
+Subject: [PATCH] hw/nvme: fix handling of over-committed queues + +If a host chooses to use the SQHD "hint" in the CQE to know if there is +room in the submission queue for additional commands, it may result in a +situation where there are not enough internal resources (struct +NvmeRequest) available to process the command. For a lack of a better +term, the host may "over-commit" the device (i.e., it may have more +inflight commands than the queue size). + +For example, assume a queue with N entries. The host submits N commands +and all are picked up for processing, advancing the head and emptying +the queue. Regardless of which of these N commands complete first, the +SQHD field of that CQE will indicate to the host that the queue is +empty, which allows the host to issue N commands again. However, if the +device has not posted CQEs for all the previous commands yet, the device +will have less than N resources available to process the commands, so +queue processing is suspended. + +And here lies an 11 year latent bug. In the absence of any additional +tail updates on the submission queue, we never schedule the processing +bottom-half again unless we observe a head update on an associated full +completion queue. This has been sufficient to handle N-to-1 SQ/CQ setups +(in the absence of over-commit of course). Incidentally, that "kick all +associated SQs" mechanism can now be killed since we now just schedule +queue processing when we return a processing resource to a non-empty +submission queue, which happens to cover both edge cases. However, we +must retain kicking the CQ if it was previously full. + +So, apparently, no previous driver tested with hw/nvme has ever used +SQHD (e.g., neither the Linux NVMe driver or SPDK uses it). But then OSv +shows up with the driver that actually does. I salute you. 
+ +Fixes: f3c507adcd7b ("NVMe: Initial commit for new storage interface") +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2388 +Reported-by: Waldemar Kozaczuk +Reviewed-by: Keith Busch +Signed-off-by: Klaus Jensen +Signed-off-by: Zhongrui Tang +--- + hw/nvme/ctrl.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 104aebc5ea..29445938d5 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -1516,9 +1516,16 @@ static void nvme_post_cqes(void *opaque) + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); + break; + } ++ + QTAILQ_REMOVE(&cq->req_list, req, entry); ++ + nvme_inc_cq_tail(cq); + nvme_sg_unmap(&req->sg); ++ ++ if (QTAILQ_EMPTY(&sq->req_list) && !nvme_sq_empty(sq)) { ++ qemu_bh_schedule(sq->bh); ++ } ++ + QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); + } + if (cq->tail != cq->head) { +@@ -7575,7 +7582,6 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) + /* Completion queue doorbell write */ + + uint16_t new_head = val & 0xffff; +- int start_sqs; + NvmeCQueue *cq; + + qid = (addr - (0x1000 + (1 << 2))) >> 3; +@@ -7626,18 +7632,15 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) + + trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head); + +- start_sqs = nvme_cq_full(cq) ? 
1 : 0; ++ /* scheduled deferred cqe posting if queue was previously full */ ++ if (nvme_cq_full(cq)) { ++ qemu_bh_schedule(cq->bh); ++ } ++ + cq->head = new_head; + if (!qid && n->dbbuf_enabled) { + stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED); + } +- if (start_sqs) { +- NvmeSQueue *sq; +- QTAILQ_FOREACH(sq, &cq->sq_list, entry) { +- qemu_bh_schedule(sq->bh); +- } +- qemu_bh_schedule(cq->bh); +- } + + if (cq->tail == cq->head) { + if (cq->irq_enabled) { +-- +2.41.0.windows.1 + diff --git a/hw-nvme-fix-invalid-check-on-mcl.patch b/hw-nvme-fix-invalid-check-on-mcl.patch new file mode 100644 index 0000000000000000000000000000000000000000..cefd6c18b37ddeff1679c0b311760d36ad9a8200 --- /dev/null +++ b/hw-nvme-fix-invalid-check-on-mcl.patch @@ -0,0 +1,36 @@ +From 43fdaaa492ea10ab0e90ec4cc68ec45aed1d415c Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 22 Mar 2025 15:20:27 +0800 +Subject: [PATCH] hw/nvme: fix invalid check on mcl + +cherry-pick from 8c78015a55d84c016da6d5e41b6b5f618ecb25ab + +The number of logical blocks within a source range is converted into a +1s based number at the time of parsing. However, when verifying the copy +length we add one again, causing the check against MCL to fail in error. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 381ab99d8587 ("hw/nvme: check maximum copy length (MCL) for COPY") +Reviewed-by: Minwoo Im +Signed-off-by: Klaus Jensen +Signed-off-by: gubin +--- + hw/nvme/ctrl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 29445938d5..407004b2f7 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -2863,7 +2863,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns, + uint32_t nlb; + nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL, + &nlb, NULL, NULL, NULL); +- copy_len += nlb + 1; ++ copy_len += nlb; + } + + if (copy_len > ns->id_ns.mcl) { +-- +2.41.0.windows.1 + diff --git a/hw-nvme-fix-invalid-endian-conversion.patch b/hw-nvme-fix-invalid-endian-conversion.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f236bd9c022664dc96efb5e6221b7d6135957ab --- /dev/null +++ b/hw-nvme-fix-invalid-endian-conversion.patch @@ -0,0 +1,42 @@ +From 6de964bac51139ef24f43bde56933cd8eafaf317 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 22 Mar 2025 15:25:39 +0800 +Subject: [PATCH] hw/nvme: fix invalid endian conversion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from d2b5bb860e6c17442ad95cc275feb07c1665be5c + +numcntl is one byte and so is max_vfs. Using cpu_to_le16 on big endian +hosts results in numcntl being set to 0. + +Fix by dropping the endian conversion. 
+ +Fixes: 99f48ae7ae ("hw/nvme: Add support for Secondary Controller List") +Reported-by: Kevin Wolf +Signed-off-by: Klaus Jensen +Reviewed-by: Minwoo Im +Message-ID: <20240222-fix-sriov-numcntl-v1-1-d60bea5e72d0@samsung.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: gubin +--- + hw/nvme/ctrl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 29445938d5..9410344844 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -7928,7 +7928,7 @@ static void nvme_init_state(NvmeCtrl *n) + n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); + QTAILQ_INIT(&n->aer_queue); + +- list->numcntl = cpu_to_le16(max_vfs); ++ list->numcntl = max_vfs; + for (i = 0; i < max_vfs; i++) { + sctrl = &list->sec[i]; + sctrl->pcid = cpu_to_le16(n->cntlid); +-- +2.41.0.windows.1 + diff --git a/hw-nvme-fix-leak-of-uninitialized-memory-in-io_mgmt_.patch b/hw-nvme-fix-leak-of-uninitialized-memory-in-io_mgmt_.patch new file mode 100644 index 0000000000000000000000000000000000000000..762e2ff430331e411e310647e90e74536498de11 --- /dev/null +++ b/hw-nvme-fix-leak-of-uninitialized-memory-in-io_mgmt_.patch @@ -0,0 +1,35 @@ +From 80f4d02d7afa212fba4420a3af04f3a670b9a5d4 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Mon, 26 Aug 2024 10:40:40 +0800 +Subject: [PATCH] hw/nvme: fix leak of uninitialized memory in io_mgmt_recv + +cherry-pick from 6a22121c4f25b181e99479f65958ecde65da1c92 + +Yutaro Shimizu from the Cyber Defense Institute discovered a bug in the +NVMe emulation that leaks contents of an uninitialized heap buffer if +subsystem and FDP emulation are enabled. 
+ +Cc: qemu-stable@nongnu.org +Reported-by: Yutaro Shimizu +Signed-off-by: Klaus Jensen +Signed-off-by: qihao_yewu +--- + hw/nvme/ctrl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index aecf7c37bb..104aebc5ea 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -4302,7 +4302,7 @@ static uint16_t nvme_io_mgmt_recv_ruhs(NvmeCtrl *n, NvmeRequest *req, + + nruhsd = ns->fdp.nphs * endgrp->fdp.nrg; + trans_len = sizeof(NvmeRuhStatus) + nruhsd * sizeof(NvmeRuhStatusDescr); +- buf = g_malloc(trans_len); ++ buf = g_malloc0(trans_len); + + trans_len = MIN(trans_len, len); + +-- +2.41.0.windows.1 + diff --git a/hw-nvme-fix-memory-leak-in-nvme_dsm.patch b/hw-nvme-fix-memory-leak-in-nvme_dsm.patch new file mode 100644 index 0000000000000000000000000000000000000000..7cc00c18c60892c7c364da8443170d896a3a2399 --- /dev/null +++ b/hw-nvme-fix-memory-leak-in-nvme_dsm.patch @@ -0,0 +1,49 @@ +From 0c23d22ea9f160a8e0e0e48b6cb400d7964ae868 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 23 Jul 2024 21:06:08 +0800 +Subject: [PATCH] hw/nvme: fix memory leak in nvme_dsm + +cherry-pick from c510fe78f1b7c966524489d6ba752107423b20c8 + +The allocated memory to hold LBA ranges leaks in the nvme_dsm function. This +happens because the allocated memory for iocb->range is not freed in all +error handling paths. + +Fix this by adding a free to ensure that the allocated memory is properly freed. 
+ +ASAN log: +==3075137==ERROR: LeakSanitizer: detected memory leaks + +Direct leak of 480 byte(s) in 6 object(s) allocated from: + #0 0x55f1f8a0eddd in malloc llvm/compiler-rt/lib/asan/asan_malloc_linux.cpp:129:3 + #1 0x7f531e0f6738 in g_malloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x5e738) + #2 0x55f1faf1f091 in blk_aio_get block/block-backend.c:2583:12 + #3 0x55f1f945c74b in nvme_dsm hw/nvme/ctrl.c:2609:30 + #4 0x55f1f945831b in nvme_io_cmd hw/nvme/ctrl.c:4470:16 + #5 0x55f1f94561b7 in nvme_process_sq hw/nvme/ctrl.c:7039:29 + +Cc: qemu-stable@nongnu.org +Fixes: d7d1474fd85d ("hw/nvme: reimplement dsm to allow cancellation") +Signed-off-by: Zheyu Ma +Reviewed-by: Klaus Jensen +Signed-off-by: Klaus Jensen +Signed-off-by: qihao_yewu +--- + hw/nvme/ctrl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 237b5c8871..dd1c962f93 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -2592,6 +2592,7 @@ next: + done: + iocb->aiocb = NULL; + iocb->common.cb(iocb->common.opaque, iocb->ret); ++ g_free(iocb->range); + qemu_aio_unref(iocb); + } + +-- +2.41.0.windows.1 + diff --git a/hw-nvme-fix-number-of-PIDs-for-FDP-RUH-update.patch b/hw-nvme-fix-number-of-PIDs-for-FDP-RUH-update.patch new file mode 100644 index 0000000000000000000000000000000000000000..285c70ece47a534c3723a82a17bbc23cb5726822 --- /dev/null +++ b/hw-nvme-fix-number-of-PIDs-for-FDP-RUH-update.patch @@ -0,0 +1,35 @@ +From 3696b12c582440669de12d127701187828c5598f Mon Sep 17 00:00:00 2001 +From: Xu Zheng +Date: Fri, 19 Jul 2024 22:11:17 +0800 +Subject: [PATCH] hw/nvme: fix number of PIDs for FDP RUH update + +The number of PIDs is in the upper 16 bits of cdw10. So we need to +right-shift by 16 bits instead of only a single bit. 
+ +Fixes: 73064edfb864 ("hw/nvme: flexible data placement emulation") + +cherry-pick from 3936bbdf9a2e9233875f850c7576c79d06add261 +Signed-off-by: Vincent Fu +Signed-off-by: Klaus Jensen +Signed-off-by: Michael Tokarev +Signed-off-by: Xu Zheng +--- + hw/nvme/ctrl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 237b5c8871..d7e83c3d55 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -4352,7 +4352,7 @@ static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl *n, NvmeRequest *req) + NvmeNamespace *ns = req->ns; + uint32_t cdw10 = le32_to_cpu(cmd->cdw10); + uint16_t ret = NVME_SUCCESS; +- uint32_t npid = (cdw10 >> 1) + 1; ++ uint32_t npid = (cdw10 >> 16) + 1; + unsigned int i = 0; + g_autofree uint16_t *pids = NULL; + uint32_t maxnpid; +-- +2.41.0.windows.1 + diff --git a/hw-pci-Add-parenthesis-to-PCI_BUILD_BDF-macro.patch b/hw-pci-Add-parenthesis-to-PCI_BUILD_BDF-macro.patch new file mode 100644 index 0000000000000000000000000000000000000000..19bbcf644ad461f5d5e50a963c123a68dd257b15 --- /dev/null +++ b/hw-pci-Add-parenthesis-to-PCI_BUILD_BDF-macro.patch @@ -0,0 +1,52 @@ +From 3c108b874b8b142a42939d785d6706a44e7035d7 Mon Sep 17 00:00:00 2001 +From: Roque Arcudia Hernandez +Date: Fri, 1 Nov 2024 21:59:23 +0000 +Subject: [PATCH] hw/pci: Add parenthesis to PCI_BUILD_BDF macro +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The bus parameter in the macro PCI_BUILD_BDF is not surrounded by +parenthesis. This can create a compile error when warnings are +treated as errors or can potentially create runtime errors due to the +operator precedence. 
+ +For instance: + + file.c:x:32: error: suggest parentheses around '-' inside '<<' + [-Werror=parentheses] + 171 | uint16_t bdf = PCI_BUILD_BDF(a - b, sdev->devfn); + | ~~^~~ + include/hw/pci/pci.h:19:41: note: in definition of macro + 'PCI_BUILD_BDF' + 19 | #define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) + | ^~~ + cc1: all warnings being treated as errors + +Signed-off-by: Roque Arcudia Hernandez +Reviewed-by: Nabih Estefan +Message-Id: <20241101215923.3399311-1-roqueh@google.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Zhongrui Tang +--- + include/hw/pci/pci.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index fa6313aabc..7cf7b5619a 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -15,7 +15,7 @@ extern bool pci_available; + #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff) + #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) + #define PCI_FUNC(devfn) ((devfn) & 0x07) +-#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) ++#define PCI_BUILD_BDF(bus, devfn) (((bus) << 8) | (devfn)) + #define PCI_BDF_TO_DEVFN(x) ((x) & 0xff) + #define PCI_BUS_MAX 256 + #define PCI_DEVFN_MAX 256 +-- +2.41.0.windows.1 + diff --git a/hw-pci-Introduce-helper-function-pci_device_get_iomm.patch b/hw-pci-Introduce-helper-function-pci_device_get_iomm.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba4323cb5fb23594e06c75a59db09b46b8c10e94 --- /dev/null +++ b/hw-pci-Introduce-helper-function-pci_device_get_iomm.patch @@ -0,0 +1,95 @@ +From 03f9b12e33238587da36be24523911fd1b003324 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:38 +0800 +Subject: [PATCH] hw/pci: Introduce helper function + pci_device_get_iommu_bus_devfn() + +Extract out pci_device_get_iommu_bus_devfn() from +pci_device_iommu_address_space() to facilitate +implementation of 
pci_device_[set|unset]_iommu_device() +in following patch. + +No functional change intended. + +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Nicolin Chen +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +--- + hw/pci/pci.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 45 insertions(+), 3 deletions(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index 7467a2a9de..0884fbb760 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2681,11 +2681,27 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) + } + } + +-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) ++/* ++ * Get IOMMU root bus, aliased bus and devfn of a PCI device ++ * ++ * IOMMU root bus is needed by all call sites to call into iommu_ops. ++ * For call sites which don't need aliased BDF, passing NULL to ++ * aliased_[bus|devfn] is allowed. ++ * ++ * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device. ++ * ++ * @aliased_bus: return aliased #PCIBus of the PCI device, optional. ++ * ++ * @aliased_devfn: return aliased devfn of the PCI device, optional. 
++ */ ++static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, ++ PCIBus **piommu_bus, ++ PCIBus **aliased_bus, ++ int *aliased_devfn) + { + PCIBus *bus = pci_get_bus(dev); + PCIBus *iommu_bus = bus; +- uint8_t devfn = dev->devfn; ++ int devfn = dev->devfn; + + while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) { + PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); +@@ -2726,7 +2742,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) + + iommu_bus = parent_bus; + } +- if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { ++ ++ assert(0 <= devfn && devfn < PCI_DEVFN_MAX); ++ assert(iommu_bus); ++ ++ if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) { ++ iommu_bus = NULL; ++ } ++ ++ *piommu_bus = iommu_bus; ++ ++ if (aliased_bus) { ++ *aliased_bus = bus; ++ } ++ ++ if (aliased_devfn) { ++ *aliased_devfn = devfn; ++ } ++} ++ ++AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) ++{ ++ PCIBus *bus; ++ PCIBus *iommu_bus; ++ int devfn; ++ ++ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); ++ if (iommu_bus) { + return iommu_bus->iommu_ops->get_address_space(bus, + iommu_bus->iommu_opaque, devfn); + } +-- +2.41.0.windows.1 + diff --git a/hw-pci-Introduce-pci_device_-set-unset-_iommu_device.patch b/hw-pci-Introduce-pci_device_-set-unset-_iommu_device.patch new file mode 100644 index 0000000000000000000000000000000000000000..48f864d026beb6f1b60c12ac4f40186a0c4b4370 --- /dev/null +++ b/hw-pci-Introduce-pci_device_-set-unset-_iommu_device.patch @@ -0,0 +1,120 @@ +From 7bc73d38984460315df315d007789f87f4d11994 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Wed, 5 Jun 2024 16:30:39 +0800 +Subject: [PATCH] hw/pci: Introduce pci_device_[set|unset]_iommu_device() + +pci_device_[set|unset]_iommu_device() call pci_device_get_iommu_bus_devfn() +to get iommu_bus->iommu_ops and call [set|unset]_iommu_device callback to +set/unset HostIOMMUDevice for a given PCI device. 
+ +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Nicolin Chen +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +--- + hw/pci/pci.c | 27 +++++++++++++++++++++++++++ + include/hw/pci/pci.h | 38 +++++++++++++++++++++++++++++++++++++- + 2 files changed, 64 insertions(+), 1 deletion(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index 0884fbb760..d6f627aa51 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2775,6 +2775,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) + return &address_space_memory; + } + ++bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, ++ Error **errp) ++{ ++ PCIBus *iommu_bus; ++ ++ /* set_iommu_device requires device's direct BDF instead of aliased BDF */ ++ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); ++ if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) { ++ return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), ++ iommu_bus->iommu_opaque, ++ dev->devfn, hiod, errp); ++ } ++ return true; ++} ++ ++void pci_device_unset_iommu_device(PCIDevice *dev) ++{ ++ PCIBus *iommu_bus; ++ ++ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); ++ if (iommu_bus && iommu_bus->iommu_ops->unset_iommu_device) { ++ return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev), ++ iommu_bus->iommu_opaque, ++ dev->devfn); ++ } ++} ++ + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) + { + /* +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index cee0cf7460..8d1af44249 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -3,6 +3,7 @@ + + #include "exec/memory.h" + #include "sysemu/dma.h" ++#include "sysemu/host_iommu_device.h" + + /* PCI includes legacy ISA access. 
*/ + #include "hw/isa/isa.h" +@@ -384,10 +385,45 @@ typedef struct PCIIOMMUOps { + * + * @devfn: device and function number + */ +- AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); ++ AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); ++ /** ++ * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU ++ * ++ * Optional callback, if not implemented in vIOMMU, then vIOMMU can't ++ * retrieve host information from the associated HostIOMMUDevice. ++ * ++ * @bus: the #PCIBus of the PCI device. ++ * ++ * @opaque: the data passed to pci_setup_iommu(). ++ * ++ * @devfn: device and function number of the PCI device. ++ * ++ * @dev: the #HostIOMMUDevice to attach. ++ * ++ * @errp: pass an Error out only when return false ++ * ++ * Returns: true if HostIOMMUDevice is attached or else false with errp set. ++ */ ++ bool (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn, ++ HostIOMMUDevice *dev, Error **errp); ++ /** ++ * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU ++ * ++ * Optional callback. ++ * ++ * @bus: the #PCIBus of the PCI device. ++ * ++ * @opaque: the data passed to pci_setup_iommu(). ++ * ++ * @devfn: device and function number of the PCI device. 
++ */ ++ void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); + } PCIIOMMUOps; + + AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); ++bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, ++ Error **errp); ++void pci_device_unset_iommu_device(PCIDevice *dev); + + /** + * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus +-- +2.41.0.windows.1 + diff --git a/hw-pci-Remove-unused-pci_irq_pulse-method.patch b/hw-pci-Remove-unused-pci_irq_pulse-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..69baa9e2233c64bf18d5090b2bf422415743adda --- /dev/null +++ b/hw-pci-Remove-unused-pci_irq_pulse-method.patch @@ -0,0 +1,46 @@ +From d1b98e84eeec0b94403fb716bef41080f6bee3b3 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 12 Dec 2024 10:31:47 +0800 +Subject: [PATCH] hw/pci: Remove unused pci_irq_pulse() method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from ef45f46f382a5e2c41c39c71fd3364cff4f41bf5 + +Last use of pci_irq_pulse() was removed 7 years ago in commit +5e9aa92eb1 ("hw/block: Fix pin-based interrupt behaviour of NVMe"). + +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Thomas Huth +Message-ID: <20241122103418.539-1-philmd@linaro.org> +Signed-off-by: Thomas Huth +Signed-off-by: Zhang Jiao +--- + include/hw/pci/pci.h | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index 7cf7b5619a..cee0cf7460 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -632,16 +632,6 @@ static inline void pci_irq_deassert(PCIDevice *pci_dev) + pci_set_irq(pci_dev, 0); + } + +-/* +- * FIXME: PCI does not work this way. +- * All the callers to this method should be fixed. 
+- */ +-static inline void pci_irq_pulse(PCIDevice *pci_dev) +-{ +- pci_irq_assert(pci_dev); +- pci_irq_deassert(pci_dev); +-} +- + MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); + void pci_set_power(PCIDevice *pci_dev, bool state); + +-- +2.41.0.windows.1 + diff --git a/hw-pci-bridge-Add-a-Kconfig-switch-for-the-normal-PC.patch b/hw-pci-bridge-Add-a-Kconfig-switch-for-the-normal-PC.patch new file mode 100644 index 0000000000000000000000000000000000000000..1a2be67d3df0c515a95c79ac98152495ecf99aa8 --- /dev/null +++ b/hw-pci-bridge-Add-a-Kconfig-switch-for-the-normal-PC.patch @@ -0,0 +1,50 @@ +From 55ea1e473095ea5be692bb4ba2e44131a4a88e73 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Wed, 23 Oct 2024 13:40:51 +0800 +Subject: [PATCH] hw/pci-bridge: Add a Kconfig switch for the normal PCI bridge +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from e779e5c05ad5d8237e2a7d8ba8b432cd24c1708b + +The pci-bridge device is not usable on s390x, so introduce a Kconfig +switch that allows to disable it. 
+ +Message-ID: <20240913144844.427899-1-thuth@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Cédric Le Goater +Signed-off-by: Thomas Huth +Signed-off-by: Zhang Jiao +--- + hw/pci-bridge/Kconfig | 5 +++++ + hw/pci-bridge/meson.build | 2 +- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/pci-bridge/Kconfig b/hw/pci-bridge/Kconfig +index 67077366cc..449ec98643 100644 +--- a/hw/pci-bridge/Kconfig ++++ b/hw/pci-bridge/Kconfig +@@ -1,3 +1,8 @@ ++config PCI_BRIDGE ++ bool ++ default y if PCI_DEVICES ++ depends on PCI ++ + config PCIE_PORT + bool + default y if PCI_DEVICES +diff --git a/hw/pci-bridge/meson.build b/hw/pci-bridge/meson.build +index 6d5ad9f37b..a8b88e9099 100644 +--- a/hw/pci-bridge/meson.build ++++ b/hw/pci-bridge/meson.build +@@ -1,5 +1,5 @@ + pci_ss = ss.source_set() +-pci_ss.add(files('pci_bridge_dev.c')) ++pci_ss.add(when: 'CONFIG_PCI_BRIDGE', if_true: files('pci_bridge_dev.c')) + pci_ss.add(when: 'CONFIG_I82801B11', if_true: files('i82801b11.c')) + pci_ss.add(when: 'CONFIG_IOH3420', if_true: files('ioh3420.c')) + pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pcie_root_port.c')) +-- +2.41.0.windows.1 + diff --git a/hw-pci-host-add-pci-intack-write-method.patch b/hw-pci-host-add-pci-intack-write-method.patch deleted file mode 100644 index bb09d022bb7b23fae8cc34e7c7feae65c6e5bc3e..0000000000000000000000000000000000000000 --- a/hw-pci-host-add-pci-intack-write-method.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 80214941ed6ce24983d8f161a7c9532678acc6f1 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:03:57 +0800 -Subject: [PATCH] hw/pci-host: add pci-intack write method - -fix CVE-2020-15469 - -Add pci-intack mmio write method to avoid NULL pointer dereference -issue. 
- -Reported-by: Lei Sun -Reviewed-by: Li Qiang -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/pci-host/prep.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c -index c564f234af..f03c81f651 100644 ---- a/hw/pci-host/prep.c -+++ b/hw/pci-host/prep.c -@@ -26,6 +26,7 @@ - #include "qemu/osdep.h" - #include "qemu-common.h" - #include "qemu/units.h" -+#include "qemu/log.h" - #include "qapi/error.h" - #include "hw/hw.h" - #include "hw/pci/pci.h" -@@ -117,8 +118,15 @@ static uint64_t raven_intack_read(void *opaque, hwaddr addr, - return pic_read_irq(isa_pic); - } - -+static void raven_intack_write(void *opaque, hwaddr addr, -+ uint64_t data, unsigned size) -+{ -+ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); -+} -+ - static const MemoryRegionOps raven_intack_ops = { - .read = raven_intack_read, -+ .write = raven_intack_write, - .valid = { - .max_access_size = 1, - }, --- -2.27.0 - diff --git a/hw-pci-host-designware-Fix-ATU_UPPER_TARGET-register.patch b/hw-pci-host-designware-Fix-ATU_UPPER_TARGET-register.patch new file mode 100644 index 0000000000000000000000000000000000000000..5660bdedb327111214a7242dd66a68749da84725 --- /dev/null +++ b/hw-pci-host-designware-Fix-ATU_UPPER_TARGET-register.patch @@ -0,0 +1,41 @@ +From c1f1346eea8da6552e085aa13630bbf5227db00f Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 7 Apr 2025 12:54:10 -0400 +Subject: [PATCH] hw/pci-host/designware: Fix ATU_UPPER_TARGET register access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 04e99f9eb7920b0f0fcce65686c3bedf5e32a1f9 + +Fix copy/paste error writing to the ATU_UPPER_TARGET +register, we want to update the upper 32 bits. 
+ +Cc: qemu-stable@nongnu.org +Reported-by: Joey +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2861 +Fixes: d64e5eabc4c ("pci: Add support for Designware IP block") +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Gustavo Romero +Message-Id: <20250331152041.74533-2-philmd@linaro.org> +Signed-off-by: qihao_yewu +--- + hw/pci-host/designware.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c +index f477f97847..004142709c 100644 +--- a/hw/pci-host/designware.c ++++ b/hw/pci-host/designware.c +@@ -360,7 +360,7 @@ static void designware_pcie_root_config_write(PCIDevice *d, uint32_t address, + + case DESIGNWARE_PCIE_ATU_UPPER_TARGET: + viewport->target &= 0x00000000FFFFFFFFULL; +- viewport->target |= val; ++ viewport->target |= (uint64_t)val << 32; + break; + + case DESIGNWARE_PCIE_ATU_LIMIT: +-- +2.41.0.windows.1 + diff --git a/hw-pci-host-gpex-Define-properties-for-MMIO-ranges.patch b/hw-pci-host-gpex-Define-properties-for-MMIO-ranges.patch new file mode 100644 index 0000000000000000000000000000000000000000..787bd0e5afac2da3420ec21709adc2f8ef948c57 --- /dev/null +++ b/hw-pci-host-gpex-Define-properties-for-MMIO-ranges.patch @@ -0,0 +1,128 @@ +From 7ec434a0f9935a7a6a14896140f33c7e1436111e Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:34 +0800 +Subject: [PATCH 10/18] hw/pci-host/gpex: Define properties for MMIO ranges + +commit 8f6a4874887c226b0df35f5b78fa77f197507d96 upstream + +ACPI DSDT generator needs information like ECAM range, PIO range, 32-bit +and 64-bit PCI MMIO range etc related to the PCI host bridge. Instead of +making these values machine specific, create properties for the GPEX +host bridge with default value 0. During initialization, the firmware +can initialize these properties with correct values for the platform. +This basically allows DSDT generator code independent of the machine +specific memory map accesses. 
+ +Suggested-by: Igor Mammedov +Signed-off-by: Sunil V L +Acked-by: Alistair Francis +Acked-by: Michael S. Tsirkin +Reviewed-by: Daniel Henrique Barboza +Message-ID: <20231218150247.466427-11-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/pci-host/gpex-acpi.c | 13 +++++++++++++ + hw/pci-host/gpex.c | 12 ++++++++++++ + include/hw/pci-host/gpex.h | 30 +++++++++++++++++++++--------- + 3 files changed, 46 insertions(+), 9 deletions(-) + +diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c +index 162f6221ab..020ded0ff6 100644 +--- a/hw/pci-host/gpex-acpi.c ++++ b/hw/pci-host/gpex-acpi.c +@@ -296,3 +296,16 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) + + crs_range_set_free(&crs_range_set); + } ++ ++void acpi_dsdt_add_gpex_host(Aml *scope, uint32_t irq) ++{ ++ bool ambig; ++ Object *obj = object_resolve_path_type("", TYPE_GPEX_HOST, &ambig); ++ ++ if (!obj || ambig) { ++ return; ++ } ++ ++ GPEX_HOST(obj)->gpex_cfg.irq = irq; ++ acpi_dsdt_add_gpex(scope, &GPEX_HOST(obj)->gpex_cfg); ++} +diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c +index a6752fac5e..41f4e73f6e 100644 +--- a/hw/pci-host/gpex.c ++++ b/hw/pci-host/gpex.c +@@ -154,6 +154,18 @@ static Property gpex_host_properties[] = { + */ + DEFINE_PROP_BOOL("allow-unmapped-accesses", GPEXHost, + allow_unmapped_accesses, true), ++ DEFINE_PROP_UINT64(PCI_HOST_ECAM_BASE, GPEXHost, gpex_cfg.ecam.base, 0), ++ DEFINE_PROP_SIZE(PCI_HOST_ECAM_SIZE, GPEXHost, gpex_cfg.ecam.size, 0), ++ DEFINE_PROP_UINT64(PCI_HOST_PIO_BASE, GPEXHost, gpex_cfg.pio.base, 0), ++ DEFINE_PROP_SIZE(PCI_HOST_PIO_SIZE, GPEXHost, gpex_cfg.pio.size, 0), ++ DEFINE_PROP_UINT64(PCI_HOST_BELOW_4G_MMIO_BASE, GPEXHost, ++ gpex_cfg.mmio32.base, 0), ++ DEFINE_PROP_SIZE(PCI_HOST_BELOW_4G_MMIO_SIZE, GPEXHost, ++ gpex_cfg.mmio32.size, 0), ++ DEFINE_PROP_UINT64(PCI_HOST_ABOVE_4G_MMIO_BASE, GPEXHost, ++ gpex_cfg.mmio64.base, 0), ++ DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MMIO_SIZE, GPEXHost, ++ gpex_cfg.mmio64.size, 0), 
+ DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h +index 65475f7f9d..c414ae5190 100644 +--- a/include/hw/pci-host/gpex.h ++++ b/include/hw/pci-host/gpex.h +@@ -40,6 +40,16 @@ struct GPEXRootState { + /*< public >*/ + }; + ++struct GPEXConfig { ++ MemMapEntry ecam; ++ MemMapEntry mmio32; ++ MemMapEntry mmio64; ++ MemMapEntry pio; ++ int irq; ++ PCIBus *bus; ++ bool preserve_config; ++}; ++ + struct GPEXHost { + /*< private >*/ + PCIExpressHost parent_obj; +@@ -55,20 +65,22 @@ struct GPEXHost { + int irq_num[GPEX_NUM_IRQS]; + + bool allow_unmapped_accesses; +-}; + +-struct GPEXConfig { +- MemMapEntry ecam; +- MemMapEntry mmio32; +- MemMapEntry mmio64; +- MemMapEntry pio; +- int irq; +- PCIBus *bus; +- bool preserve_config; ++ struct GPEXConfig gpex_cfg; + }; + + int gpex_set_irq_num(GPEXHost *s, int index, int gsi); + + void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg); ++void acpi_dsdt_add_gpex_host(Aml *scope, uint32_t irq); ++ ++#define PCI_HOST_PIO_BASE "x-pio-base" ++#define PCI_HOST_PIO_SIZE "x-pio-size" ++#define PCI_HOST_ECAM_BASE "x-ecam-base" ++#define PCI_HOST_ECAM_SIZE "x-ecam-size" ++#define PCI_HOST_BELOW_4G_MMIO_BASE "x-below-4g-mmio-base" ++#define PCI_HOST_BELOW_4G_MMIO_SIZE "x-below-4g-mmio-size" ++#define PCI_HOST_ABOVE_4G_MMIO_BASE "x-above-4g-mmio-base" ++#define PCI_HOST_ABOVE_4G_MMIO_SIZE "x-above-4g-mmio-size" + + #endif /* HW_GPEX_H */ +-- +2.33.0 + diff --git a/hw-pci-host-gpex-needs-kernel-fix-Allow-to-generate-.patch b/hw-pci-host-gpex-needs-kernel-fix-Allow-to-generate-.patch new file mode 100644 index 0000000000000000000000000000000000000000..cbe78ad8f1f01a3b0ce13bf1a02d5338035b323f --- /dev/null +++ b/hw-pci-host-gpex-needs-kernel-fix-Allow-to-generate-.patch @@ -0,0 +1,119 @@ +From 37308e60d43323c0ea65d734487ce6542f8a9d3b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 5 Oct 2021 10:53:12 +0200 +Subject: [PATCH] hw/pci-host/gpex: [needs kernel fix] Allow to 
generate + preserve boot config DSM #5 + +Add a 'preserve_config' field in struct GPEXConfig and +if set, generate the DSM #5 for preserving PCI boot configurations. +The DSM presence is needed to expose RMRs. + +At the moment the DSM generation is not yet enabled. + +Signed-off-by: Eric Auger +--- + hw/pci-host/gpex-acpi.c | 35 +++++++++++++++++++++++++++++++---- + include/hw/pci-host/gpex.h | 1 + + 2 files changed, 32 insertions(+), 4 deletions(-) + +diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c +index ac5d229757..ce424fc9da 100644 +--- a/hw/pci-host/gpex-acpi.c ++++ b/hw/pci-host/gpex-acpi.c +@@ -49,9 +49,10 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) + } + } + +-static void acpi_dsdt_add_pci_osc(Aml *dev) ++static void acpi_dsdt_add_pci_osc(Aml *dev, bool preserve_config) + { + Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf; ++ uint8_t byte_list[1] = {0}; + + /* Declare an _OSC (OS Control Handoff) method */ + aml_append(dev, aml_name_decl("SUPP", aml_int(0))); +@@ -113,10 +114,24 @@ static void acpi_dsdt_add_pci_osc(Aml *dev) + UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); + ifctx = aml_if(aml_equal(aml_arg(0), UUID)); + ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(0))); +- uint8_t byte_list[1] = {0}; ++ if (preserve_config) { ++ /* support for functions other than function 0 and function 5 */ ++ byte_list[0] = 0x21; ++ } + buf = aml_buffer(1, byte_list); + aml_append(ifctx1, aml_return(buf)); + aml_append(ifctx, ifctx1); ++ ++ if (preserve_config) { ++ Aml *ifctx2 = aml_if(aml_equal(aml_arg(2), aml_int(5))); ++ /* ++ * 0 - The operating system must not ignore the PCI configuration that ++ * firmware has done at boot time. 
++ */ ++ aml_append(ifctx2, aml_return(aml_int(0))); ++ aml_append(ifctx, ifctx2); ++ } ++ + aml_append(method, ifctx); + + byte_list[0] = 0; +@@ -174,6 +189,12 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) + aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); + } + ++ if (cfg->preserve_config) { ++ method = aml_method("_DSM", 5, AML_SERIALIZED); ++ aml_append(method, aml_return(aml_int(0))); ++ aml_append(dev, method); ++ } ++ + acpi_dsdt_add_pci_route_table(dev, cfg->irq); + + /* +@@ -188,7 +209,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) + if (is_cxl) { + build_cxl_osc_method(dev); + } else { +- acpi_dsdt_add_pci_osc(dev); ++ acpi_dsdt_add_pci_osc(dev, cfg->preserve_config); + } + + aml_append(scope, dev); +@@ -205,6 +226,12 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) + aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device"))); + aml_append(dev, aml_name_decl("_CCA", aml_int(1))); + ++ if (cfg->preserve_config) { ++ method = aml_method("_DSM", 5, AML_SERIALIZED); ++ aml_append(method, aml_return(aml_int(0))); ++ aml_append(dev, method); ++ } ++ + acpi_dsdt_add_pci_route_table(dev, cfg->irq); + + method = aml_method("_CBA", 0, AML_NOTSERIALIZED); +@@ -263,7 +290,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) + } + aml_append(dev, aml_name_decl("_CRS", rbuf)); + +- acpi_dsdt_add_pci_osc(dev); ++ acpi_dsdt_add_pci_osc(dev, cfg->preserve_config); + + Aml *dev_res0 = aml_device("%s", "RES0"); + aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02"))); +diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h +index b0240bd768..65475f7f9d 100644 +--- a/include/hw/pci-host/gpex.h ++++ b/include/hw/pci-host/gpex.h +@@ -64,6 +64,7 @@ struct GPEXConfig { + MemMapEntry pio; + int irq; + PCIBus *bus; ++ bool preserve_config; + }; + + int gpex_set_irq_num(GPEXHost *s, int index, int gsi); +-- +2.41.0.windows.1 + diff --git 
a/hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch b/hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch deleted file mode 100644 index 76497d9ef4f4e111baba53cdd84ac7b7dbecb112..0000000000000000000000000000000000000000 --- a/hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 595a0d0a0f21cd73863ea3b78ecccb6e0ea8b7a8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Mon, 1 Jun 2020 16:29:25 +0200 -Subject: [PATCH 2/5] hw/pci/pci_bridge: Correct pci_bridge_io memory region - size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -memory_region_set_size() handle the 16 Exabytes limit by -special-casing the UINT64_MAX value. This is not a problem -for the 32-bit maximum, 4 GiB. -By using the UINT32_MAX value, the pci_bridge_io MemoryRegion -ends up missing 1 byte: - - (qemu) info mtree - memory-region: pci_bridge_io - 0000000000000000-00000000fffffffe (prio 0, i/o): pci_bridge_io - 0000000000000060-0000000000000060 (prio 0, i/o): i8042-data - 0000000000000064-0000000000000064 (prio 0, i/o): i8042-cmd - 00000000000001ce-00000000000001d1 (prio 0, i/o): vbe - 0000000000000378-000000000000037f (prio 0, i/o): parallel - 00000000000003b4-00000000000003b5 (prio 0, i/o): vga - ... - -Fix by using the correct value. We now have: - - memory-region: pci_bridge_io - 0000000000000000-00000000ffffffff (prio 0, i/o): pci_bridge_io - 0000000000000060-0000000000000060 (prio 0, i/o): i8042-data - 0000000000000064-0000000000000064 (prio 0, i/o): i8042-cmd - ... - -Reviewed-by: Peter Maydell -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20200601142930.29408-4-f4bug@amsat.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Richard Henderson ---- - hw/pci/pci_bridge.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c -index 715b9a4f..d67c691d 100644 ---- a/hw/pci/pci_bridge.c -+++ b/hw/pci/pci_bridge.c -@@ -30,6 +30,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/units.h" - #include "hw/pci/pci_bridge.h" - #include "hw/pci/pci_bus.h" - #include "qemu/module.h" -@@ -381,7 +382,7 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) - memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX); - sec_bus->address_space_io = &br->address_space_io; - memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", -- UINT32_MAX); -+ 4 * GiB); - br->windows = pci_bridge_region_init(br); - QLIST_INIT(&sec_bus->child); - QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); --- -2.23.0 - diff --git a/hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch b/hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch deleted file mode 100644 index e2f772c6ac1ac3c7b5cdcbf5e2ac033903a723e3..0000000000000000000000000000000000000000 --- a/hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 86f70ed090478cc3b569b3606eb2723a0baadb52 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Tue, 16 Jun 2020 12:25:36 -0400 -Subject: [PATCH] hw/pci/pcie: Move hot plug capability check to pre_plug - callback - -RH-Author: Julia Suvorova -Message-id: <20200616122536.1027685-1-jusual@redhat.com> -Patchwork-id: 97548 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] hw/pci/pcie: Move hot plug capability check to pre_plug callback -Bugzilla: 1820531 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Auger Eric -RH-Acked-by: Sergio Lopez Pascual - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1820531 -BRANCH: rhel-av-8.2.1 -UPSTREAM: merged -BREW: 29422092 - -Check for hot plug capability earlier to avoid removing devices attached -during the 
initialization process. - -Run qemu with an unattached drive: - -drive file=$FILE,if=none,id=drive0 \ - -device pcie-root-port,id=rp0,slot=3,bus=pcie.0,hotplug=off -Hotplug a block device: - device_add virtio-blk-pci,id=blk0,drive=drive0,bus=rp0 -If hotplug fails on plug_cb, drive0 will be deleted. - -Fixes: 0501e1aa1d32a6 ("hw/pci/pcie: Forbid hot-plug if it's disabled on the slot") - -Acked-by: Igor Mammedov -Signed-off-by: Julia Suvorova -Message-Id: <20200604125947.881210-1-jusual@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0dabc0f6544f2c0310546f6d6cf3b68979580a9c) -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/pci/pcie.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index 2b4eedd2bb..b5190a3a55 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -419,6 +419,17 @@ static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, - void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) - { -+ PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); -+ uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; -+ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); -+ -+ /* Check if hot-plug is disabled on the slot */ -+ if (dev->hotplugged && (sltcap & PCI_EXP_SLTCAP_HPC) == 0) { -+ error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", -+ DEVICE(hotplug_pdev)->id); -+ return; -+ } -+ - pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, errp); - } - --- -2.27.0 - diff --git a/hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch b/hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch deleted file mode 100644 index ad3fc3a8356d20d28805db548c12b2b3745e8054..0000000000000000000000000000000000000000 --- a/hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 95cbe18c649a20f98562a993537a67e0ad78bf36 Mon Sep 17 
00:00:00 2001 -From: Stefan Berger -Date: Tue, 21 Jan 2020 10:29:34 -0500 -Subject: [PATCH 08/19] hw/ppc/Kconfig: Enable TPM_SPAPR as part of PSERIES - config -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Stefan Berger -Reviewed-by: Marc-André Lureau -Reviewed-by: David Gibson -Message-Id: <20200121152935.649898-6-stefanb@linux.ibm.com> -[dwg: Use default in Kconfig rather than select to avoid breaking - Windows host build] -Signed-off-by: David Gibson -Signed-off-by: jiangfangjie ---- - hw/tpm/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig -index 4d4ab085..9e67d990 100644 ---- a/hw/tpm/Kconfig -+++ b/hw/tpm/Kconfig -@@ -25,6 +25,6 @@ config TPM_EMULATOR - - config TPM_SPAPR - bool -- default n -+ default y - depends on TPM && PSERIES - select TPMDEV --- -2.23.0 - diff --git a/hw-ppc-e500-Add-missing-device-tree-properties-to-i2.patch b/hw-ppc-e500-Add-missing-device-tree-properties-to-i2.patch new file mode 100644 index 0000000000000000000000000000000000000000..267e5406d5f2bdeb7ed031d1a5f92aeca2964f75 --- /dev/null +++ b/hw-ppc-e500-Add-missing-device-tree-properties-to-i2.patch @@ -0,0 +1,44 @@ +From e025c40fac7d6cc5b4752c392a9c66a074dcaa0b Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 14 Nov 2024 14:24:58 +0800 +Subject: [PATCH] hw/ppc/e500: Add missing device tree properties to i2c + controller node +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from b5d65592d931d07d4f4bcb915d018ec9598058b4 + +When compiling a decompiled device tree blob created with dumpdtb, dtc complains +with: + + /soc@e0000000/i2c@3000: incorrect #address-cells for I2C bus + /soc@e0000000/i2c@3000: incorrect #size-cells for I2C bus + +Fix this by adding the missing device tree properties. 
+ +Reviewed-by: Cédric Le Goater +Signed-off-by: Bernhard Beschow +Message-ID: <20241103133412.73536-6-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Jiao +--- + hw/ppc/e500.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c +index 384226296b..8d394d749a 100644 +--- a/hw/ppc/e500.c ++++ b/hw/ppc/e500.c +@@ -203,6 +203,8 @@ static void dt_i2c_create(void *fdt, const char *soc, const char *mpic, + qemu_fdt_setprop_cells(fdt, i2c, "cell-index", 0); + qemu_fdt_setprop_cells(fdt, i2c, "interrupts", irq0, 0x2); + qemu_fdt_setprop_phandle(fdt, i2c, "interrupt-parent", mpic); ++ qemu_fdt_setprop_cell(fdt, i2c, "#size-cells", 0); ++ qemu_fdt_setprop_cell(fdt, i2c, "#address-cells", 1); + qemu_fdt_setprop_string(fdt, "/aliases", alias, i2c); + + g_free(i2c); +-- +2.41.0.windows.1 + diff --git a/hw-ppc-e500-Prefer-QOM-cast.patch b/hw-ppc-e500-Prefer-QOM-cast.patch new file mode 100644 index 0000000000000000000000000000000000000000..3008c3053135b237d600e5dcba2bb030f0094a9b --- /dev/null +++ b/hw-ppc-e500-Prefer-QOM-cast.patch @@ -0,0 +1,44 @@ +From b85c8374d8b78a6ec1c250bb7562423e6f5d89a0 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 14 Nov 2024 15:12:32 +0800 +Subject: [PATCH] hw/ppc/e500: Prefer QOM cast +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from c620b4ee92ed3664a3d98e0fbb0b651e19fba5b6 + +Reviewed-by: BALATON Zoltan +Signed-off-by: Bernhard Beschow +Message-ID: <20241103133412.73536-4-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Jiao +--- + hw/ppc/e500.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c +index 384226296b..df5a20d3ec 100644 +--- a/hw/ppc/e500.c ++++ b/hw/ppc/e500.c +@@ -1024,7 +1024,7 @@ void ppce500_init(MachineState *machine) + sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC8544_I2C_IRQ)); + 
memory_region_add_subregion(ccsr_addr_space, MPC8544_I2C_REGS_OFFSET, + sysbus_mmio_get_region(s, 0)); +- i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c"); ++ i2c = I2C_BUS(qdev_get_child_bus(dev, "i2c")); + i2c_slave_create_simple(i2c, "ds1338", RTC_REGS_OFFSET); + + /* eSDHC */ +@@ -1073,7 +1073,7 @@ void ppce500_init(MachineState *machine) + memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET, + sysbus_mmio_get_region(s, 0)); + +- pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); ++ pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0")); + if (!pci_bus) + printf("couldn't create PCI controller!\n"); + +-- +2.41.0.windows.1 + diff --git a/hw-ppc-e500-Remove-unused-irqs-parameter.patch b/hw-ppc-e500-Remove-unused-irqs-parameter.patch new file mode 100644 index 0000000000000000000000000000000000000000..4b881490ea5c97ba06a82aac8ead470499aea7e2 --- /dev/null +++ b/hw-ppc-e500-Remove-unused-irqs-parameter.patch @@ -0,0 +1,44 @@ +From 239e256d5510b9aaa3e099359dcda54970e2f08a Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 14 Nov 2024 14:40:02 +0800 +Subject: [PATCH] hw/ppc/e500: Remove unused "irqs" parameter +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 2a309354ac5decf78763c9de999bfb42c8612069 + +Reviewed-by: BALATON Zoltan +Signed-off-by: Bernhard Beschow +Message-ID: <20241103133412.73536-5-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Jiao +--- + hw/ppc/e500.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c +index 384226296b..8ab1ccc969 100644 +--- a/hw/ppc/e500.c ++++ b/hw/ppc/e500.c +@@ -832,7 +832,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms, + } + + static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc, +- IrqLines *irqs, Error **errp) ++ Error **errp) + { + #ifdef CONFIG_KVM + DeviceState *dev; +@@ -872,7 +872,7 @@ static DeviceState 
*ppce500_init_mpic(PPCE500MachineState *pms, + Error *err = NULL; + + if (kvm_kernel_irqchip_allowed()) { +- dev = ppce500_init_mpic_kvm(pmc, irqs, &err); ++ dev = ppce500_init_mpic_kvm(pmc, &err); + } + if (kvm_kernel_irqchip_required() && !dev) { + error_reportf_err(err, +-- +2.41.0.windows.1 + diff --git a/hw-remote-vfio-user-Fix-config-space-access-byte-ord.patch b/hw-remote-vfio-user-Fix-config-space-access-byte-ord.patch new file mode 100644 index 0000000000000000000000000000000000000000..7abb7bf0dcafaa4050e26910c300ee5a0a78adf3 --- /dev/null +++ b/hw-remote-vfio-user-Fix-config-space-access-byte-ord.patch @@ -0,0 +1,49 @@ +From 6165cf85acd2600c8e0edb062d627e4cb42083af Mon Sep 17 00:00:00 2001 +From: Mattias Nissler +Date: Wed, 23 Aug 2023 02:29:30 -0700 +Subject: [PATCH] hw/remote/vfio-user: Fix config space access byte order +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +PCI config space is little-endian, so on a big-endian host we need to +perform byte swaps for values as they are passed to and received from +the generic PCI config space access machinery. + +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Jagannathan Raman +Signed-off-by: Mattias Nissler +Message-ID: <20240507094210.300566-6-mnissler@rivosinc.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit e6578f1f68a0e90789a841ada532c3e494c9a04c) +Signed-off-by: zhujun2 +--- + hw/remote/vfio-user-obj.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c +index 8b10c32a3c..8b708422fe 100644 +--- a/hw/remote/vfio-user-obj.c ++++ b/hw/remote/vfio-user-obj.c +@@ -281,7 +281,7 @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, + while (bytes > 0) { + len = (bytes > pci_access_width) ? 
pci_access_width : bytes; + if (is_write) { +- memcpy(&val, ptr, len); ++ val = ldn_le_p(ptr, len); + pci_host_config_write_common(o->pci_dev, offset, + pci_config_size(o->pci_dev), + val, len); +@@ -289,7 +289,7 @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, + } else { + val = pci_host_config_read_common(o->pci_dev, offset, + pci_config_size(o->pci_dev), len); +- memcpy(ptr, &val, len); ++ stn_le_p(ptr, len, val); + trace_vfu_cfg_read(offset, val); + } + offset += len; +-- +2.41.0.windows.1 + diff --git a/hw-riscv-virt-Make-few-IMSIC-macros-and-functions-pu.patch b/hw-riscv-virt-Make-few-IMSIC-macros-and-functions-pu.patch new file mode 100644 index 0000000000000000000000000000000000000000..d894084624b5804af48313d34b67e41e95dc5043 --- /dev/null +++ b/hw-riscv-virt-Make-few-IMSIC-macros-and-functions-pu.patch @@ -0,0 +1,117 @@ +From 2ce3c25215a931f183ad530baba7a07ddd55cfe6 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:27 +0800 +Subject: [PATCH 04/18] hw/riscv: virt: Make few IMSIC macros and functions + public + +commit 68c8b403c78b8f20acbebba3cdc46320853fe5ca upstream + +Some macros and static function related to IMSIC are defined in virt.c. +They are required in virt-acpi-build.c. So, make them public. + +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Alistair Francis +Reviewed-by: Andrew Jones +Acked-by: Michael S. 
Tsirkin +Message-ID: <20231218150247.466427-5-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt.c | 25 +------------------------ + include/hw/riscv/virt.h | 25 +++++++++++++++++++++++++ + 2 files changed, 26 insertions(+), 24 deletions(-) + +diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c +index 9b29ed1108..30b9f8cab6 100644 +--- a/hw/riscv/virt.c ++++ b/hw/riscv/virt.c +@@ -39,7 +39,6 @@ + #include "hw/firmware/smbios.h" + #include "hw/intc/riscv_aclint.h" + #include "hw/intc/riscv_aplic.h" +-#include "hw/intc/riscv_imsic.h" + #include "hw/intc/sifive_plic.h" + #include "hw/misc/sifive_test.h" + #include "hw/platform-bus.h" +@@ -55,28 +54,6 @@ + #include "hw/acpi/aml-build.h" + #include "qapi/qapi-visit-common.h" + +-/* +- * The virt machine physical address space used by some of the devices +- * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets, +- * number of CPUs, and number of IMSIC guest files. +- * +- * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS, +- * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization +- * of virt machine physical address space. +- */ +- +-#define VIRT_IMSIC_GROUP_MAX_SIZE (1U << IMSIC_MMIO_GROUP_MIN_SHIFT) +-#if VIRT_IMSIC_GROUP_MAX_SIZE < \ +- IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS) +-#error "Can't accommodate single IMSIC group in address space" +-#endif +- +-#define VIRT_IMSIC_MAX_SIZE (VIRT_SOCKETS_MAX * \ +- VIRT_IMSIC_GROUP_MAX_SIZE) +-#if 0x4000000 < VIRT_IMSIC_MAX_SIZE +-#error "Can't accommodate all IMSIC groups in address space" +-#endif +- + /* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. 
*/ + static bool virt_use_kvm_aia(RISCVVirtState *s) + { +@@ -513,7 +490,7 @@ static void create_fdt_socket_plic(RISCVVirtState *s, + g_free(plic_cells); + } + +-static uint32_t imsic_num_bits(uint32_t count) ++uint32_t imsic_num_bits(uint32_t count) + { + uint32_t ret = 0; + +diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h +index e5c474b26e..5b03575ed3 100644 +--- a/include/hw/riscv/virt.h ++++ b/include/hw/riscv/virt.h +@@ -23,6 +23,7 @@ + #include "hw/riscv/riscv_hart.h" + #include "hw/sysbus.h" + #include "hw/block/flash.h" ++#include "hw/intc/riscv_imsic.h" + + #define VIRT_CPUS_MAX_BITS 9 + #define VIRT_CPUS_MAX (1 << VIRT_CPUS_MAX_BITS) +@@ -127,4 +128,28 @@ enum { + + bool virt_is_acpi_enabled(RISCVVirtState *s); + void virt_acpi_setup(RISCVVirtState *vms); ++uint32_t imsic_num_bits(uint32_t count); ++ ++/* ++ * The virt machine physical address space used by some of the devices ++ * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets, ++ * number of CPUs, and number of IMSIC guest files. ++ * ++ * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS, ++ * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization ++ * of virt machine physical address space. 
++ */ ++ ++#define VIRT_IMSIC_GROUP_MAX_SIZE (1U << IMSIC_MMIO_GROUP_MIN_SHIFT) ++#if VIRT_IMSIC_GROUP_MAX_SIZE < \ ++ IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS) ++#error "Can't accomodate single IMSIC group in address space" ++#endif ++ ++#define VIRT_IMSIC_MAX_SIZE (VIRT_SOCKETS_MAX * \ ++ VIRT_IMSIC_GROUP_MAX_SIZE) ++#if 0x4000000 < VIRT_IMSIC_MAX_SIZE ++#error "Can't accomodate all IMSIC groups in address space" ++#endif ++ + #endif +-- +2.33.0 + diff --git a/hw-riscv-virt-Update-GPEX-MMIO-related-properties.patch b/hw-riscv-virt-Update-GPEX-MMIO-related-properties.patch new file mode 100644 index 0000000000000000000000000000000000000000..818ec8961b517fbb692906ee147809a4bd437a99 --- /dev/null +++ b/hw-riscv-virt-Update-GPEX-MMIO-related-properties.patch @@ -0,0 +1,117 @@ +From 9367132cdc0340ebc17d434527818fe87f1f0fa4 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:35 +0800 +Subject: [PATCH 11/18] hw/riscv/virt: Update GPEX MMIO related properties + +commit e86e95270e2b10e57c69852778452b54b31e1c19 upstream + +Update the GPEX host bridge properties related to MMIO ranges with +values set for the virt machine. + +Suggested-by: Igor Mammedov +Signed-off-by: Sunil V L +Reviewed-by: Alistair Francis +Acked-by: Michael S. 
Tsirkin +Message-ID: <20231218150247.466427-12-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt.c | 47 ++++++++++++++++++++++++++++------------- + include/hw/riscv/virt.h | 1 + + 2 files changed, 33 insertions(+), 15 deletions(-) + +diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c +index 30b9f8cab6..c47df46bfc 100644 +--- a/hw/riscv/virt.c ++++ b/hw/riscv/virt.c +@@ -1055,21 +1055,45 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) + } + + static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, +- hwaddr ecam_base, hwaddr ecam_size, +- hwaddr mmio_base, hwaddr mmio_size, +- hwaddr high_mmio_base, +- hwaddr high_mmio_size, +- hwaddr pio_base, +- DeviceState *irqchip) ++ DeviceState *irqchip, ++ RISCVVirtState *s) + { + DeviceState *dev; + MemoryRegion *ecam_alias, *ecam_reg; + MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg; ++ hwaddr ecam_base = s->memmap[VIRT_PCIE_ECAM].base; ++ hwaddr ecam_size = s->memmap[VIRT_PCIE_ECAM].size; ++ hwaddr mmio_base = s->memmap[VIRT_PCIE_MMIO].base; ++ hwaddr mmio_size = s->memmap[VIRT_PCIE_MMIO].size; ++ hwaddr high_mmio_base = virt_high_pcie_memmap.base; ++ hwaddr high_mmio_size = virt_high_pcie_memmap.size; ++ hwaddr pio_base = s->memmap[VIRT_PCIE_PIO].base; ++ hwaddr pio_size = s->memmap[VIRT_PCIE_PIO].size; + qemu_irq irq; + int i; + + dev = qdev_new(TYPE_GPEX_HOST); + ++ /* Set GPEX object properties for the virt machine */ ++ object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE, ++ ecam_base, NULL); ++ object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE, ++ ecam_size, NULL); ++ object_property_set_uint(OBJECT(GPEX_HOST(dev)), ++ PCI_HOST_BELOW_4G_MMIO_BASE, ++ mmio_base, NULL); ++ object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE, ++ mmio_size, NULL); ++ object_property_set_uint(OBJECT(GPEX_HOST(dev)), ++ PCI_HOST_ABOVE_4G_MMIO_BASE, ++ high_mmio_base, NULL); ++ 
object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE, ++ high_mmio_size, NULL); ++ object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE, ++ pio_base, NULL); ++ object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE, ++ pio_size, NULL); ++ + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + ecam_alias = g_new0(MemoryRegion, 1); +@@ -1100,6 +1124,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, + gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i); + } + ++ GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus; + return dev; + } + +@@ -1536,15 +1561,7 @@ static void virt_machine_init(MachineState *machine) + qdev_get_gpio_in(virtio_irqchip, VIRTIO_IRQ + i)); + } + +- gpex_pcie_init(system_memory, +- memmap[VIRT_PCIE_ECAM].base, +- memmap[VIRT_PCIE_ECAM].size, +- memmap[VIRT_PCIE_MMIO].base, +- memmap[VIRT_PCIE_MMIO].size, +- virt_high_pcie_memmap.base, +- virt_high_pcie_memmap.size, +- memmap[VIRT_PCIE_PIO].base, +- pcie_irqchip); ++ gpex_pcie_init(system_memory, pcie_irqchip, s); + + create_platform_bus(s, mmio_irqchip); + +diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h +index 5b03575ed3..f89790fd58 100644 +--- a/include/hw/riscv/virt.h ++++ b/include/hw/riscv/virt.h +@@ -61,6 +61,7 @@ struct RISCVVirtState { + char *oem_table_id; + OnOffAuto acpi; + const MemMapEntry *memmap; ++ struct GPEXHost *gpex_host; + }; + + enum { +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-AIA-support-in-RINTC.patch b/hw-riscv-virt-acpi-build.c-Add-AIA-support-in-RINTC.patch new file mode 100644 index 0000000000000000000000000000000000000000..14d6e29eb3ff2ef26a8920d2e130deea1ac45d3a --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-AIA-support-in-RINTC.patch @@ -0,0 +1,114 @@ +From bd3c5ec667493e193db4a2fdf165b96f45af8262 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:28 +0800 +Subject: [PATCH 05/18] hw/riscv/virt-acpi-build.c: Add AIA 
support in RINTC + +commit 0efb12b713338e2be713b689d1c9743f7163f85d upstream + +Update the RINTC structure in MADT with AIA related fields. + +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Acked-by: Alistair Francis +Reviewed-by: Andrew Jones +Acked-by: Michael S. Tsirkin +Message-ID: <20231218150247.466427-6-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 43 ++++++++++++++++++++++++++++++++++---- + 1 file changed, 39 insertions(+), 4 deletions(-) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index d8772c2821..3f9536356e 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -38,6 +38,7 @@ + #include "hw/intc/riscv_aclint.h" + + #define ACPI_BUILD_TABLE_SIZE 0x20000 ++#define ACPI_BUILD_INTC_ID(socket, index) ((socket << 24) | (index)) + + typedef struct AcpiBuildState { + /* Copy of table in RAM (for patching) */ +@@ -59,17 +60,50 @@ static void acpi_align_size(GArray *blob, unsigned align) + + static void riscv_acpi_madt_add_rintc(uint32_t uid, + const CPUArchIdList *arch_ids, +- GArray *entry) ++ GArray *entry, ++ RISCVVirtState *s) + { ++ uint8_t guest_index_bits = imsic_num_bits(s->aia_guests + 1); + uint64_t hart_id = arch_ids->cpus[uid].arch_id; ++ uint32_t imsic_size, local_cpu_id, socket_id; ++ uint64_t imsic_socket_addr, imsic_addr; ++ MachineState *ms = MACHINE(s); + ++ socket_id = arch_ids->cpus[uid].props.node_id; ++ local_cpu_id = (arch_ids->cpus[uid].arch_id - ++ riscv_socket_first_hartid(ms, socket_id)) % ++ riscv_socket_hart_count(ms, socket_id); ++ imsic_socket_addr = s->memmap[VIRT_IMSIC_S].base + ++ (socket_id * VIRT_IMSIC_GROUP_MAX_SIZE); ++ imsic_size = IMSIC_HART_SIZE(guest_index_bits); ++ imsic_addr = imsic_socket_addr + local_cpu_id * imsic_size; + build_append_int_noprefix(entry, 0x18, 1); /* Type */ +- build_append_int_noprefix(entry, 20, 1); /* Length */ ++ build_append_int_noprefix(entry, 36, 1); /* Length */ + 
build_append_int_noprefix(entry, 1, 1); /* Version */ + build_append_int_noprefix(entry, 0, 1); /* Reserved */ + build_append_int_noprefix(entry, 0x1, 4); /* Flags */ + build_append_int_noprefix(entry, hart_id, 8); /* Hart ID */ + build_append_int_noprefix(entry, uid, 4); /* ACPI Processor UID */ ++ /* External Interrupt Controller ID */ ++ if (s->aia_type == VIRT_AIA_TYPE_APLIC) { ++ build_append_int_noprefix(entry, ++ ACPI_BUILD_INTC_ID( ++ arch_ids->cpus[uid].props.node_id, ++ local_cpu_id), ++ 4); ++ } else { ++ build_append_int_noprefix(entry, 0, 4); ++ } ++ ++ if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { ++ /* IMSIC Base address */ ++ build_append_int_noprefix(entry, imsic_addr, 8); ++ /* IMSIC Size */ ++ build_append_int_noprefix(entry, imsic_size, 4); ++ } else { ++ build_append_int_noprefix(entry, 0, 8); ++ build_append_int_noprefix(entry, 0, 4); ++ } + } + + static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) +@@ -88,7 +122,7 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) + aml_int(arch_ids->cpus[i].arch_id))); + + /* build _MAT object */ +- riscv_acpi_madt_add_rintc(i, arch_ids, madt_buf); ++ riscv_acpi_madt_add_rintc(i, arch_ids, madt_buf, s); + aml_append(dev, aml_name_decl("_MAT", + aml_buffer(madt_buf->len, + (uint8_t *)madt_buf->data))); +@@ -227,6 +261,7 @@ static void build_dsdt(GArray *table_data, + * 5.2.12 Multiple APIC Description Table (MADT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15 + * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view ++ * https://drive.google.com/file/d/1oMGPyOD58JaPgMl1pKasT-VKsIKia7zR/view + */ + static void build_madt(GArray *table_data, + BIOSLinker *linker, +@@ -246,7 +281,7 @@ static void build_madt(GArray *table_data, + + /* RISC-V Local INTC structures per HART */ + for (int i = 0; i < arch_ids->len; i++) { +- riscv_acpi_madt_add_rintc(i, arch_ids, table_data); ++ riscv_acpi_madt_add_rintc(i, arch_ids, table_data, s); + } + + 
acpi_table_end(linker, &table); +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-APLIC-in-the-MADT.patch b/hw-riscv-virt-acpi-build.c-Add-APLIC-in-the-MADT.patch new file mode 100644 index 0000000000000000000000000000000000000000..92cad72581367c2540b6f39ffba1b3139e91466c --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-APLIC-in-the-MADT.patch @@ -0,0 +1,76 @@ +From d9f8fe22148991711170825346b61ff3f8adfb0f Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:30 +0800 +Subject: [PATCH 07/18] hw/riscv/virt-acpi-build.c: Add APLIC in the MADT + +commit 7d189186f68b2b249c0bd6c84984f3aad2bcd1ca upstream + +Add APLIC structures for each socket in the MADT when system is configured +with APLIC as the external wired interrupt controller. + +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Andrew Jones +Acked-by: Alistair Francis +Acked-by: Michael S. Tsirkin +Message-ID: <20231218150247.466427-8-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 34 ++++++++++++++++++++++++++++++++++ + 1 file changed, 34 insertions(+) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 6bb21014fd..ec49c8804b 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -274,6 +274,8 @@ static void build_madt(GArray *table_data, + uint8_t guest_index_bits = imsic_num_bits(s->aia_guests + 1); + uint16_t imsic_max_hart_per_socket = 0; + uint8_t hart_index_bits; ++ uint64_t aplic_addr; ++ uint32_t gsi_base; + uint8_t socket; + + for (socket = 0; socket < riscv_socket_count(ms); socket++) { +@@ -319,6 +321,38 @@ static void build_madt(GArray *table_data, + build_append_int_noprefix(table_data, IMSIC_MMIO_GROUP_MIN_SHIFT, 1); + } + ++ if (s->aia_type != VIRT_AIA_TYPE_NONE) { ++ /* APLICs */ ++ for (socket = 0; socket < riscv_socket_count(ms); socket++) { ++ aplic_addr = s->memmap[VIRT_APLIC_S].base + ++ s->memmap[VIRT_APLIC_S].size * socket; ++ gsi_base = 
VIRT_IRQCHIP_NUM_SOURCES * socket; ++ build_append_int_noprefix(table_data, 0x1A, 1); /* Type */ ++ build_append_int_noprefix(table_data, 36, 1); /* Length */ ++ build_append_int_noprefix(table_data, 1, 1); /* Version */ ++ build_append_int_noprefix(table_data, socket, 1); /* APLIC ID */ ++ build_append_int_noprefix(table_data, 0, 4); /* Flags */ ++ build_append_int_noprefix(table_data, 0, 8); /* Hardware ID */ ++ /* Number of IDCs */ ++ if (s->aia_type == VIRT_AIA_TYPE_APLIC) { ++ build_append_int_noprefix(table_data, ++ s->soc[socket].num_harts, ++ 2); ++ } else { ++ build_append_int_noprefix(table_data, 0, 2); ++ } ++ /* Total External Interrupt Sources Supported */ ++ build_append_int_noprefix(table_data, VIRT_IRQCHIP_NUM_SOURCES, 2); ++ /* Global System Interrupt Base */ ++ build_append_int_noprefix(table_data, gsi_base, 4); ++ /* APLIC Address */ ++ build_append_int_noprefix(table_data, aplic_addr, 8); ++ /* APLIC size */ ++ build_append_int_noprefix(table_data, ++ s->memmap[VIRT_APLIC_S].size, 4); ++ } ++ } ++ + acpi_table_end(linker, &table); + } + +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-CMO-information-in-RH.patch b/hw-riscv-virt-acpi-build.c-Add-CMO-information-in-RH.patch new file mode 100644 index 0000000000000000000000000000000000000000..02b012919fa38126237524dc5fca81d7510d4b16 --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-CMO-information-in-RH.patch @@ -0,0 +1,131 @@ +From 99684e0a5700edb5d7ca51f255f23a54749efd1a Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:31 +0800 +Subject: [PATCH 08/18] hw/riscv/virt-acpi-build.c: Add CMO information in RHCT + +commit e810a5177c44509e17293d4c7e6cffab8ce197c9 upstream + +When CMO related extensions like Zicboz, Zicbom and Zicbop are enabled, the +block size for those extensions need to be communicated via CMO node in +RHCT. Add CMO node in RHCT if any of those CMO extensions are detected. 
+ +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Andrew Jones +Acked-by: Alistair Francis +Acked-by: Michael S. Tsirkin +Message-ID: <20231218150247.466427-9-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 64 +++++++++++++++++++++++++++++++++----- + 1 file changed, 56 insertions(+), 8 deletions(-) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index ec49c8804b..506d487ede 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -140,6 +140,7 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) + * 5.2.36 RISC-V Hart Capabilities Table (RHCT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16 + * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view ++ * https://drive.google.com/file/d/1sKbOa8m1UZw1JkquZYe3F1zQBN1xXsaf/view + */ + static void build_rhct(GArray *table_data, + BIOSLinker *linker, +@@ -149,8 +150,8 @@ static void build_rhct(GArray *table_data, + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + size_t len, aligned_len; +- uint32_t isa_offset, num_rhct_nodes; +- RISCVCPU *cpu; ++ uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0; ++ RISCVCPU *cpu = &s->soc[0].harts[0]; + char *isa; + + AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id, +@@ -166,6 +167,9 @@ static void build_rhct(GArray *table_data, + + /* ISA + N hart info */ + num_rhct_nodes = 1 + ms->smp.cpus; ++ if (cpu->cfg.ext_zicbom || cpu->cfg.ext_zicboz) { ++ num_rhct_nodes++; ++ } + + /* Number of RHCT nodes*/ + build_append_int_noprefix(table_data, num_rhct_nodes, 4); +@@ -177,7 +181,6 @@ static void build_rhct(GArray *table_data, + isa_offset = table_data->len - table.table_offset; + build_append_int_noprefix(table_data, 0, 2); /* Type 0 */ + +- cpu = &s->soc[0].harts[0]; + isa = riscv_isa_string(cpu); + len = 8 + strlen(isa) + 1; + aligned_len = (len % 2) ? 
(len + 1) : len; +@@ -193,14 +196,59 @@ static void build_rhct(GArray *table_data, + build_append_int_noprefix(table_data, 0x0, 1); /* Optional Padding */ + } + ++ /* CMO node */ ++ if (cpu->cfg.ext_zicbom || cpu->cfg.ext_zicboz) { ++ cmo_offset = table_data->len - table.table_offset; ++ build_append_int_noprefix(table_data, 1, 2); /* Type */ ++ build_append_int_noprefix(table_data, 10, 2); /* Length */ ++ build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ ++ build_append_int_noprefix(table_data, 0, 1); /* Reserved */ ++ ++ /* CBOM block size */ ++ if (cpu->cfg.cbom_blocksize) { ++ build_append_int_noprefix(table_data, ++ __builtin_ctz(cpu->cfg.cbom_blocksize), ++ 1); ++ } else { ++ build_append_int_noprefix(table_data, 0, 1); ++ } ++ ++ /* CBOP block size */ ++ build_append_int_noprefix(table_data, 0, 1); ++ ++ /* CBOZ block size */ ++ if (cpu->cfg.cboz_blocksize) { ++ build_append_int_noprefix(table_data, ++ __builtin_ctz(cpu->cfg.cboz_blocksize), ++ 1); ++ } else { ++ build_append_int_noprefix(table_data, 0, 1); ++ } ++ } ++ + /* Hart Info Node */ + for (int i = 0; i < arch_ids->len; i++) { ++ len = 16; ++ int num_offsets = 1; + build_append_int_noprefix(table_data, 0xFFFF, 2); /* Type */ +- build_append_int_noprefix(table_data, 16, 2); /* Length */ +- build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ +- build_append_int_noprefix(table_data, 1, 2); /* Number of offsets */ +- build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ +- build_append_int_noprefix(table_data, isa_offset, 4); /* Offsets[0] */ ++ ++ /* Length */ ++ if (cmo_offset) { ++ len += 4; ++ num_offsets++; ++ } ++ ++ build_append_int_noprefix(table_data, len, 2); ++ build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ ++ /* Number of offsets */ ++ build_append_int_noprefix(table_data, num_offsets, 2); ++ build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ ++ ++ /* Offsets */ ++ build_append_int_noprefix(table_data, isa_offset, 4); ++ 
if (cmo_offset) { ++ build_append_int_noprefix(table_data, cmo_offset, 4); ++ } + } + + acpi_table_end(linker, &table); +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-IMSIC-in-the-MADT.patch b/hw-riscv-virt-acpi-build.c-Add-IMSIC-in-the-MADT.patch new file mode 100644 index 0000000000000000000000000000000000000000..adfac94eb80f1b14fbd9f8b8d11f215ce15b681e --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-IMSIC-in-the-MADT.patch @@ -0,0 +1,76 @@ +From ff2e662b539eb2cf9a754f1c5fb2e055e67ac3f7 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:29 +0800 +Subject: [PATCH 06/18] hw/riscv/virt-acpi-build.c: Add IMSIC in the MADT + +commit 66ac45b75975a64aa3fbcaa038aecfbc11ac8547 upstream + +Add IMSIC structure in MADT when IMSIC is configured. + +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Andrew Jones +Acked-by: Alistair Francis +Acked-by: Michael S. Tsirkin +Message-ID: <20231218150247.466427-7-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 3f9536356e..6bb21014fd 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -270,6 +270,19 @@ static void build_madt(GArray *table_data, + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); ++ uint8_t group_index_bits = imsic_num_bits(riscv_socket_count(ms)); ++ uint8_t guest_index_bits = imsic_num_bits(s->aia_guests + 1); ++ uint16_t imsic_max_hart_per_socket = 0; ++ uint8_t hart_index_bits; ++ uint8_t socket; ++ ++ for (socket = 0; socket < riscv_socket_count(ms); socket++) { ++ if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { ++ imsic_max_hart_per_socket = s->soc[socket].num_harts; ++ } ++ } ++ ++ hart_index_bits = 
imsic_num_bits(imsic_max_hart_per_socket); + + AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; +@@ -284,6 +297,28 @@ static void build_madt(GArray *table_data, + riscv_acpi_madt_add_rintc(i, arch_ids, table_data, s); + } + ++ /* IMSIC */ ++ if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { ++ /* IMSIC */ ++ build_append_int_noprefix(table_data, 0x19, 1); /* Type */ ++ build_append_int_noprefix(table_data, 16, 1); /* Length */ ++ build_append_int_noprefix(table_data, 1, 1); /* Version */ ++ build_append_int_noprefix(table_data, 0, 1); /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 4); /* Flags */ ++ /* Number of supervisor mode Interrupt Identities */ ++ build_append_int_noprefix(table_data, VIRT_IRQCHIP_NUM_MSIS, 2); ++ /* Number of guest mode Interrupt Identities */ ++ build_append_int_noprefix(table_data, VIRT_IRQCHIP_NUM_MSIS, 2); ++ /* Guest Index Bits */ ++ build_append_int_noprefix(table_data, guest_index_bits, 1); ++ /* Hart Index Bits */ ++ build_append_int_noprefix(table_data, hart_index_bits, 1); ++ /* Group Index Bits */ ++ build_append_int_noprefix(table_data, group_index_bits, 1); ++ /* Group Index Shift */ ++ build_append_int_noprefix(table_data, IMSIC_MMIO_GROUP_MIN_SHIFT, 1); ++ } ++ + acpi_table_end(linker, &table); + } + +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-IO-controllers-and-de.patch b/hw-riscv-virt-acpi-build.c-Add-IO-controllers-and-de.patch new file mode 100644 index 0000000000000000000000000000000000000000..b37a4ce69e7f3bb81fc89718977dbb76c6d2e39f --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-IO-controllers-and-de.patch @@ -0,0 +1,160 @@ +From 58d2550602ca8ccf9cc4b80bb7ddd1580052eeab Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:36 +0800 +Subject: [PATCH 12/18] hw/riscv/virt-acpi-build.c: Add IO controllers and + devices + +commit 55ecd83b3697d0e4002c1dfde3265ebe6fa887cc upstream + +Add basic IO controllers and devices like PCI, 
VirtIO and UART in the +ACPI namespace. + +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Acked-by: Alistair Francis +Acked-by: Michael S. Tsirkin +Message-ID: <20231218150247.466427-13-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/Kconfig | 1 + + hw/riscv/virt-acpi-build.c | 79 ++++++++++++++++++++++++++++++++++++-- + 2 files changed, 76 insertions(+), 4 deletions(-) + +diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig +index 1e11ac9432..5d644eb7b1 100644 +--- a/hw/riscv/Kconfig ++++ b/hw/riscv/Kconfig +@@ -46,6 +46,7 @@ config RISCV_VIRT + select FW_CFG_DMA + select PLATFORM_BUS + select ACPI ++ select ACPI_PCI + + config SHAKTI_C + bool +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 86c38f7c2b..4d03a27efd 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -27,15 +27,18 @@ + #include "hw/acpi/acpi-defs.h" + #include "hw/acpi/acpi.h" + #include "hw/acpi/aml-build.h" ++#include "hw/acpi/pci.h" + #include "hw/acpi/utils.h" ++#include "hw/intc/riscv_aclint.h" + #include "hw/nvram/fw_cfg_acpi.h" ++#include "hw/pci-host/gpex.h" ++#include "hw/riscv/virt.h" ++#include "hw/riscv/numa.h" ++#include "hw/virtio/virtio-acpi.h" ++#include "migration/vmstate.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/reset.h" +-#include "migration/vmstate.h" +-#include "hw/riscv/virt.h" +-#include "hw/riscv/numa.h" +-#include "hw/intc/riscv_aclint.h" + + #define ACPI_BUILD_TABLE_SIZE 0x20000 + #define ACPI_BUILD_INTC_ID(socket, index) ((socket << 24) | (index)) +@@ -132,6 +135,39 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) + } + } + ++static void ++acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, ++ uint32_t uart_irq) ++{ ++ Aml *dev = aml_device("COM0"); ++ aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501"))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(0))); ++ ++ Aml *crs = aml_resource_template(); ++ 
aml_append(crs, aml_memory32_fixed(uart_memmap->base, ++ uart_memmap->size, AML_READ_WRITE)); ++ aml_append(crs, ++ aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH, ++ AML_EXCLUSIVE, &uart_irq, 1)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ ++ Aml *pkg = aml_package(2); ++ aml_append(pkg, aml_string("clock-frequency")); ++ aml_append(pkg, aml_int(3686400)); ++ ++ Aml *UUID = aml_touuid("DAFFD814-6EBA-4D8C-8A91-BC9BBF4AA301"); ++ ++ Aml *pkg1 = aml_package(1); ++ aml_append(pkg1, pkg); ++ ++ Aml *package = aml_package(2); ++ aml_append(package, UUID); ++ aml_append(package, pkg1); ++ ++ aml_append(dev, aml_name_decl("_DSD", package)); ++ aml_append(scope, dev); ++} ++ + /* RHCT Node[N] starts at offset 56 */ + #define RHCT_NODE_ARRAY_OFFSET 56 + +@@ -310,6 +346,8 @@ static void build_dsdt(GArray *table_data, + RISCVVirtState *s) + { + Aml *scope, *dsdt; ++ MachineState *ms = MACHINE(s); ++ uint8_t socket_count; + const MemMapEntry *memmap = s->memmap; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; +@@ -329,6 +367,29 @@ static void build_dsdt(GArray *table_data, + + fw_cfg_acpi_dsdt_add(scope, &memmap[VIRT_FW_CFG]); + ++ socket_count = riscv_socket_count(ms); ++ ++ acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ); ++ ++ if (socket_count == 1) { ++ virtio_acpi_dsdt_add(scope, memmap[VIRT_VIRTIO].base, ++ memmap[VIRT_VIRTIO].size, ++ VIRTIO_IRQ, 0, VIRTIO_COUNT); ++ acpi_dsdt_add_gpex_host(scope, PCIE_IRQ); ++ } else if (socket_count == 2) { ++ virtio_acpi_dsdt_add(scope, memmap[VIRT_VIRTIO].base, ++ memmap[VIRT_VIRTIO].size, ++ VIRTIO_IRQ + VIRT_IRQCHIP_NUM_SOURCES, 0, ++ VIRTIO_COUNT); ++ acpi_dsdt_add_gpex_host(scope, PCIE_IRQ + VIRT_IRQCHIP_NUM_SOURCES); ++ } else { ++ virtio_acpi_dsdt_add(scope, memmap[VIRT_VIRTIO].base, ++ memmap[VIRT_VIRTIO].size, ++ VIRTIO_IRQ + VIRT_IRQCHIP_NUM_SOURCES, 0, ++ VIRTIO_COUNT); ++ acpi_dsdt_add_gpex_host(scope, PCIE_IRQ + VIRT_IRQCHIP_NUM_SOURCES * 
2); ++ } ++ + aml_append(dsdt, scope); + + /* copy AML table into ACPI tables blob and patch header there */ +@@ -465,6 +526,16 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) + acpi_add_table(table_offsets, tables_blob); + build_rhct(tables_blob, tables->linker, s); + ++ acpi_add_table(table_offsets, tables_blob); ++ { ++ AcpiMcfgInfo mcfg = { ++ .base = s->memmap[VIRT_PCIE_MMIO].base, ++ .size = s->memmap[VIRT_PCIE_MMIO].size, ++ }; ++ build_mcfg(tables_blob, tables->linker, &mcfg, s->oem_id, ++ s->oem_table_id); ++ } ++ + /* XSDT is pointed to by RSDP */ + xsdt = tables_blob->len; + build_xsdt(tables_blob, tables->linker, table_offsets, s->oem_id, +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-MMU-node-in-RHCT.patch b/hw-riscv-virt-acpi-build.c-Add-MMU-node-in-RHCT.patch new file mode 100644 index 0000000000000000000000000000000000000000..24a1c290def58f26f117fc9cdc083a2875bf5580 --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-MMU-node-in-RHCT.patch @@ -0,0 +1,102 @@ +From 7310f0bc6612ced123be51af2fd720e84955759a Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:33 +0800 +Subject: [PATCH 09/18] hw/riscv/virt-acpi-build.c: Add MMU node in RHCT + +commit a52aea263e0f25993e368ee682d96f32aff52499 upstream + +MMU type information is available via MMU node in RHCT. Add this node in +RHCT. + +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Andrew Jones +Acked-by: Alistair Francis +Acked-by: Michael S. 
Tsirkin +Message-ID: <20231218150247.466427-10-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 506d487ede..86c38f7c2b 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -152,6 +152,8 @@ static void build_rhct(GArray *table_data, + size_t len, aligned_len; + uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0; + RISCVCPU *cpu = &s->soc[0].harts[0]; ++ uint32_t mmu_offset = 0; ++ uint8_t satp_mode_max; + char *isa; + + AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id, +@@ -171,6 +173,10 @@ static void build_rhct(GArray *table_data, + num_rhct_nodes++; + } + ++ if (cpu->cfg.satp_mode.supported != 0) { ++ num_rhct_nodes++; ++ } ++ + /* Number of RHCT nodes*/ + build_append_int_noprefix(table_data, num_rhct_nodes, 4); + +@@ -226,6 +232,26 @@ static void build_rhct(GArray *table_data, + } + } + ++ /* MMU node structure */ ++ if (cpu->cfg.satp_mode.supported != 0) { ++ satp_mode_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map); ++ mmu_offset = table_data->len - table.table_offset; ++ build_append_int_noprefix(table_data, 2, 2); /* Type */ ++ build_append_int_noprefix(table_data, 8, 2); /* Length */ ++ build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ ++ build_append_int_noprefix(table_data, 0, 1); /* Reserved */ ++ /* MMU Type */ ++ if (satp_mode_max == VM_1_10_SV57) { ++ build_append_int_noprefix(table_data, 2, 1); /* Sv57 */ ++ } else if (satp_mode_max == VM_1_10_SV48) { ++ build_append_int_noprefix(table_data, 1, 1); /* Sv48 */ ++ } else if (satp_mode_max == VM_1_10_SV39) { ++ build_append_int_noprefix(table_data, 0, 1); /* Sv39 */ ++ } else { ++ assert(1); ++ } ++ } ++ + /* Hart Info Node */ + for (int i = 0; i < arch_ids->len; i++) { + len = 16; +@@ -238,17 +264,25 @@ static void 
build_rhct(GArray *table_data, + num_offsets++; + } + ++ if (mmu_offset) { ++ len += 4; ++ num_offsets++; ++ } ++ + build_append_int_noprefix(table_data, len, 2); + build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ + /* Number of offsets */ + build_append_int_noprefix(table_data, num_offsets, 2); + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ +- + /* Offsets */ + build_append_int_noprefix(table_data, isa_offset, 4); + if (cmo_offset) { + build_append_int_noprefix(table_data, cmo_offset, 4); + } ++ ++ if (mmu_offset) { ++ build_append_int_noprefix(table_data, mmu_offset, 4); ++ } + } + + acpi_table_end(linker, &table); +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-PLIC-in-MADT.patch b/hw-riscv-virt-acpi-build.c-Add-PLIC-in-MADT.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c89086e2b928992fd30f654de668bfa8fd3830d --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-PLIC-in-MADT.patch @@ -0,0 +1,72 @@ +From 37a9be92a26b78341aa2ab03126590b9d9fa18b6 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:37 +0800 +Subject: [PATCH 13/18] hw/riscv/virt-acpi-build.c: Add PLIC in MADT + +commit d641da6ed431f497b763a6e6bf30e0b4dc00e0d9 upstream + +Add PLIC structures for each socket in the MADT when system is +configured with PLIC as the external interrupt controller. + +Signed-off-by: Haibo Xu +Signed-off-by: Sunil V L +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Andrew Jones +Acked-by: Alistair Francis +Acked-by: Michael S. 
Tsirkin +Message-ID: <20231218150247.466427-14-sunilvl@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 4d03a27efd..d4a02579d6 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -94,6 +94,12 @@ static void riscv_acpi_madt_add_rintc(uint32_t uid, + arch_ids->cpus[uid].props.node_id, + local_cpu_id), + 4); ++ } else if (s->aia_type == VIRT_AIA_TYPE_NONE) { ++ build_append_int_noprefix(entry, ++ ACPI_BUILD_INTC_ID( ++ arch_ids->cpus[uid].props.node_id, ++ 2 * local_cpu_id + 1), ++ 4); + } else { + build_append_int_noprefix(entry, 0, 4); + } +@@ -494,6 +500,29 @@ static void build_madt(GArray *table_data, + build_append_int_noprefix(table_data, + s->memmap[VIRT_APLIC_S].size, 4); + } ++ } else { ++ /* PLICs */ ++ for (socket = 0; socket < riscv_socket_count(ms); socket++) { ++ aplic_addr = s->memmap[VIRT_PLIC].base + ++ s->memmap[VIRT_PLIC].size * socket; ++ gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket; ++ build_append_int_noprefix(table_data, 0x1B, 1); /* Type */ ++ build_append_int_noprefix(table_data, 36, 1); /* Length */ ++ build_append_int_noprefix(table_data, 1, 1); /* Version */ ++ build_append_int_noprefix(table_data, socket, 1); /* PLIC ID */ ++ build_append_int_noprefix(table_data, 0, 8); /* Hardware ID */ ++ /* Total External Interrupt Sources Supported */ ++ build_append_int_noprefix(table_data, ++ VIRT_IRQCHIP_NUM_SOURCES - 1, 2); ++ build_append_int_noprefix(table_data, 0, 2); /* Max Priority */ ++ build_append_int_noprefix(table_data, 0, 4); /* Flags */ ++ /* PLIC Size */ ++ build_append_int_noprefix(table_data, s->memmap[VIRT_PLIC].size, 4); ++ /* PLIC Address */ ++ build_append_int_noprefix(table_data, aplic_addr, 8); ++ /* Global System Interrupt Vector Base */ ++ build_append_int_noprefix(table_data, gsi_base, 4); ++ } + } + + 
acpi_table_end(linker, &table); +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-SRAT-and-SLIT-ACPI-ta.patch b/hw-riscv-virt-acpi-build.c-Add-SRAT-and-SLIT-ACPI-ta.patch new file mode 100644 index 0000000000000000000000000000000000000000..bec635f59ddfafe6f7ab33ba816a582ad4e0f285 --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-SRAT-and-SLIT-ACPI-ta.patch @@ -0,0 +1,112 @@ +From 6d52123c9c8a0c6e9d7560f9c4e4b641e568e0c6 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:42 +0800 +Subject: [PATCH 18/18] hw/riscv/virt-acpi-build.c: Add SRAT and SLIT ACPI + tables + +commit a29f5b957644dd0f14a43c8719b18c134875195c upstream + +Enable ACPI NUMA support by adding the following 2 ACPI tables: +SRAT: provides the association for memory/Harts and Proximity Domains +SLIT: provides the relative distance between Proximity Domains + +The SRAT RINTC Affinity Structure definition[1] was based on the recently +approved ACPI CodeFirst ECR[2]. + +[1] https://github.com/riscv-non-isa/riscv-acpi/issues/25 +[2] https://mantis.uefi.org/mantis/view.php?id=2433 + +Signed-off-by: Haibo Xu +Reviewed-by: Andrew Jones +Message-ID: <20240129094200.3581037-1-haibo1.xu@intel.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 60 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 60 insertions(+) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index d69c25d2c9..69e9646683 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -596,11 +596,61 @@ static void build_madt(GArray *table_data, + acpi_table_end(linker, &table); + } + ++/* ++ * ACPI spec, Revision 6.5+ ++ * 5.2.16 System Resource Affinity Table (SRAT) ++ * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/25 ++ * https://drive.google.com/file/d/1YTdDx2IPm5IeZjAW932EYU-tUtgS08tX/view ++ */ ++static void ++build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms) ++{ ++ int i; ++ uint64_t mem_base; ++ MachineClass *mc 
= MACHINE_GET_CLASS(vms); ++ MachineState *ms = MACHINE(vms); ++ const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(ms); ++ AcpiTable table = { .sig = "SRAT", .rev = 3, .oem_id = vms->oem_id, ++ .oem_table_id = vms->oem_table_id }; ++ ++ acpi_table_begin(&table, table_data); ++ build_append_int_noprefix(table_data, 1, 4); /* Reserved */ ++ build_append_int_noprefix(table_data, 0, 8); /* Reserved */ ++ ++ for (i = 0; i < cpu_list->len; ++i) { ++ uint32_t nodeid = cpu_list->cpus[i].props.node_id; ++ /* ++ * 5.2.16.8 RINTC Affinity Structure ++ */ ++ build_append_int_noprefix(table_data, 7, 1); /* Type */ ++ build_append_int_noprefix(table_data, 20, 1); /* Length */ ++ build_append_int_noprefix(table_data, 0, 2); /* Reserved */ ++ build_append_int_noprefix(table_data, nodeid, 4); /* Proximity Domain */ ++ build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ ++ /* Flags, Table 5-70 */ ++ build_append_int_noprefix(table_data, 1 /* Flags: Enabled */, 4); ++ build_append_int_noprefix(table_data, 0, 4); /* Clock Domain */ ++ } ++ ++ mem_base = vms->memmap[VIRT_DRAM].base; ++ for (i = 0; i < ms->numa_state->num_nodes; ++i) { ++ if (ms->numa_state->nodes[i].node_mem > 0) { ++ build_srat_memory(table_data, mem_base, ++ ms->numa_state->nodes[i].node_mem, i, ++ MEM_AFFINITY_ENABLED); ++ mem_base += ms->numa_state->nodes[i].node_mem; ++ } ++ } ++ ++ acpi_table_end(linker, &table); ++} ++ + static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) + { + GArray *table_offsets; + unsigned dsdt, xsdt; + GArray *tables_blob = tables->table_data; ++ MachineState *ms = MACHINE(s); + + table_offsets = g_array_new(false, true, + sizeof(uint32_t)); +@@ -636,6 +686,16 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) + s->oem_table_id); + } + ++ if (ms->numa_state->num_nodes > 0) { ++ acpi_add_table(table_offsets, tables_blob); ++ build_srat(tables_blob, tables->linker, s); ++ if (ms->numa_state->have_numa_distance) { ++ 
acpi_add_table(table_offsets, tables_blob); ++ build_slit(tables_blob, tables->linker, ms, s->oem_id, ++ s->oem_table_id); ++ } ++ } ++ + /* XSDT is pointed to by RSDP */ + xsdt = tables_blob->len; + build_xsdt(tables_blob, tables->linker, table_offsets, s->oem_id, +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Add-namespace-devices-for.patch b/hw-riscv-virt-acpi-build.c-Add-namespace-devices-for.patch new file mode 100644 index 0000000000000000000000000000000000000000..231a6ae5f3945428582bf54f74474d247750840a --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Add-namespace-devices-for.patch @@ -0,0 +1,78 @@ +From 27190f7b6b48ba4684e9f352d0adda63bca814fe Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:39 +0800 +Subject: [PATCH 15/18] hw/riscv/virt-acpi-build.c: Add namespace devices for + PLIC and APLIC + +commit a54dd0cd6b9119c44d52547f51a529122f0ec1f1 upstream + +As per the requirement ACPI_080 in the RISC-V Boot and Runtime Services +(BRS) specification [1], PLIC and APLIC should be in namespace as well. +So, add them using the defined HID. + +[1] - https://github.com/riscv-non-isa/riscv-brs/releases/download/v0.0.2/riscv-brs-spec.pdf + (Chapter 6) + +Signed-off-by: Sunil V L +Acked-by: Alistair Francis +Acked-by: Igor Mammedov +Message-Id: <20240716144306.2432257-2-sunilvl@ventanamicro.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + hw/riscv/virt-acpi-build.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index d4a02579d6..2189579d53 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -141,6 +141,30 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) + } + } + ++static void acpi_dsdt_add_plic_aplic(Aml *scope, uint8_t socket_count, ++ uint64_t mmio_base, uint64_t mmio_size, ++ const char *hid) ++{ ++ uint64_t plic_aplic_addr; ++ uint32_t gsi_base; ++ uint8_t socket; ++ ++ for (socket = 0; socket < socket_count; socket++) { ++ plic_aplic_addr = mmio_base + mmio_size * socket; ++ gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket; ++ Aml *dev = aml_device("IC%.02X", socket); ++ aml_append(dev, aml_name_decl("_HID", aml_string("%s", hid))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(socket))); ++ aml_append(dev, aml_name_decl("_GSB", aml_int(gsi_base))); ++ ++ Aml *crs = aml_resource_template(); ++ aml_append(crs, aml_memory32_fixed(plic_aplic_addr, mmio_size, ++ AML_READ_WRITE)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ aml_append(scope, dev); ++ } ++} ++ + static void + acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, + uint32_t uart_irq) +@@ -375,6 +399,14 @@ static void build_dsdt(GArray *table_data, + + socket_count = riscv_socket_count(ms); + ++ if (s->aia_type == VIRT_AIA_TYPE_NONE) { ++ acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_PLIC].base, ++ memmap[VIRT_PLIC].size, "RSCV0001"); ++ } else { ++ acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_APLIC_S].base, ++ memmap[VIRT_APLIC_S].size, "RSCV0002"); ++ } ++ + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ); + + if (socket_count == 1) { +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Generate-SPCR-table.patch b/hw-riscv-virt-acpi-build.c-Generate-SPCR-table.patch new file mode 100644 index 
0000000000000000000000000000000000000000..d29531b4dfd11a18f1f3f8b8f82f516f82773aee --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Generate-SPCR-table.patch @@ -0,0 +1,78 @@ +From 612c2ba1c1b0377318bf9a8ed421a505bafc3311 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:41 +0800 +Subject: [PATCH 17/18] hw/riscv/virt-acpi-build.c: Generate SPCR table + +commit 3e6f1e61b4bc0facd13967580feed47d96a2c28c upstream + +Generate Serial Port Console Redirection Table (SPCR) for RISC-V +virtual machine. + +Signed-off-by: Sia Jee Heng +Reviewed-by: Daniel Henrique Barboza +Message-ID: <20240129021440.17640-3-jeeheng.sia@starfivetech.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt-acpi-build.c | 39 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 39 insertions(+) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 6230ab02c6..d69c25d2c9 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -198,6 +198,42 @@ acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, + aml_append(scope, dev); + } + ++/* ++ * Serial Port Console Redirection Table (SPCR) ++ * Rev: 1.07 ++ */ ++ ++static void ++spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s) ++{ ++ AcpiSpcrData serial = { ++ .interface_type = 0, /* 16550 compatible */ ++ .base_addr.id = AML_AS_SYSTEM_MEMORY, ++ .base_addr.width = 32, ++ .base_addr.offset = 0, ++ .base_addr.size = 1, ++ .base_addr.addr = s->memmap[VIRT_UART0].base, ++ .interrupt_type = (1 << 4),/* Bit[4] RISC-V PLIC/APLIC */ ++ .pc_interrupt = 0, ++ .interrupt = UART0_IRQ, ++ .baud_rate = 7, /* 15200 */ ++ .parity = 0, ++ .stop_bits = 1, ++ .flow_control = 0, ++ .terminal_type = 3, /* ANSI */ ++ .language = 0, /* Language */ ++ .pci_device_id = 0xffff, /* not a PCI device*/ ++ .pci_vendor_id = 0xffff, /* not a PCI device*/ ++ .pci_bus = 0, ++ .pci_device = 0, ++ .pci_function = 0, ++ .pci_flags = 0, ++ .pci_segment = 0, ++ }; ++ ++ 
build_spcr(table_data, linker, &serial, 2, s->oem_id, s->oem_table_id); ++} ++ + /* RHCT Node[N] starts at offset 56 */ + #define RHCT_NODE_ARRAY_OFFSET 56 + +@@ -587,6 +623,9 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) + acpi_add_table(table_offsets, tables_blob); + build_rhct(tables_blob, tables->linker, s); + ++ acpi_add_table(table_offsets, tables_blob); ++ spcr_setup(tables_blob, tables->linker, s); ++ + acpi_add_table(table_offsets, tables_blob); + { + AcpiMcfgInfo mcfg = { +-- +2.33.0 + diff --git a/hw-riscv-virt-acpi-build.c-Update-the-HID-of-RISC-V-.patch b/hw-riscv-virt-acpi-build.c-Update-the-HID-of-RISC-V-.patch new file mode 100644 index 0000000000000000000000000000000000000000..460d7fca03c5918dd55701b6f620f25ee50b77df --- /dev/null +++ b/hw-riscv-virt-acpi-build.c-Update-the-HID-of-RISC-V-.patch @@ -0,0 +1,41 @@ +From 0fb0bf77d7a956e36975c8cfcac83c2e972a5fe3 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:40 +0800 +Subject: [PATCH 16/18] hw/riscv/virt-acpi-build.c: Update the HID of RISC-V + UART + +commit faacd2e6b6a85a5eee2472e5a7f50bf69c4ad44a upstream + +The requirement ACPI_060 in the RISC-V BRS specification [1], requires +NS16550 compatible UART to have the HID RSCV0003. So, update the HID for +the UART. + +[1] - https://github.com/riscv-non-isa/riscv-brs/releases/download/v0.0.2/riscv-brs-spec.pdf + (Chapter 6) + +Signed-off-by: Sunil V L +Acked-by: Alistair Francis +Reviewed-by: Igor Mammedov +Message-Id: <20240716144306.2432257-3-sunilvl@ventanamicro.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + hw/riscv/virt-acpi-build.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c +index 2189579d53..6230ab02c6 100644 +--- a/hw/riscv/virt-acpi-build.c ++++ b/hw/riscv/virt-acpi-build.c +@@ -170,7 +170,7 @@ acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, + uint32_t uart_irq) + { + Aml *dev = aml_device("COM0"); +- aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501"))); ++ aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0003"))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); + + Aml *crs = aml_resource_template(); +-- +2.33.0 + diff --git a/hw-riscv-virt.c-fix-the-interrupts-extended-property.patch b/hw-riscv-virt.c-fix-the-interrupts-extended-property.patch new file mode 100644 index 0000000000000000000000000000000000000000..51020157d8dda430b2567a03b54645ee8f2efd5d --- /dev/null +++ b/hw-riscv-virt.c-fix-the-interrupts-extended-property.patch @@ -0,0 +1,92 @@ +From 127e3dfe8ed67aa0f763e03c5424e6f23b8aafb3 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Mon, 4 Aug 2025 20:59:38 +0800 +Subject: [PATCH 14/18] hw/riscv/virt.c: fix the interrupts-extended property + format of PLIC + +commit ca334e10dcd1f0f3a3c08f8dc3f9945d574d0e6b upstream + +The interrupts-extended property of PLIC only has 2 * hart number +fields when KVM enabled, copy 4 * hart number fields to fdt will +expose some uninitialized value. + +In this patch, I also refactor the code about the setting of +interrupts-extended property of PLIC for improved readability. 
+ +Signed-off-by: Yong-Xuan Wang +Reviewed-by: Jim Shu +Reviewed-by: Daniel Henrique Barboza +Message-ID: <20231218090543.22353-1-yongxuan.wang@sifive.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/virt.c | 47 +++++++++++++++++++++++++++-------------------- + 1 file changed, 27 insertions(+), 20 deletions(-) + +diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c +index c47df46bfc..8f01ec7f61 100644 +--- a/hw/riscv/virt.c ++++ b/hw/riscv/virt.c +@@ -438,24 +438,6 @@ static void create_fdt_socket_plic(RISCVVirtState *s, + "sifive,plic-1.0.0", "riscv,plic0" + }; + +- if (kvm_enabled()) { +- plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); +- } else { +- plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); +- } +- +- for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { +- if (kvm_enabled()) { +- plic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); +- plic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); +- } else { +- plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]); +- plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); +- plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]); +- plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); +- } +- } +- + plic_phandles[socket] = (*phandle)++; + plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); + plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); +@@ -468,8 +450,33 @@ static void create_fdt_socket_plic(RISCVVirtState *s, + (char **)&plic_compat, + ARRAY_SIZE(plic_compat)); + qemu_fdt_setprop(ms->fdt, plic_name, "interrupt-controller", NULL, 0); +- qemu_fdt_setprop(ms->fdt, plic_name, "interrupts-extended", +- plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); ++ ++ if (kvm_enabled()) { ++ plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); ++ ++ for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { ++ plic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); ++ plic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); ++ } ++ ++ 
qemu_fdt_setprop(ms->fdt, plic_name, "interrupts-extended", ++ plic_cells, ++ s->soc[socket].num_harts * sizeof(uint32_t) * 2); ++ } else { ++ plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); ++ ++ for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { ++ plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]); ++ plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); ++ plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]); ++ plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); ++ } ++ ++ qemu_fdt_setprop(ms->fdt, plic_name, "interrupts-extended", ++ plic_cells, ++ s->soc[socket].num_harts * sizeof(uint32_t) * 4); ++ } ++ + qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg", + 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); + qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev", +-- +2.33.0 + diff --git a/hw-rtc-Fixed-loongson-rtc-emulation-errors.patch b/hw-rtc-Fixed-loongson-rtc-emulation-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a759ac44790e0908326fee49402a13827a6de53 --- /dev/null +++ b/hw-rtc-Fixed-loongson-rtc-emulation-errors.patch @@ -0,0 +1,137 @@ +From 4044284b230182cbaeb401bdb1b65dcbd11c7550 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 7 Apr 2025 18:59:42 +0800 +Subject: [PATCH] hw/rtc: Fixed loongson rtc emulation errors + +The expire time is sent to the timer only +when the expire Time is greater than 0 or +greater than now. Otherwise, the timer +will trigger interruption continuously. + +Timer interrupts are sent using pulse functions. 
+ +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 9 +++++++-- + hw/rtc/ls7a_rtc.c | 22 +++++++++++++--------- + 2 files changed, 20 insertions(+), 11 deletions(-) + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 0c24e632bb..ce026a4c3c 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -51,6 +51,11 @@ + #include "qemu/error-report.h" + #include "qemu/guest-random.h" + ++#define FDT_IRQ_FLAGS_EDGE_LO_HI 1 ++#define FDT_IRQ_FLAGS_EDGE_HI_LO 2 ++#define FDT_IRQ_FLAGS_LEVEL_HI 4 ++#define FDT_IRQ_FLAGS_LEVEL_LO 8 ++ + static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms) + { + if (lvms->veiointc == ON_OFF_AUTO_OFF) { +@@ -275,7 +280,7 @@ static void fdt_add_rtc_node(LoongArchVirtMachineState *lvms, + "loongson,ls7a-rtc"); + qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 2, size); + qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", +- VIRT_RTC_IRQ - VIRT_GSI_BASE , 0x4); ++ VIRT_RTC_IRQ - VIRT_GSI_BASE , FDT_IRQ_FLAGS_EDGE_LO_HI); + qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", + *pch_pic_phandle); + g_free(nodename); +@@ -334,7 +339,7 @@ static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, + qemu_fdt_setprop_cell(ms->fdt, nodename, "clock-frequency", 100000000); + if (chosen) + qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename); +- qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, 0x4); ++ qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", + *pch_pic_phandle); + g_free(nodename); +diff --git a/hw/rtc/ls7a_rtc.c b/hw/rtc/ls7a_rtc.c +index 1f9e38a735..be9546c850 100644 +--- a/hw/rtc/ls7a_rtc.c ++++ b/hw/rtc/ls7a_rtc.c +@@ -145,20 +145,22 @@ static void toymatch_write(LS7ARtcState *s, uint64_t val, int num) + now = qemu_clock_get_ms(rtc_clock); + toymatch_val_to_time(s, val, &tm); + expire_time = now + (qemu_timedate_diff(&tm) - s->offset_toy) * 
1000; +- timer_mod(s->toy_timer[num], expire_time); ++ if (expire_time > now) ++ timer_mod(s->toy_timer[num], expire_time); + } + } + + static void rtcmatch_write(LS7ARtcState *s, uint64_t val, int num) + { +- uint64_t expire_ns; ++ int64_t expire_ns; + + /* it do not support write when toy disabled */ + if (rtc_enabled(s)) { + s->rtcmatch[num] = val; + /* calculate expire time */ + expire_ns = ticks_to_ns(val) - ticks_to_ns(s->offset_rtc); +- timer_mod_ns(s->rtc_timer[num], expire_ns); ++ if (expire_ns > 0) ++ timer_mod_ns(s->rtc_timer[num], expire_ns); + } + } + +@@ -185,7 +187,7 @@ static void ls7a_rtc_stop(LS7ARtcState *s) + static void ls7a_toy_start(LS7ARtcState *s) + { + int i; +- uint64_t expire_time, now; ++ int64_t expire_time, now; + struct tm tm = {}; + + now = qemu_clock_get_ms(rtc_clock); +@@ -194,19 +196,21 @@ static void ls7a_toy_start(LS7ARtcState *s) + for (i = 0; i < TIMER_NUMS; i++) { + toymatch_val_to_time(s, s->toymatch[i], &tm); + expire_time = now + (qemu_timedate_diff(&tm) - s->offset_toy) * 1000; +- timer_mod(s->toy_timer[i], expire_time); ++ if (expire_time > now) ++ timer_mod(s->toy_timer[i], expire_time); + } + } + + static void ls7a_rtc_start(LS7ARtcState *s) + { + int i; +- uint64_t expire_time; ++ int64_t expire_time; + + /* recalculate expire time and enable timer */ + for (i = 0; i < TIMER_NUMS; i++) { + expire_time = ticks_to_ns(s->rtcmatch[i]) - ticks_to_ns(s->offset_rtc); +- timer_mod_ns(s->rtc_timer[i], expire_time); ++ if (expire_time > 0) ++ timer_mod_ns(s->rtc_timer[i], expire_time); + } + } + +@@ -370,7 +374,7 @@ static void toy_timer_cb(void *opaque) + LS7ARtcState *s = opaque; + + if (toy_enabled(s)) { +- qemu_irq_raise(s->irq); ++ qemu_irq_pulse(s->irq); + } + } + +@@ -379,7 +383,7 @@ static void rtc_timer_cb(void *opaque) + LS7ARtcState *s = opaque; + + if (rtc_enabled(s)) { +- qemu_irq_raise(s->irq); ++ qemu_irq_pulse(s->irq); + } + } + +-- +2.41.0.windows.1 + diff --git 
a/hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch b/hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch deleted file mode 100644 index 12c907453efdaa1141217b3adccf27d4099ee924..0000000000000000000000000000000000000000 --- a/hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 5ec15fabe78e385a81e44c7944cd05309de7f36e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Jun 2020 09:26:29 +0200 -Subject: [PATCH 7/9] hw/scsi/megasas: Fix possible out-of-bounds array access - in tracepoints -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Some tracepoints in megasas.c use a guest-controlled value as an index -into the mfi_frame_desc[] array. Thus a malicious guest could cause an -out-of-bounds error here. Fortunately, the impact is very low since this -can only happen when the corresponding tracepoints have been enabled -before, but the problem should be fixed anyway with a proper check. 
- -Buglink: https://bugs.launchpad.net/qemu/+bug/1882065 -Signed-off-by: Thomas Huth -Message-Id: <20200615072629.32321-1-thuth@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Paolo Bonzini ---- - hw/scsi/megasas.c | 36 +++++++++++++++++++++++------------- - 1 file changed, 23 insertions(+), 13 deletions(-) - -diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c -index 94469e8169..9421f4d14e 100644 ---- a/hw/scsi/megasas.c -+++ b/hw/scsi/megasas.c -@@ -53,10 +53,6 @@ - #define MEGASAS_FLAG_USE_QUEUE64 1 - #define MEGASAS_MASK_USE_QUEUE64 (1 << MEGASAS_FLAG_USE_QUEUE64) - --static const char *mfi_frame_desc[] = { -- "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI", -- "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop"}; -- - typedef struct MegasasCmd { - uint32_t index; - uint16_t flags; -@@ -182,6 +178,20 @@ static void megasas_frame_set_scsi_status(MegasasState *s, - stb_pci_dma(pci, frame + offsetof(struct mfi_frame_header, scsi_status), v); - } - -+static inline const char *mfi_frame_desc(unsigned int cmd) -+{ -+ static const char *mfi_frame_descs[] = { -+ "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI", -+ "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop" -+ }; -+ -+ if (cmd < ARRAY_SIZE(mfi_frame_descs)) { -+ return mfi_frame_descs[cmd]; -+ } -+ -+ return "Unknown"; -+} -+ - /* - * Context is considered opaque, but the HBA firmware is running - * in little endian mode. So convert it to little endian, too. 
-@@ -1669,25 +1679,25 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, - if (is_logical) { - if (target_id >= MFI_MAX_LD || lun_id != 0) { - trace_megasas_scsi_target_not_present( -- mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); -+ mfi_frame_desc(frame_cmd), is_logical, target_id, lun_id); - return MFI_STAT_DEVICE_NOT_FOUND; - } - } - sdev = scsi_device_find(&s->bus, 0, target_id, lun_id); - - cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len); -- trace_megasas_handle_scsi(mfi_frame_desc[frame_cmd], is_logical, -+ trace_megasas_handle_scsi(mfi_frame_desc(frame_cmd), is_logical, - target_id, lun_id, sdev, cmd->iov_size); - - if (!sdev || (megasas_is_jbod(s) && is_logical)) { - trace_megasas_scsi_target_not_present( -- mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); -+ mfi_frame_desc(frame_cmd), is_logical, target_id, lun_id); - return MFI_STAT_DEVICE_NOT_FOUND; - } - - if (cdb_len > 16) { - trace_megasas_scsi_invalid_cdb_len( -- mfi_frame_desc[frame_cmd], is_logical, -+ mfi_frame_desc(frame_cmd), is_logical, - target_id, lun_id, cdb_len); - megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); - cmd->frame->header.scsi_status = CHECK_CONDITION; -@@ -1705,7 +1715,7 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, - cmd->req = scsi_req_new(sdev, cmd->index, lun_id, cdb, cmd); - if (!cmd->req) { - trace_megasas_scsi_req_alloc_failed( -- mfi_frame_desc[frame_cmd], target_id, lun_id); -+ mfi_frame_desc(frame_cmd), target_id, lun_id); - megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); - cmd->frame->header.scsi_status = BUSY; - s->event_count++; -@@ -1750,17 +1760,17 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) - } - - trace_megasas_handle_io(cmd->index, -- mfi_frame_desc[frame_cmd], target_id, lun_id, -+ mfi_frame_desc(frame_cmd), target_id, lun_id, - (unsigned long)lba_start, (unsigned long)lba_count); - if (!sdev) { - trace_megasas_io_target_not_present(cmd->index, -- 
mfi_frame_desc[frame_cmd], target_id, lun_id); -+ mfi_frame_desc(frame_cmd), target_id, lun_id); - return MFI_STAT_DEVICE_NOT_FOUND; - } - - if (cdb_len > 16) { - trace_megasas_scsi_invalid_cdb_len( -- mfi_frame_desc[frame_cmd], 1, target_id, lun_id, cdb_len); -+ mfi_frame_desc(frame_cmd), 1, target_id, lun_id, cdb_len); - megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); - cmd->frame->header.scsi_status = CHECK_CONDITION; - s->event_count++; -@@ -1780,7 +1790,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) - lun_id, cdb, cmd); - if (!cmd->req) { - trace_megasas_scsi_req_alloc_failed( -- mfi_frame_desc[frame_cmd], target_id, lun_id); -+ mfi_frame_desc(frame_cmd), target_id, lun_id); - megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); - cmd->frame->header.scsi_status = BUSY; - s->event_count++; --- -2.25.1 - diff --git a/hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch b/hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f9b7abaabd8c367e22e6ba41a29a0c0cc8f9fc1 --- /dev/null +++ b/hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch @@ -0,0 +1,48 @@ +From a57cbe41cd8b2d8bc31eac33ee74a3ac058d67dd Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 28 Mar 2024 15:24:25 +0800 +Subject: [PATCH] hw/scsi/scsi-generic: Fix io_timeout property not applying +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 7c7a9f578e4fb1adff7ac8d9acaaaedb87474e76 + +The io_timeout property, introduced in c9b6609 (part of 6.0) is +silently overwritten by the hardcoded default value of 30 seconds +(DEFAULT_IO_TIMEOUT) in scsi_generic_realize because that function is +being called after the properties have already been applied. 
+ +The property definition already has a default value which is applied +correctly when no value is explicitly set, so we can just remove the +code which overrides the io_timeout completely. + +This has been tested by stracing SG_IO operations with the io_timeout +property set and unset and now sets the timeout field in the ioctl +request to the proper value. + +Fixes: c9b6609b69facad ("scsi: make io_timeout configurable") +Signed-off-by: Lorenz Brun +Message-ID: <20240315145831.2531695-1-lorenz@brun.one> +Reviewed-by: Alex Bennée +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/scsi/scsi-generic.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 22efcd09a6..12fdd8e748 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -782,7 +782,6 @@ static void scsi_generic_realize(SCSIDevice *s, Error **errp) + + /* Only used by scsi-block, but initialize it nevertheless to be clean. */ + s->default_scsi_version = -1; +- s->io_timeout = DEFAULT_IO_TIMEOUT; + scsi_generic_read_device_inquiry(s); + } + +-- +2.27.0 + diff --git a/hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch b/hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch new file mode 100644 index 0000000000000000000000000000000000000000..19a5a6a7f9d663b893b0c03c327892b14da03749 --- /dev/null +++ b/hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch @@ -0,0 +1,135 @@ +From b628859b936c6d6348d2af9e6b6d2887c697b9b7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Tue, 9 Apr 2024 16:19:27 +0200 +Subject: [PATCH] hw/sd/sdhci: Do not update TRNMOD when Command Inhibit (DAT) + is set(CVE-2024-3447) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Per "SD Host Controller Standard Specification Version 3.00": + + * 2.2.5 Transfer Mode Register (Offset 00Ch) + + Writes to this register shall be ignored when the Command + 
Inhibit (DAT) in the Present State register is 1. + +Do not update the TRNMOD register when Command Inhibit (DAT) +bit is set to avoid the present-status register going out of +sync, leading to malicious guest using DMA mode and overflowing +the FIFO buffer: + + $ cat << EOF | qemu-system-i386 \ + -display none -nographic -nodefaults \ + -machine accel=qtest -m 512M \ + -device sdhci-pci,sd-spec-version=3 \ + -device sd-card,drive=mydrive \ + -drive if=none,index=0,file=null-co://,format=raw,id=mydrive \ + -qtest stdio + outl 0xcf8 0x80001013 + outl 0xcfc 0x91 + outl 0xcf8 0x80001001 + outl 0xcfc 0x06000000 + write 0x9100002c 0x1 0x05 + write 0x91000058 0x1 0x16 + write 0x91000005 0x1 0x04 + write 0x91000028 0x1 0x08 + write 0x16 0x1 0x21 + write 0x19 0x1 0x20 + write 0x9100000c 0x1 0x01 + write 0x9100000e 0x1 0x20 + write 0x9100000f 0x1 0x00 + write 0x9100000c 0x1 0x00 + write 0x91000020 0x1 0x00 + EOF + +Stack trace (part): +================================================================= +==89993==ERROR: AddressSanitizer: heap-buffer-overflow on address +0x615000029900 at pc 0x55d5f885700d bp 0x7ffc1e1e9470 sp 0x7ffc1e1e9468 +WRITE of size 1 at 0x615000029900 thread T0 + #0 0x55d5f885700c in sdhci_write_dataport hw/sd/sdhci.c:564:39 + #1 0x55d5f8849150 in sdhci_write hw/sd/sdhci.c:1223:13 + #2 0x55d5fa01db63 in memory_region_write_accessor system/memory.c:497:5 + #3 0x55d5fa01d245 in access_with_adjusted_size system/memory.c:573:18 + #4 0x55d5fa01b1a9 in memory_region_dispatch_write system/memory.c:1521:16 + #5 0x55d5fa09f5c9 in flatview_write_continue system/physmem.c:2711:23 + #6 0x55d5fa08f78b in flatview_write system/physmem.c:2753:12 + #7 0x55d5fa08f258 in address_space_write system/physmem.c:2860:18 + ... 
+0x615000029900 is located 0 bytes to the right of 512-byte region +[0x615000029700,0x615000029900) allocated by thread T0 here: + #0 0x55d5f7237b27 in __interceptor_calloc + #1 0x7f9e36dd4c50 in g_malloc0 + #2 0x55d5f88672f7 in sdhci_pci_realize hw/sd/sdhci-pci.c:36:5 + #3 0x55d5f844b582 in pci_qdev_realize hw/pci/pci.c:2092:9 + #4 0x55d5fa2ee74b in device_set_realized hw/core/qdev.c:510:13 + #5 0x55d5fa325bfb in property_set_bool qom/object.c:2358:5 + #6 0x55d5fa31ea45 in object_property_set qom/object.c:1472:5 + #7 0x55d5fa332509 in object_property_set_qobject om/qom-qobject.c:28:10 + #8 0x55d5fa31f6ed in object_property_set_bool qom/object.c:1541:15 + #9 0x55d5fa2e2948 in qdev_realize hw/core/qdev.c:292:12 + #10 0x55d5f8eed3f1 in qdev_device_add_from_qdict system/qdev-monitor.c:719:10 + #11 0x55d5f8eef7ff in qdev_device_add system/qdev-monitor.c:738:11 + #12 0x55d5f8f211f0 in device_init_func system/vl.c:1200:11 + #13 0x55d5fad0877d in qemu_opts_foreach util/qemu-option.c:1135:14 + #14 0x55d5f8f0df9c in qemu_create_cli_devices system/vl.c:2638:5 + #15 0x55d5f8f0db24 in qmp_x_exit_preconfig system/vl.c:2706:5 + #16 0x55d5f8f14dc0 in qemu_init system/vl.c:3737:9 + ... +SUMMARY: AddressSanitizer: heap-buffer-overflow hw/sd/sdhci.c:564:39 +in sdhci_write_dataport + +Add assertions to ensure the fifo_buffer[] is not overflowed by +malicious accesses to the Buffer Data Port register. 
+ +Fixes: CVE-2024-3447 +Cc: qemu-stable@nongnu.org +Fixes: d7dfca0807 ("hw/sdhci: introduce standard SD host controller") +Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58813 +Reported-by: Alexander Bulekov +Reported-by: Chuhong Yuan +Signed-off-by: Peter Maydell +Message-Id: +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20240409145524.27913-1-philmd@linaro.org> +--- + hw/sd/sdhci.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c +index 40473b0db0..e95ea34895 100644 +--- a/hw/sd/sdhci.c ++++ b/hw/sd/sdhci.c +@@ -473,6 +473,7 @@ static uint32_t sdhci_read_dataport(SDHCIState *s, unsigned size) + } + + for (i = 0; i < size; i++) { ++ assert(s->data_count < s->buf_maxsz); + value |= s->fifo_buffer[s->data_count] << i * 8; + s->data_count++; + /* check if we've read all valid data (blksize bytes) from buffer */ +@@ -561,6 +562,7 @@ static void sdhci_write_dataport(SDHCIState *s, uint32_t value, unsigned size) + } + + for (i = 0; i < size; i++) { ++ assert(s->data_count < s->buf_maxsz); + s->fifo_buffer[s->data_count] = value & 0xFF; + s->data_count++; + value >>= 8; +@@ -1208,6 +1210,12 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) + if (!(s->capareg & R_SDHC_CAPAB_SDMA_MASK)) { + value &= ~SDHC_TRNS_DMA; + } ++ ++ /* TRNMOD writes are inhibited while Command Inhibit (DAT) is true */ ++ if (s->prnsts & SDHC_DATA_INHIBIT) { ++ mask |= 0xffff; ++ } ++ + MASKED_WRITE(s->trnmod, mask, value & SDHC_TRNMOD_MASK); + MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16); + +-- +2.27.0 + diff --git a/hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch b/hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch deleted file mode 100644 index 42df9650a9e378fbb0d96afbd5b8a844c8ed64c7..0000000000000000000000000000000000000000 --- a/hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 8b8d3992db22a583b69b6e2ae1d9cd87e2179e21 Mon Sep 17 00:00:00 2001 -From: Prasad 
J Pandit -Date: Fri, 18 Sep 2020 10:55:22 +0800 -Subject: [PATCH] hw/sd/sdhci: Fix DMA Transfer Block Size field The 'Transfer - Block Size' field is 12-bit wide. See section '2.2.2 Block Size Register - (Offset 004h)' in datasheet. - -Buglink: https://bugs.launchpad.net/qemu/+bug/1892960 - -diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c -index 7b80b1d9..acf482b8 100644 ---- a/hw/sd/sdhci.c -+++ b/hw/sd/sdhci.c -@@ -1127,7 +1127,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) - break; - case SDHC_BLKSIZE: - if (!TRANSFERRING_DATA(s->prnsts)) { -- MASKED_WRITE(s->blksize, mask, value); -+ MASKED_WRITE(s->blksize, mask, extract32(value, 0, 12)); - MASKED_WRITE(s->blkcnt, mask >> 16, value >> 16); - } - --- -2.23.0 - diff --git a/hw-sd-sdhci-free-irq-on-exit.patch b/hw-sd-sdhci-free-irq-on-exit.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f8a611833f310757dd587d23e79c56652a35d87 --- /dev/null +++ b/hw-sd-sdhci-free-irq-on-exit.patch @@ -0,0 +1,46 @@ +From 3746a434596b9bc20994c869c79fb9db24227418 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 7 Apr 2025 13:56:18 -0400 +Subject: [PATCH] hw/sd/sdhci: free irq on exit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 1c2d03bb0889b7a9a677d53126fb035190683af4 + +Fix a memory leak bug in sdhci_pci_realize() due to s->irq +not being freed in sdhci_pci_exit(). 
+ +Signed-off-by: Zheng Huang +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <09ddf42b-a6db-42d5-954b-148d09d8d6cc@gmail.com> +[PMD: Moved qemu_free_irq() call before sdhci_common_unrealize()] +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/sd/sdhci-pci.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/sd/sdhci-pci.c b/hw/sd/sdhci-pci.c +index 9b7bee8b3f..c1eb67cf29 100644 +--- a/hw/sd/sdhci-pci.c ++++ b/hw/sd/sdhci-pci.c +@@ -18,6 +18,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "qemu/module.h" ++#include "hw/irq.h" + #include "hw/qdev-properties.h" + #include "hw/sd/sdhci.h" + #include "sdhci-internal.h" +@@ -49,6 +50,7 @@ static void sdhci_pci_exit(PCIDevice *dev) + { + SDHCIState *s = PCI_SDHCI(dev); + ++ qemu_free_irq(s->irq); + sdhci_common_unrealize(s); + sdhci_uninitfn(s); + } +-- +2.41.0.windows.1 + diff --git a/hw-timer-exynos4210_mct-fix-possible-int-overflow.patch b/hw-timer-exynos4210_mct-fix-possible-int-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1c4a33582c07d3468bbd8918f58381d67e5eb80 --- /dev/null +++ b/hw-timer-exynos4210_mct-fix-possible-int-overflow.patch @@ -0,0 +1,36 @@ +From d0076c906a96019c0fe12be78e5ab21eaf15e69e Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 25 Nov 2024 04:48:16 -0500 +Subject: [PATCH] hw/timer/exynos4210_mct: fix possible int overflow + +cheery-pick from c5d36da7ec62e4c72a72a437057fb6072cf0d6ab + +The product "icnto * s->tcntb" may overflow uint32_t. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Signed-off-by: Dmitry Frolov +Message-id: 20241106083801.219578-2-frolov@swemel.ru +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +Signed-off-by: qihao_yewu +--- + hw/timer/exynos4210_mct.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/timer/exynos4210_mct.c b/hw/timer/exynos4210_mct.c +index 446bbd2b96..6f47bfe2c2 100644 +--- a/hw/timer/exynos4210_mct.c ++++ b/hw/timer/exynos4210_mct.c +@@ -815,7 +815,7 @@ static uint32_t exynos4210_ltick_cnt_get_cnto(struct tick_timer *s) + /* Both are counting */ + icnto = remain / s->tcntb; + if (icnto) { +- tcnto = remain % (icnto * s->tcntb); ++ tcnto = remain % ((uint64_t)icnto * s->tcntb); + } else { + tcnto = remain % s->tcntb; + } +-- +2.41.0.windows.1 + diff --git a/hw-tpm-Add-TPM-event-log.patch b/hw-tpm-Add-TPM-event-log.patch new file mode 100644 index 0000000000000000000000000000000000000000..35f669ce4fbd45c579a8bbad683ac59545a74021 --- /dev/null +++ b/hw-tpm-Add-TPM-event-log.patch @@ -0,0 +1,507 @@ +From ace3d13d5db0b33fdda4c31549aed8e3f87ce47d Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Thu, 7 Nov 2024 13:11:56 +0000 +Subject: [PATCH] hw/tpm: Add TPM event log + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b8e00505df37d35bcbcb05abdca5819d616099f4 + +Provide a library allowing the VMM to create an event log that describes +what is loaded into memory. During remote attestation in confidential +computing this helps an independent verifier reconstruct the initial +measurements of a VM, which contain the initial state of memory and +CPUs. + +We provide some definitions and structures described by the Trusted +Computing Group (TCG) in "TCG PC Client Platform Firmware Profile +Specification" Level 00 Version 1.06 Revision 52 [1]. This is the same +format as used by UEFI, and UEFI could reuse this log after finding it +in DT or ACPI tables, but can also copy its content into a new one. 
+ +[1] https://trustedcomputinggroup.org/resource/pc-client-specific-platform-firmware-profile-specification/ + +Cc: Stefan Berger +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/tpm/Kconfig | 4 + + hw/tpm/meson.build | 1 + + hw/tpm/tpm_log.c | 325 +++++++++++++++++++++++++++++++++++++++ + include/hw/tpm/tpm_log.h | 89 +++++++++++ + qapi/tpm.json | 14 ++ + 5 files changed, 433 insertions(+) + create mode 100644 hw/tpm/tpm_log.c + create mode 100644 include/hw/tpm/tpm_log.h + +diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig +index a46663288c..70694b14a3 100644 +--- a/hw/tpm/Kconfig ++++ b/hw/tpm/Kconfig +@@ -30,3 +30,7 @@ config TPM_SPAPR + default y + depends on TPM && PSERIES + select TPM_BACKEND ++ ++config TPM_LOG ++ bool ++ default y +diff --git a/hw/tpm/meson.build b/hw/tpm/meson.build +index 6968e60b3f..81efb557f3 100644 +--- a/hw/tpm/meson.build ++++ b/hw/tpm/meson.build +@@ -6,4 +6,5 @@ system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb.c')) + system_ss.add(when: 'CONFIG_TPM_TIS', if_true: files('tpm_ppi.c')) + system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_ppi.c')) + ++system_ss.add(when: 'CONFIG_TPM_LOG', if_true: files('tpm_log.c')) + specific_ss.add(when: 'CONFIG_TPM_SPAPR', if_true: files('tpm_spapr.c')) +diff --git a/hw/tpm/tpm_log.c b/hw/tpm/tpm_log.c +new file mode 100644 +index 0000000000..ab29d8569b +--- /dev/null ++++ b/hw/tpm/tpm_log.c +@@ -0,0 +1,325 @@ ++/* ++ * tpm_log.c - Event log as described by the Trusted Computing Group (TCG) ++ * ++ * Copyright (c) 2024 Linaro Ltd. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ * ++ * Create an event log in the format specified by: ++ * ++ * TCG PC Client Platform Firmware Profile Specification ++ * Level 00 Version 1.06 Revision 52 ++ * Family “2.0” ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "crypto/hash.h" ++#include "exec/address-spaces.h" ++#include "exec/memory.h" ++#include "hw/tpm/tpm_log.h" ++#include "qapi/error.h" ++#include "qemu/bswap.h" ++#include "qom/object_interfaces.h" ++ ++/* ++ * Legacy structure used only in the first event in the log, for compatibility ++ */ ++struct TcgPcClientPcrEvent { ++ uint32_t pcr_index; ++ uint32_t event_type; ++ uint8_t digest[20]; ++ uint32_t event_data_size; ++ uint8_t event[]; ++} QEMU_PACKED; ++ ++struct TcgEfiSpecIdEvent { ++ uint8_t signature[16]; ++ uint32_t platform_class; ++ uint8_t family_version_minor; ++ uint8_t family_version_major; ++ uint8_t spec_revision; ++ uint8_t uintn_size; ++ uint32_t number_of_algorithms; /* 1 */ ++ /* ++ * For now we declare a single algo, but if we want UEFI to reuse this ++ * header then we'd need to add entries here for all algos supported by ++ * UEFI (and expand the digest field for EV_NO_ACTION). 
++ */ ++ uint16_t algorithm_id; ++ uint16_t digest_size; ++ uint8_t vendor_info_size; ++ uint8_t vendor_info[]; ++} QEMU_PACKED; ++ ++struct TcgPcrEvent2Head { ++ uint32_t pcr_index; ++ uint32_t event_type; ++ /* variable-sized digests */ ++ uint8_t digests[]; ++} QEMU_PACKED; ++ ++struct TcgPcrEvent2Tail { ++ uint32_t event_size; ++ uint8_t event[]; ++} QEMU_PACKED; ++ ++struct TpmlDigestValues { ++ uint32_t count; /* 1 */ ++ uint16_t hash_alg; ++ uint8_t digest[]; ++} QEMU_PACKED; ++ ++struct TpmLog { ++ Object parent_obj; ++ ++ TpmLogDigestAlgo digest_algo; ++ size_t max_size; ++ uint64_t load_addr; ++ ++ uint16_t tcg_algo; ++ GByteArray *content; ++ uint8_t *digest; ++ size_t digest_size; ++}; ++ ++OBJECT_DEFINE_SIMPLE_TYPE(TpmLog, tpm_log, TPM_LOG, OBJECT) ++ ++static void tpm_log_init(Object *obj) ++{ ++ TpmLog *log = TPM_LOG(obj); ++ ++ log->digest_algo = TPM_LOG_DIGEST_ALGO_SHA256; ++} ++ ++static void tpm_log_destroy(TpmLog *log) ++{ ++ if (!log->content) { ++ return; ++ } ++ g_free(log->digest); ++ log->digest = NULL; ++ g_byte_array_free(log->content, /* free_segment */ true); ++ log->content = NULL; ++} ++ ++static void tpm_log_finalize(Object *obj) ++{ ++ tpm_log_destroy(TPM_LOG(obj)); ++} ++ ++static int tpm_log_get_digest_algo(Object *obj, Error **errp) ++{ ++ TpmLog *log = TPM_LOG(obj); ++ ++ return log->digest_algo; ++} ++ ++static void tpm_log_set_digest_algo(Object *obj, int algo, Error **errp) ++{ ++ TpmLog *log = TPM_LOG(obj); ++ ++ if (log->content != NULL) { ++ error_setg(errp, "cannot set digest algo after log creation"); ++ return; ++ } ++ ++ log->digest_algo = algo; ++} ++ ++static void tpm_log_get_max_size(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TpmLog *log = TPM_LOG(obj); ++ uint64_t value = log->max_size; ++ ++ visit_type_uint64(v, name, &value, errp); ++} ++ ++static void tpm_log_get_load_addr(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TpmLog *log = 
TPM_LOG(obj); ++ uint64_t value = log->load_addr; ++ ++ visit_type_uint64(v, name, &value, errp); ++} ++ ++static void tpm_log_set_load_addr(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ TpmLog *log = TPM_LOG(obj); ++ uint64_t value; ++ ++ if (!visit_type_uint64(v, name, &value, errp)) { ++ return; ++ } ++ ++ log->load_addr = value; ++} ++ ++ ++static void tpm_log_class_init(ObjectClass *oc, void *data) ++{ ++ object_class_property_add_enum(oc, "digest-algo", ++ "TpmLogDigestAlgo", ++ &TpmLogDigestAlgo_lookup, ++ tpm_log_get_digest_algo, ++ tpm_log_set_digest_algo); ++ object_class_property_set_description(oc, "digest-algo", ++ "Algorithm used to hash blobs added as events ('sha256', 'sha512')"); ++ ++ /* max_size is set while allocating the log in tpm_log_create */ ++ object_class_property_add(oc, "max-size", "uint64", tpm_log_get_max_size, ++ NULL, NULL, NULL); ++ object_class_property_set_description(oc, "max-size", ++ "Maximum size of the log, reserved in guest memory"); ++ ++ object_class_property_add(oc, "load-addr", "uint64", tpm_log_get_load_addr, ++ tpm_log_set_load_addr, NULL, NULL); ++ object_class_property_set_description(oc, "load-addr", ++ "Base address of the log in guest memory"); ++} ++ ++int tpm_log_create(TpmLog *log, size_t max_size, Error **errp) ++{ ++ struct TcgEfiSpecIdEvent event; ++ struct TcgPcClientPcrEvent header = { ++ .pcr_index = 0, ++ .event_type = cpu_to_le32(TCG_EV_NO_ACTION), ++ .digest = {0}, ++ .event_data_size = cpu_to_le32(sizeof(event)), ++ }; ++ ++ log->content = g_byte_array_sized_new(max_size); ++ log->max_size = max_size; ++ ++ switch (log->digest_algo) { ++ case TPM_LOG_DIGEST_ALGO_SHA256: ++ log->tcg_algo = TCG_ALG_SHA256; ++ log->digest_size = TCG_ALG_SHA256_DIGEST_SIZE; ++ break; ++ case TPM_LOG_DIGEST_ALGO_SHA512: ++ log->tcg_algo = TCG_ALG_SHA512; ++ log->digest_size = TCG_ALG_SHA512_DIGEST_SIZE; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ log->digest = 
g_malloc0(log->digest_size); ++ ++ event = (struct TcgEfiSpecIdEvent) { ++ .signature = "Spec ID Event03", ++ .platform_class = 0, ++ .family_version_minor = 0, ++ .family_version_major = 2, ++ .spec_revision = 106, ++ .uintn_size = 2, /* UINT64 */ ++ .number_of_algorithms = cpu_to_le32(1), ++ .algorithm_id = cpu_to_le16(log->tcg_algo), ++ .digest_size = cpu_to_le16(log->digest_size), ++ .vendor_info_size = 0, ++ }; ++ ++ g_byte_array_append(log->content, (guint8 *)&header, sizeof(header)); ++ g_byte_array_append(log->content, (guint8 *)&event, sizeof(event)); ++ return 0; ++} ++ ++int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event, ++ size_t event_size, const uint8_t *data, size_t data_size, ++ Error **errp) ++{ ++ int digests = 0; ++ size_t rollback_len; ++ struct TcgPcrEvent2Head header = { ++ .pcr_index = 0, ++ .event_type = cpu_to_le32(event_type), ++ }; ++ struct TpmlDigestValues digest_header = {0}; ++ struct TcgPcrEvent2Tail tail = { ++ .event_size = cpu_to_le32(event_size), ++ }; ++ ++ if (log->content == NULL) { ++ error_setg(errp, "event log is not initialized"); ++ return -EINVAL; ++ } ++ rollback_len = log->content->len; ++ ++ g_byte_array_append(log->content, (guint8 *)&header, sizeof(header)); ++ ++ if (data) { ++ QCryptoHashAlgorithm qc_algo; ++ ++ digest_header.hash_alg = cpu_to_le16(log->tcg_algo); ++ switch (log->digest_algo) { ++ case TPM_LOG_DIGEST_ALGO_SHA256: ++ qc_algo = QCRYPTO_HASH_ALG_SHA256; ++ break; ++ case TPM_LOG_DIGEST_ALGO_SHA512: ++ qc_algo = QCRYPTO_HASH_ALG_SHA512; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ if (qcrypto_hash_bytes(qc_algo, (const char *)data, data_size, ++ &log->digest, &log->digest_size, errp)) { ++ goto err_rollback; ++ } ++ digests = 1; ++ } else if (event_type == TCG_EV_NO_ACTION) { ++ /* EV_NO_ACTION contains empty digests for each supported algo */ ++ memset(log->digest, 0, log->digest_size); ++ digest_header.hash_alg = 0; ++ digests = 1; ++ } ++ ++ if (digests) { ++ 
digest_header.count = cpu_to_le32(digests); ++ g_byte_array_append(log->content, (guint8 *)&digest_header, ++ sizeof(digest_header)); ++ g_byte_array_append(log->content, log->digest, log->digest_size); ++ } else { ++ /* Add an empty digests list */ ++ g_byte_array_append(log->content, (guint8 *)&digest_header.count, ++ sizeof(digest_header.count)); ++ } ++ ++ g_byte_array_append(log->content, (guint8 *)&tail, sizeof(tail)); ++ g_byte_array_append(log->content, event, event_size); ++ ++ if (log->content->len > log->max_size) { ++ error_setg(errp, "event log exceeds max size"); ++ goto err_rollback; ++ } ++ ++ return 0; ++ ++err_rollback: ++ g_byte_array_set_size(log->content, rollback_len); ++ return -1; ++} ++ ++int tpm_log_write_and_close(TpmLog *log, Error **errp) ++{ ++ int ret; ++ ++ if (!log->content) { ++ error_setg(errp, "event log is not initialized"); ++ return -1; ++ } ++ ++ ret = address_space_write_rom(&address_space_memory, log->load_addr, ++ MEMTXATTRS_UNSPECIFIED, log->content->data, ++ log->content->len); ++ if (ret) { ++ error_setg(errp, "cannot load log into memory"); ++ return -1; ++ } ++ ++ tpm_log_destroy(log); ++ return ret; ++} +diff --git a/include/hw/tpm/tpm_log.h b/include/hw/tpm/tpm_log.h +new file mode 100644 +index 0000000000..b3cd2e7563 +--- /dev/null ++++ b/include/hw/tpm/tpm_log.h +@@ -0,0 +1,89 @@ ++#ifndef QEMU_TPM_LOG_H ++#define QEMU_TPM_LOG_H ++ ++#include "qom/object.h" ++#include "sysemu/tpm.h" ++ ++/* ++ * Defined in: TCG Algorithm Registry ++ * Family 2.0 Level 00 Revision 01.34 ++ * ++ * (Here TCG stands for Trusted Computing Group) ++ */ ++#define TCG_ALG_SHA256 0xB ++#define TCG_ALG_SHA512 0xD ++ ++/* Size of a digest in bytes */ ++#define TCG_ALG_SHA256_DIGEST_SIZE 32 ++#define TCG_ALG_SHA512_DIGEST_SIZE 64 ++ ++/* ++ * Defined in: TCG PC Client Platform Firmware Profile Specification ++ * Version 1.06 revision 52 ++ */ ++#define TCG_EV_NO_ACTION 0x00000003 ++#define TCG_EV_EVENT_TAG 0x00000006 ++#define 
TCG_EV_POST_CODE2 0x00000013 ++#define TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2 0x8000000A ++ ++struct UefiPlatformFirmwareBlob2Head { ++ uint8_t blob_description_size; ++ uint8_t blob_description[]; ++} __attribute__((packed)); ++ ++struct UefiPlatformFirmwareBlob2Tail { ++ uint64_t blob_base; ++ uint64_t blob_size; ++} __attribute__((packed)); ++ ++#define TYPE_TPM_LOG "tpm-log" ++ ++OBJECT_DECLARE_SIMPLE_TYPE(TpmLog, TPM_LOG) ++ ++/** ++ * tpm_log_create - Create the event log ++ * @log: the log object ++ * @max_size: maximum size of the log. Adding an event past that size will ++ * return an error ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Allocate the event log and create the initial entry (Spec ID Event03) ++ * describing the log format. ++ * ++ * Returns: 0 on success, -1 on error ++ */ ++int tpm_log_create(TpmLog *log, size_t max_size, Error **errp); ++ ++/** ++ * tpm_log_add_event - Append an event to the log ++ * @log: the log object ++ * @event_type: the `eventType` field in TCG_PCR_EVENT2 ++ * @event: the `event` field in TCG_PCR_EVENT2 ++ * @event_size: the `eventSize` field in TCG_PCR_EVENT2 ++ * @data: content to be hashed into the event digest. May be NULL. ++ * @data_size: size of @data. Should be zero when @data is NULL. ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Add a TCG_PCR_EVENT2 event to the event log. Depending on the event type, a ++ * data buffer may be hashed into the event digest (for example ++ * TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2 contains a digest of the blob.) ++ * ++ * Returns: 0 on success, -1 on error ++ */ ++int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event, ++ size_t event_size, const uint8_t *data, size_t data_size, ++ Error **errp); ++ ++/** ++ * tpm_log_write_and_close - Move the log to guest memory ++ * @log: the log object ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Write the log into memory, at the address set in the load-addr property. 
++ * After this operation, the log is not writable anymore. ++ * ++ * Return: 0 on success, -1 on error ++ */ ++int tpm_log_write_and_close(TpmLog *log, Error **errp); ++ ++#endif +diff --git a/qapi/tpm.json b/qapi/tpm.json +index a754455ca5..a051d7bf5c 100644 +--- a/qapi/tpm.json ++++ b/qapi/tpm.json +@@ -186,3 +186,17 @@ + ## + { 'command': 'query-tpm', 'returns': ['TPMInfo'], + 'if': 'CONFIG_TPM' } ++ ++## ++# @TpmLogDigestAlgo: ++# ++# @sha256: Use the SHA256 algorithm ++# ++# @sha512: Use the SHA512 algorithm ++# ++# Algorithm to use for event log digests ++# ++# Since: 9.3 ++## ++{ 'enum': 'TpmLogDigestAlgo', ++ 'data': ['sha256', 'sha512'] } +-- +2.33.0 + diff --git a/hw-tpm-rename-Error-parameter-to-more-common-errp.patch b/hw-tpm-rename-Error-parameter-to-more-common-errp.patch deleted file mode 100644 index a47a1ae68da792d7811b9d85cb5cbd5f5d5ac0cd..0000000000000000000000000000000000000000 --- a/hw-tpm-rename-Error-parameter-to-more-common-errp.patch +++ /dev/null @@ -1,58 +0,0 @@ -From f2dceb3cde537210896a2cadb8958cfd310113a3 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Thu, 5 Dec 2019 20:46:30 +0300 -Subject: [PATCH 01/19] hw/tpm: rename Error ** parameter to more common errp -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Stefan Berger -Message-Id: <20191205174635.18758-17-vsementsov@virtuozzo.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Markus Armbruster -Signed-off-by: jiangfangjie ---- - hw/tpm/tpm_emulator.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c -index fc0b512f..38bf5fd6 100644 ---- a/hw/tpm/tpm_emulator.c -+++ b/hw/tpm/tpm_emulator.c -@@ -155,7 +155,7 @@ static int tpm_emulator_unix_tx_bufs(TPMEmulator *tpm_emu, - const uint8_t *in, uint32_t in_len, - uint8_t *out, uint32_t out_len, - bool *selftest_done, -- Error **err) -+ 
Error **errp) - { - ssize_t ret; - bool is_selftest = false; -@@ -165,20 +165,20 @@ static int tpm_emulator_unix_tx_bufs(TPMEmulator *tpm_emu, - is_selftest = tpm_util_is_selftest(in, in_len); - } - -- ret = qio_channel_write_all(tpm_emu->data_ioc, (char *)in, in_len, err); -+ ret = qio_channel_write_all(tpm_emu->data_ioc, (char *)in, in_len, errp); - if (ret != 0) { - return -1; - } - - ret = qio_channel_read_all(tpm_emu->data_ioc, (char *)out, -- sizeof(struct tpm_resp_hdr), err); -+ sizeof(struct tpm_resp_hdr), errp); - if (ret != 0) { - return -1; - } - - ret = qio_channel_read_all(tpm_emu->data_ioc, - (char *)out + sizeof(struct tpm_resp_hdr), -- tpm_cmd_get_size(out) - sizeof(struct tpm_resp_hdr), err); -+ tpm_cmd_get_size(out) - sizeof(struct tpm_resp_hdr), errp); - if (ret != 0) { - return -1; - } --- -2.23.0 - diff --git a/hw-ufs-Fix-buffer-overflow-bug.patch b/hw-ufs-Fix-buffer-overflow-bug.patch new file mode 100644 index 0000000000000000000000000000000000000000..b55411704ca0373b2902d46cfa4bf131b2d31cb1 --- /dev/null +++ b/hw-ufs-Fix-buffer-overflow-bug.patch @@ -0,0 +1,63 @@ +From 73fecb1c0fab9a1e0593b769c36bdc795c9316ae Mon Sep 17 00:00:00 2001 +From: qihao +Date: Wed, 15 May 2024 15:52:28 +0800 +Subject: [PATCH] hw/ufs: Fix buffer overflow bug + +cherry-pick from f2c8aeb1afefcda92054c448b21fc59cdd99db30 + +It fixes the buffer overflow vulnerability in the ufs device. +The bug was detected by sanitizers.
+ +You can reproduce it by: + +cat << EOF |\ +qemu-system-x86_64 \ +-display none -machine accel=qtest -m 512M -M q35 -nodefaults -drive \ +file=null-co://,if=none,id=disk0 -device ufs,id=ufs_bus -device \ +ufs-lu,drive=disk0,bus=ufs_bus -qtest stdio +outl 0xcf8 0x80000810 +outl 0xcfc 0xe0000000 +outl 0xcf8 0x80000804 +outw 0xcfc 0x06 +write 0xe0000058 0x1 0xa7 +write 0xa 0x1 0x50 +EOF + +Resolves: #2299 +Fixes: 329f16624499 ("hw/ufs: Support for Query Transfer Requests") +Reported-by: Zheyu Ma +Signed-off-by: Jeuk Kim +Signed-off-by: qihao_yewu +--- + hw/ufs/ufs.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c +index eccdb852a0..bac78a32bb 100644 +--- a/hw/ufs/ufs.c ++++ b/hw/ufs/ufs.c +@@ -126,6 +126,10 @@ static MemTxResult ufs_dma_read_req_upiu(UfsRequest *req) + copy_size = sizeof(UtpUpiuHeader) + UFS_TRANSACTION_SPECIFIC_FIELD_SIZE + + data_segment_length; + ++ if (copy_size > sizeof(req->req_upiu)) { ++ copy_size = sizeof(req->req_upiu); ++ } ++ + ret = ufs_addr_read(u, req_upiu_base_addr, &req->req_upiu, copy_size); + if (ret) { + trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr); +@@ -225,6 +229,10 @@ static MemTxResult ufs_dma_write_rsp_upiu(UfsRequest *req) + copy_size = rsp_upiu_byte_len; + } + ++ if (copy_size > sizeof(req->rsp_upiu)) { ++ copy_size = sizeof(req->rsp_upiu); ++ } ++ + ret = ufs_addr_write(u, rsp_upiu_base_addr, &req->rsp_upiu, copy_size); + if (ret) { + trace_ufs_err_dma_write_rsp_upiu(req->slot, rsp_upiu_base_addr); +-- +2.41.0.windows.1 + diff --git a/hw-ufs-add-basic-info-of-query-response-upiu.patch b/hw-ufs-add-basic-info-of-query-response-upiu.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c8fe2312b4c9aee3827ff52c34dfb1cde16839f --- /dev/null +++ b/hw-ufs-add-basic-info-of-query-response-upiu.patch @@ -0,0 +1,71 @@ +From 450b67a5dd3954db8441a1ad65a5e4594ba2e405 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Mon, 16 Sep 2024 17:34:34 +0800 
+Subject: [PATCH] hw/ufs: add basic info of query response upiu cherry picked + from de2cc4078240f8b745a7caeed461b02f2577e2d2 Modify to fill the opcode, idn, + index, selector information of all Query Response UPIU. because attr and flag + operation of query response upiu need these information too. + +Signed-off-by: KyoungrulKim +Reviewed-by: Minwoo Im +Reviewed-by: Jeuk Kim +Signed-off-by: Jeuk Kim +Signed-off-by: dinglimin +--- + hw/ufs/ufs.c | 13 +++++++++---- + hw/ufs/ufs.h | 1 + + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c +index bac78a32bb..068895b27b 100644 +--- a/hw/ufs/ufs.c ++++ b/hw/ufs/ufs.c +@@ -455,6 +455,14 @@ void ufs_build_upiu_header(UfsRequest *req, uint8_t trans_type, uint8_t flags, + req->rsp_upiu.header.data_segment_length = cpu_to_be16(data_segment_length); + } + ++void ufs_build_query_response(UfsRequest *req) ++{ ++ req->rsp_upiu.qr.opcode = req->req_upiu.qr.opcode; ++ req->rsp_upiu.qr.idn = req->req_upiu.qr.idn; ++ req->rsp_upiu.qr.index = req->req_upiu.qr.index; ++ req->rsp_upiu.qr.selector = req->req_upiu.qr.selector; ++} ++ + static UfsReqResult ufs_exec_scsi_cmd(UfsRequest *req) + { + UfsHc *u = req->hc; +@@ -931,10 +939,6 @@ static QueryRespCode ufs_read_desc(UfsRequest *req) + if (length > req->rsp_upiu.qr.data[0]) { + length = req->rsp_upiu.qr.data[0]; + } +- req->rsp_upiu.qr.opcode = req->req_upiu.qr.opcode; +- req->rsp_upiu.qr.idn = req->req_upiu.qr.idn; +- req->rsp_upiu.qr.index = req->req_upiu.qr.index; +- req->rsp_upiu.qr.selector = req->req_upiu.qr.selector; + req->rsp_upiu.qr.length = cpu_to_be16(length); + + return status; +@@ -1015,6 +1019,7 @@ static UfsReqResult ufs_exec_query_cmd(UfsRequest *req) + data_segment_length = be16_to_cpu(req->rsp_upiu.qr.length); + ufs_build_upiu_header(req, UFS_UPIU_TRANSACTION_QUERY_RSP, 0, status, 0, + data_segment_length); ++ ufs_build_query_response(req); + + if (status != UFS_QUERY_RESULT_SUCCESS) { + return UFS_REQUEST_FAIL; +diff 
--git a/hw/ufs/ufs.h b/hw/ufs/ufs.h +index 8fda94f4ef..8a74b4c2ab 100644 +--- a/hw/ufs/ufs.h ++++ b/hw/ufs/ufs.h +@@ -132,6 +132,7 @@ static inline bool is_wlun(uint8_t lun) + void ufs_build_upiu_header(UfsRequest *req, uint8_t trans_type, uint8_t flags, + uint8_t response, uint8_t scsi_status, + uint16_t data_segment_length); ++void ufs_build_query_response(UfsRequest *req); + void ufs_complete_req(UfsRequest *req, UfsReqResult req_result); + void ufs_init_wlu(UfsLu *wlu, uint8_t wlun); + #endif /* HW_UFS_UFS_H */ +-- +2.41.0.windows.1 + diff --git a/hw-ufs-free-irq-on-exit.patch b/hw-ufs-free-irq-on-exit.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e6a11c978546fe086542fdabe009ae8440cfb1c --- /dev/null +++ b/hw-ufs-free-irq-on-exit.patch @@ -0,0 +1,46 @@ +From 068fef175047c18f60900dacd54c7a436114c164 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 7 Apr 2025 13:18:47 -0400 +Subject: [PATCH] hw/ufs: free irq on exit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from c458f9474d6574505ce9144ab1a90b951e69c1bd + +Fix a memory leak bug in ufs_init_pci() due to u->irq +not being freed in ufs_exit().
+ +Signed-off-by: Zheng Huang +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <43ceb427-87aa-44ee-9007-dbaecc499bba@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/ufs/ufs.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c +index 068895b27b..f57d33e771 100644 +--- a/hw/ufs/ufs.c ++++ b/hw/ufs/ufs.c +@@ -25,6 +25,7 @@ + #include "qapi/error.h" + #include "migration/vmstate.h" + #include "scsi/constants.h" ++#include "hw/irq.h" + #include "trace.h" + #include "ufs.h" + +@@ -1286,6 +1287,8 @@ static void ufs_exit(PCIDevice *pci_dev) + { + UfsHc *u = UFS(pci_dev); + ++ qemu_free_irq(u->irq); ++ + qemu_bh_delete(u->doorbell_bh); + qemu_bh_delete(u->complete_bh); + +-- +2.41.0.windows.1 + diff --git a/hw-usb-Style-cleanup.patch b/hw-usb-Style-cleanup.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2c333b373d8dea2b293fd4e8de1764e8ed3d1a2 --- /dev/null +++ b/hw-usb-Style-cleanup.patch @@ -0,0 +1,66 @@ +From f06b930da5d2acf70d142f1212ef4ee09d643b21 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 16:18:43 +0800 +Subject: [PATCH] hw/usb: Style cleanup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 455177ffc457098b0103d2a09cb7ba5e260dfcdd + +We are going to modify these lines, fix their style +in order to avoid checkpatch.pl warning.
+ +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Signed-off-by: Michael Tokarev +Signed-off-by: dinglimin +--- + hw/usb/hcd-ehci.c | 3 ++- + hw/usb/hcd-uhci.c | 6 ++++-- + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index 19b4534c20..7b093acd98 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -1086,8 +1086,9 @@ static void ehci_opreg_write(void *ptr, hwaddr addr, + case CONFIGFLAG: + val &= 0x1; + if (val) { +- for(i = 0; i < NB_PORTS; i++) ++ for (i = 0; i < NB_PORTS; i++) { + handle_port_owner_write(s, i, 0); ++ } + } + break; + +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 77baaa7a6b..6975966c3f 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -457,8 +457,9 @@ static void uhci_port_write(void *opaque, hwaddr addr, + int n; + + n = (addr >> 1) & 7; +- if (n >= NB_PORTS) ++ if (n >= NB_PORTS) { + return; ++ } + port = &s->ports[n]; + dev = port->port.dev; + if (dev && dev->attached) { +@@ -513,8 +514,9 @@ static uint64_t uhci_port_read(void *opaque, hwaddr addr, unsigned size) + UHCIPort *port; + int n; + n = (addr >> 1) & 7; +- if (n >= NB_PORTS) ++ if (n >= NB_PORTS) { + goto read_default; ++ } + port = &s->ports[n]; + val = port->ctrl; + } +-- +2.27.0 + diff --git a/hw-usb-core-fix-buffer-overflow.patch b/hw-usb-core-fix-buffer-overflow.patch deleted file mode 100644 index 494955788a2506fd2d28521ff234118025fbe674..0000000000000000000000000000000000000000 --- a/hw-usb-core-fix-buffer-overflow.patch +++ /dev/null @@ -1,46 +0,0 @@ -hw-usb-core-fix-buffer-overflow - -From 18ad0451f113ffc3a2ff59c059d189cca1e42842 Mon Sep 17 00:00:00 2001 -From: root -Date: Wed, 19 Aug 2020 17:04:04 +0800 -Subject: [PATCH] hw/usb/core.c fix buffer overflow - -Store calculated setup_len in a local variable, verify it, - and only write it to the struct (USBDevice->setup_len) in case it passed the - sanity checks. 
- -This prevents other code (do_token_{in,out} function specifically) -from working with invalid USBDevice->setup_len values and overruning -the USBDevice->setup_buf[] buffer. -Store -Fixes: CVE-2020-14364 -Signed-off-by: Gred Hoffman --- - hw/usb/core.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/usb/core.c b/hw/usb/core.c -index 5abd128b..12342f13 100644 ---- a/hw/usb/core.c -+++ b/hw/usb/core.c -@@ -144,6 +144,8 @@ static void do_token_setup(USBDevice *s, USBPacket *p) - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", - s->setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; -+ s->setup_len = 0; -+ s->setup_state = SETUP_STATE_ACK; - return; - } - -@@ -277,6 +279,8 @@ static void do_parameter(USBDevice *s, USBPacket *p) - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", - s->setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; -+ s->setup_len = 0; -+ s->setup_state = SETUP_STATE_ACK; - return; - } - --- -2.23.0 - diff --git a/hw-usb-hcd-ehci-Fix-debug-printf-format-string.patch b/hw-usb-hcd-ehci-Fix-debug-printf-format-string.patch new file mode 100644 index 0000000000000000000000000000000000000000..709f2a5dc94d8f05337502289dfda0c67dfd5138 --- /dev/null +++ b/hw-usb-hcd-ehci-Fix-debug-printf-format-string.patch @@ -0,0 +1,40 @@ +From 4ca8ac93bd2c328c80841540b3b5e297ff24d3c9 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Wed, 5 Feb 2025 06:02:50 -0500 +Subject: [PATCH] hw/usb/hcd-ehci: Fix debug printf format string +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from a40b5f32867294b7c855d2e4b98a4c2d32b3be28 + +The variable is uint64_t so needs %PRIu64 instead of %d.
+ +Fixes: 3ae7eb88c47 ("ehci: fix overflow in frame timer code") +Signed-off-by: BALATON Zoltan +Reviewed-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20250124124713.64F8C4E6031@zero.eik.bme.hu> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/usb/hcd-ehci.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index 7b093acd98..fa8c7af5c8 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -2287,7 +2287,8 @@ static void ehci_work_bh(void *opaque) + ehci_update_frindex(ehci, skipped_uframes); + ehci->last_run_ns += UFRAME_TIMER_NS * skipped_uframes; + uframes -= skipped_uframes; +- DPRINTF("WARNING - EHCI skipped %d uframes\n", skipped_uframes); ++ DPRINTF("WARNING - EHCI skipped %"PRIu64" uframes\n", ++ skipped_uframes); + } + + for (i = 0; i < uframes; i++) { +-- +2.41.0.windows.1 + diff --git a/hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch b/hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch deleted file mode 100644 index 96a45b8100318237976abc51ac2b584b569e018a..0000000000000000000000000000000000000000 --- a/hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b1398dc6f3eb16e006167bdd8666fb7c52918e13 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 15 Sep 2020 23:52:59 +0530 -Subject: [PATCH] hw: usb: hcd-ohci: check for processed TD before retire - -While servicing OHCI transfer descriptors(TD), ohci_service_iso_td -retires a TD if it has passed its time frame. It does not check if -the TD was already processed once and holds an error code in TD_CC. -It may happen if the TD list has a loop. Add check to avoid an -infinite loop condition. 
- -Signed-off-by: Prasad J Pandit -Reviewed-by: Li Qiang -Message-id: 20200915182259.68522-3-ppandit@redhat.com -Signed-off-by: Gerd Hoffmann -(cherry-picked from 1be90ebe) -Fix CVE-2020-25625 -Signed-off-by: Alex Chen ---- - hw/usb/hcd-ohci.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c -index 4f6fdbc0a7..ffe52a09d7 100644 ---- a/hw/usb/hcd-ohci.c -+++ b/hw/usb/hcd-ohci.c -@@ -689,6 +689,10 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, - the next ISO TD of the same ED */ - trace_usb_ohci_iso_td_relative_frame_number_big(relative_frame_number, - frame_count); -+ if (OHCI_CC_DATAOVERRUN == OHCI_BM(iso_td.flags, TD_CC)) { -+ /* avoid infinite loop */ -+ return 1; -+ } - OHCI_SET_BM(iso_td.flags, TD_CC, OHCI_CC_DATAOVERRUN); - ed->head &= ~OHCI_DPTR_MASK; - ed->head |= (iso_td.next & OHCI_DPTR_MASK); --- -2.27.0 - diff --git a/hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch b/hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch deleted file mode 100644 index 0133d70db8abfb7338a57f5cf305c68ac8811e56..0000000000000000000000000000000000000000 --- a/hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 789723b95045b6e44d1d1aef56a8bcb255a10476 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 15 Sep 2020 23:52:58 +0530 -Subject: [PATCH] hw: usb: hcd-ohci: check len and frame_number variables - -While servicing the OHCI transfer descriptors(TD), OHCI host -controller derives variables 'start_addr', 'end_addr', 'len' -etc. from values supplied by the host controller driver. -Host controller driver may supply values such that using -above variables leads to out-of-bounds access issues. -Add checks to avoid them. 
- -AddressSanitizer: stack-buffer-overflow on address 0x7ffd53af76a0 - READ of size 2 at 0x7ffd53af76a0 thread T0 - #0 ohci_service_iso_td ../hw/usb/hcd-ohci.c:734 - #1 ohci_service_ed_list ../hw/usb/hcd-ohci.c:1180 - #2 ohci_process_lists ../hw/usb/hcd-ohci.c:1214 - #3 ohci_frame_boundary ../hw/usb/hcd-ohci.c:1257 - #4 timerlist_run_timers ../util/qemu-timer.c:572 - #5 qemu_clock_run_timers ../util/qemu-timer.c:586 - #6 qemu_clock_run_all_timers ../util/qemu-timer.c:672 - #7 main_loop_wait ../util/main-loop.c:527 - #8 qemu_main_loop ../softmmu/vl.c:1676 - #9 main ../softmmu/main.c:50 - -Reported-by: Gaoning Pan -Reported-by: Yongkang Jia -Reported-by: Yi Ren -Signed-off-by: Prasad J Pandit -Message-id: 20200915182259.68522-2-ppandit@redhat.com -Signed-off-by: Gerd Hoffmann -(cherry-picked from 1328fe0c) -Fix CVE-2020-25624 -Signed-off-by: Alex Chen ---- - hw/usb/hcd-ohci.c | 24 ++++++++++++++++++++++-- - 1 file changed, 22 insertions(+), 2 deletions(-) - -diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c -index ffe52a09d7..d2dd8efd58 100644 ---- a/hw/usb/hcd-ohci.c -+++ b/hw/usb/hcd-ohci.c -@@ -733,7 +733,11 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, - } - - start_offset = iso_td.offset[relative_frame_number]; -- next_offset = iso_td.offset[relative_frame_number + 1]; -+ if (relative_frame_number < frame_count) { -+ next_offset = iso_td.offset[relative_frame_number + 1]; -+ } else { -+ next_offset = iso_td.be; -+ } - - if (!(OHCI_BM(start_offset, TD_PSW_CC) & 0xe) || - ((relative_frame_number < frame_count) && -@@ -766,7 +770,12 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, - } - } else { - /* Last packet in the ISO TD */ -- end_addr = iso_td.be; -+ end_addr = next_offset; -+ } -+ -+ if (start_addr > end_addr) { -+ trace_usb_ohci_iso_td_bad_cc_overrun(start_addr, end_addr); -+ return 1; - } - - if ((start_addr & OHCI_PAGE_MASK) != (end_addr & OHCI_PAGE_MASK)) { -@@ -775,6 +784,9 @@ static int 
ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, - } else { - len = end_addr - start_addr + 1; - } -+ if (len > sizeof(ohci->usb_buf)) { -+ len = sizeof(ohci->usb_buf); -+ } - - if (len && dir != OHCI_TD_DIR_IN) { - if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, -@@ -977,8 +989,16 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed) - if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) { - len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff); - } else { -+ if (td.cbp > td.be) { -+ trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be); -+ ohci_die(ohci); -+ return 1; -+ } - len = (td.be - td.cbp) + 1; - } -+ if (len > sizeof(ohci->usb_buf)) { -+ len = sizeof(ohci->usb_buf); -+ } - - pktlen = len; - if (len && dir != OHCI_TD_DIR_IN) { --- -2.27.0 - diff --git a/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch b/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch new file mode 100644 index 0000000000000000000000000000000000000000..72d0c2a4fc5211a27de4ee2c0def9ee28b956973 --- /dev/null +++ b/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch @@ -0,0 +1,459 @@ +From dc7e40b2841132b0bc43d25c2c31f41ae3fa2c68 Mon Sep 17 00:00:00 2001 +From: eillon +Date: Tue, 8 Feb 2022 22:43:59 -0500 +Subject: [PATCH] hw/usb: reduce the vcpu cost of UHCI when VNC disconnect + +Reduce the vcpu cost by setting a lower FRAME_TIMER_FREQ of the UHCI +when VNC client disconnected. This can reduce about 3% cost of +vcpu thread.
+ +Signed-off-by: eillon +--- + hw/usb/core.c | 5 ++-- + hw/usb/desc.c | 7 +++-- + hw/usb/dev-hid.c | 2 +- + hw/usb/hcd-uhci.c | 63 ++++++++++++++++++++++++++++++++++------ + hw/usb/hcd-uhci.h | 1 + + hw/usb/host-libusb.c | 32 ++++++++++++++++++++ + include/hw/usb.h | 1 + + include/qemu/timer.h | 28 ++++++++++++++++++ + ui/vnc.c | 4 +++ + util/qemu-timer.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 197 insertions(+), 15 deletions(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 975f76250a..51b36126ca 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -87,7 +87,7 @@ void usb_device_reset(USBDevice *dev) + return; + } + usb_device_handle_reset(dev); +- dev->remote_wakeup = 0; ++ dev->remote_wakeup &= ~USB_DEVICE_REMOTE_WAKEUP; + dev->addr = 0; + dev->state = USB_STATE_DEFAULT; + } +@@ -105,7 +105,8 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) + */ + return; + } +- if (dev->remote_wakeup && dev->port && dev->port->ops->wakeup) { ++ if ((dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) ++ && dev->port && dev->port->ops->wakeup) { + dev->port->ops->wakeup(dev->port); + } + if (bus->ops->wakeup_endpoint) { +diff --git a/hw/usb/desc.c b/hw/usb/desc.c +index f2bdc05a95..333f73fff1 100644 +--- a/hw/usb/desc.c ++++ b/hw/usb/desc.c +@@ -752,7 +752,7 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, + if (config->bmAttributes & USB_CFG_ATT_SELFPOWER) { + data[0] |= 1 << USB_DEVICE_SELF_POWERED; + } +- if (dev->remote_wakeup) { ++ if (dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) { + data[0] |= 1 << USB_DEVICE_REMOTE_WAKEUP; + } + data[1] = 0x00; +@@ -762,14 +762,15 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, + } + case DeviceOutRequest | USB_REQ_CLEAR_FEATURE: + if (value == USB_DEVICE_REMOTE_WAKEUP) { +- dev->remote_wakeup = 0; ++ dev->remote_wakeup &= ~USB_DEVICE_REMOTE_WAKEUP; + ret = 0; + } + trace_usb_clear_device_feature(dev->addr, value, ret); + break; + case DeviceOutRequest | 
USB_REQ_SET_FEATURE: ++ dev->remote_wakeup |= USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED; + if (value == USB_DEVICE_REMOTE_WAKEUP) { +- dev->remote_wakeup = 1; ++ dev->remote_wakeup |= USB_DEVICE_REMOTE_WAKEUP; + ret = 0; + } + trace_usb_set_device_feature(dev->addr, value, ret); +diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c +index bdd6d1ffaf..cc68d1ce9e 100644 +--- a/hw/usb/dev-hid.c ++++ b/hw/usb/dev-hid.c +@@ -745,7 +745,7 @@ static int usb_ptr_post_load(void *opaque, int version_id) + { + USBHIDState *s = opaque; + +- if (s->dev.remote_wakeup) { ++ if (s->dev.remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) { + hid_pointer_activate(&s->hid); + } + return 0; +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 6975966c3f..a92581ff5f 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -44,6 +44,8 @@ + #include "hcd-uhci.h" + + #define FRAME_TIMER_FREQ 1000 ++#define FRAME_TIMER_FREQ_LAZY 10 ++#define USB_DEVICE_NEED_NORMAL_FREQ "QEMU USB Tablet" + + #define FRAME_MAX_LOOPS 256 + +@@ -109,6 +111,22 @@ static void uhci_async_cancel(UHCIAsync *async); + static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td); + static void uhci_resume(void *opaque); + ++static int64_t uhci_frame_timer_freq = FRAME_TIMER_FREQ_LAZY; ++ ++static void uhci_set_frame_freq(int freq) ++{ ++ if (freq <= 0) { ++ return; ++ } ++ ++ uhci_frame_timer_freq = freq; ++} ++ ++static qemu_usb_controller qemu_uhci = { ++ .name = "uhci", ++ .qemu_set_freq = uhci_set_frame_freq, ++}; ++ + static inline int32_t uhci_queue_token(UHCI_TD *td) + { + if ((td->token & (0xf << 15)) == 0) { +@@ -351,7 +369,7 @@ static int uhci_post_load(void *opaque, int version_id) + + if (version_id < 2) { + s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + +- (NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ); ++ (NANOSECONDS_PER_SECOND / uhci_frame_timer_freq); + } + return 0; + } +@@ -392,8 +410,29 @@ static void uhci_port_write(void *opaque, hwaddr addr, + if ((val & UHCI_CMD_RS) && !(s->cmd & UHCI_CMD_RS)) { + /* 
start frame processing */ + trace_usb_uhci_schedule_start(); +- s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + +- (NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ); ++ ++ /* ++ * If the frequency of frame_timer is too slow, Guest OS (Win2012) would become ++ * blue-screen after hotplugging some vcpus. ++ * If this USB device support the remote-wakeup, the UHCI controller ++ * will enter global suspend mode when there is no input for several seconds. ++ * In this case, Qemu will delete the frame_timer. Since the frame_timer has been deleted, ++ * there is no influence to the performance of Vms. So, we can change the frequency to 1000. ++ * After that the frequency will be safe when we trigger the frame_timer again. ++ * Excepting this, there are two ways to change the frequency: ++ * 1)VNC connect/disconnect;2)attach/detach USB device. ++ */ ++ if ((uhci_frame_timer_freq != FRAME_TIMER_FREQ) ++ && (s->ports[0].port.dev) ++ && (!memcmp(s->ports[0].port.dev->product_desc, ++ USB_DEVICE_NEED_NORMAL_FREQ, strlen(USB_DEVICE_NEED_NORMAL_FREQ))) ++ && (s->ports[0].port.dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED)) { ++ qemu_log("turn up the frequency of UHCI controller to %d\n", FRAME_TIMER_FREQ); ++ uhci_frame_timer_freq = FRAME_TIMER_FREQ; ++ } ++ ++ s->frame_time = NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ; ++ s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->frame_time; + timer_mod(s->frame_timer, s->expire_time); + s->status &= ~UHCI_STS_HCHALTED; + } else if (!(val & UHCI_CMD_RS)) { +@@ -1083,7 +1122,6 @@ static void uhci_frame_timer(void *opaque) + UHCIState *s = opaque; + uint64_t t_now, t_last_run; + int i, frames; +- const uint64_t frame_t = NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ; + + s->completions_only = false; + qemu_bh_cancel(s->bh); +@@ -1099,14 +1137,14 @@ static void uhci_frame_timer(void *opaque) + } + + /* We still store expire_time in our state, for migration */ +- t_last_run = s->expire_time - frame_t; ++ t_last_run = 
s->expire_time - s->frame_time; + t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + /* Process up to MAX_FRAMES_PER_TICK frames */ +- frames = (t_now - t_last_run) / frame_t; ++ frames = (t_now - t_last_run) / s->frame_time; + if (frames > s->maxframes) { + int skipped = frames - s->maxframes; +- s->expire_time += skipped * frame_t; ++ s->expire_time += skipped * s->frame_time; + s->frnum = (s->frnum + skipped) & 0x7ff; + frames -= skipped; + } +@@ -1123,7 +1161,7 @@ static void uhci_frame_timer(void *opaque) + /* The spec says frnum is the frame currently being processed, and + * the guest must look at frnum - 1 on interrupt, so inc frnum now */ + s->frnum = (s->frnum + 1) & 0x7ff; +- s->expire_time += frame_t; ++ s->expire_time += s->frame_time; + } + + /* Complete the previous frame(s) */ +@@ -1134,7 +1172,12 @@ static void uhci_frame_timer(void *opaque) + } + s->pending_int_mask = 0; + +- timer_mod(s->frame_timer, t_now + frame_t); ++ /* expire_time is calculated from last frame_time, we should calculate it ++ * according to new frame_time which equals to ++ * NANOSECONDS_PER_SECOND / uhci_frame_timer_freq */ ++ s->expire_time -= s->frame_time - NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; ++ s->frame_time = NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; ++ timer_mod(s->frame_timer, t_now + s->frame_time); + } + + static const MemoryRegionOps uhci_ioport_ops = { +@@ -1195,8 +1238,10 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); + s->num_ports_vmstate = NB_PORTS; ++ s->frame_time = NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; + QTAILQ_INIT(&s->queues); + ++ qemu_register_usb_controller(&qemu_uhci, QEMU_USB_CONTROLLER_UHCI); + memory_region_init_io(&s->io_bar, OBJECT(s), &uhci_ioport_ops, s, + "uhci", 0x20); + +diff --git a/hw/usb/hcd-uhci.h b/hw/usb/hcd-uhci.h +index 
69f8b40c49..0918719911 100644 +--- a/hw/usb/hcd-uhci.h ++++ b/hw/usb/hcd-uhci.h +@@ -50,6 +50,7 @@ typedef struct UHCIState { + uint16_t status; + uint16_t intr; /* interrupt enable register */ + uint16_t frnum; /* frame number */ ++ uint64_t frame_time; /* frame time in ns */ + uint32_t fl_base_addr; /* frame list base address */ + uint8_t sof_timing; + uint8_t status2; /* bit 0 and 1 are used to generate UHCI_STS_USBINT */ +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index d7060a42d5..dba469c1ef 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -945,6 +945,30 @@ static void usb_host_ep_update(USBHostDevice *s) + libusb_free_config_descriptor(conf); + } + ++static unsigned int usb_get_controller_type(int speed) ++{ ++ unsigned int type = MAX_USB_CONTROLLER_TYPES; ++ ++ switch (speed) { ++ case USB_SPEED_SUPER: ++ type = QEMU_USB_CONTROLLER_XHCI; ++ break; ++ case USB_SPEED_HIGH: ++ type = QEMU_USB_CONTROLLER_EHCI; ++ break; ++ case USB_SPEED_FULL: ++ type = QEMU_USB_CONTROLLER_UHCI; ++ break; ++ case USB_SPEED_LOW: ++ type = QEMU_USB_CONTROLLER_OHCI; ++ break; ++ default: ++ break; ++ } ++ ++ return type; ++} ++ + static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + { + USBDevice *udev = USB_DEVICE(s); +@@ -1054,6 +1078,12 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + } + + trace_usb_host_open_success(bus_num, addr); ++ ++ /* change ehci frame time freq when USB passthrough */ ++ qemu_log("usb host speed is %d\n", udev->speed); ++ qemu_timer_set_mode(QEMU_TIMER_USB_NORMAL_MODE, ++ usb_get_controller_type(udev->speed)); ++ + return 0; + + fail: +@@ -1129,6 +1159,8 @@ static int usb_host_close(USBHostDevice *s) + } + + usb_host_auto_check(NULL); ++ qemu_timer_set_mode(QEMU_TIMER_USB_LAZY_MODE, ++ usb_get_controller_type(udev->speed)); + return 0; + } + +diff --git a/include/hw/usb.h b/include/hw/usb.h +index 32c23a5ca2..911179158d 100644 +--- a/include/hw/usb.h ++++ 
b/include/hw/usb.h +@@ -142,6 +142,7 @@ + + #define USB_DEVICE_SELF_POWERED 0 + #define USB_DEVICE_REMOTE_WAKEUP 1 ++#define USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED 2 + + #define USB_DT_DEVICE 0x01 + #define USB_DT_CONFIG 0x02 +diff --git a/include/qemu/timer.h b/include/qemu/timer.h +index 9a366e551f..475c2a3f18 100644 +--- a/include/qemu/timer.h ++++ b/include/qemu/timer.h +@@ -91,6 +91,34 @@ struct QEMUTimer { + int scale; + }; + ++#define QEMU_USB_NORMAL_FREQ 1000 ++#define QEMU_USB_LAZY_FREQ 10 ++#define MAX_USB_CONTROLLER_TYPES 4 ++#define QEMU_USB_CONTROLLER_OHCI 0 ++#define QEMU_USB_CONTROLLER_UHCI 1 ++#define QEMU_USB_CONTROLLER_EHCI 2 ++#define QEMU_USB_CONTROLLER_XHCI 3 ++ ++typedef void (*QEMUSetFreqHandler) (int freq); ++ ++typedef struct qemu_usb_controller { ++ const char *name; ++ QEMUSetFreqHandler qemu_set_freq; ++} qemu_usb_controller; ++ ++typedef qemu_usb_controller* qemu_usb_controller_ptr; ++ ++enum qemu_timer_mode { ++ QEMU_TIMER_USB_NORMAL_MODE = 1 << 0, /* Set when VNC connect or ++ * with usb dev passthrough ++ */ ++ QEMU_TIMER_USB_LAZY_MODE = 1 << 1, /* Set when VNC disconnect */ ++}; ++ ++int qemu_register_usb_controller(qemu_usb_controller_ptr controller, ++ unsigned int type); ++int qemu_timer_set_mode(enum qemu_timer_mode mode, unsigned int type); ++ + extern QEMUTimerListGroup main_loop_tlg; + + /* +diff --git a/ui/vnc.c b/ui/vnc.c +index 4f23a0fa79..5dd77e73cb 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -1365,6 +1365,8 @@ void vnc_disconnect_finish(VncState *vs) + g_free(vs->zrle); + g_free(vs->tight); + g_free(vs); ++ ++ qemu_timer_set_mode(QEMU_TIMER_USB_LAZY_MODE, QEMU_USB_CONTROLLER_UHCI); + } + + size_t vnc_client_io_error(VncState *vs, ssize_t ret, Error *err) +@@ -3341,6 +3343,8 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, + } + } + } ++ ++ qemu_timer_set_mode(QEMU_TIMER_USB_NORMAL_MODE, QEMU_USB_CONTROLLER_UHCI); + } + + void vnc_start_protocol(VncState *vs) +diff --git a/util/qemu-timer.c 
b/util/qemu-timer.c +index 6a0de33dd2..dc891cc557 100644 +--- a/util/qemu-timer.c ++++ b/util/qemu-timer.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/timer.h" + #include "qemu/lockable.h" +@@ -75,6 +76,74 @@ struct QEMUTimerList { + QemuEvent timers_done_ev; + }; + ++typedef struct qemu_controller_timer_state { ++ qemu_usb_controller_ptr controller; ++ int refs; ++} controller_timer_state; ++ ++typedef controller_timer_state* controller_timer_state_ptr; ++ ++static controller_timer_state uhci_timer_state = { ++ .controller = NULL, ++ .refs = 0, ++}; ++ ++static controller_timer_state_ptr \ ++ qemu_usb_controller_tab[MAX_USB_CONTROLLER_TYPES] = {NULL, ++ &uhci_timer_state, ++ NULL, NULL}; ++ ++int qemu_register_usb_controller(qemu_usb_controller_ptr controller, ++ unsigned int type) ++{ ++ if (type != QEMU_USB_CONTROLLER_UHCI) { ++ return 0; ++ } ++ ++ /* for companion EHCI controller will create three UHCI controllers, ++ * we init it only once. 
++ */ ++ if (!qemu_usb_controller_tab[type]->controller) { ++ qemu_log("the usb controller (%d) registed frame handler\n", type); ++ qemu_usb_controller_tab[type]->controller = controller; ++ } ++ ++ return 0; ++} ++ ++int qemu_timer_set_mode(enum qemu_timer_mode mode, unsigned int type) ++{ ++ if (type != QEMU_USB_CONTROLLER_UHCI) { ++ qemu_log("the usb controller (%d) no need change frame frep\n", type); ++ return 0; ++ } ++ ++ if (!qemu_usb_controller_tab[type]->controller) { ++ qemu_log("the usb controller (%d) not registed yet\n", type); ++ return 0; ++ } ++ ++ if (mode == QEMU_TIMER_USB_NORMAL_MODE) { ++ if (qemu_usb_controller_tab[type]->refs++ > 0) { ++ return 0; ++ } ++ qemu_usb_controller_tab[type]->controller-> ++ qemu_set_freq(QEMU_USB_NORMAL_FREQ); ++ qemu_log("Set the controller (%d) of freq %d HZ,\n", ++ type, QEMU_USB_NORMAL_FREQ); ++ } else { ++ if (--qemu_usb_controller_tab[type]->refs > 0) { ++ return 0; ++ } ++ qemu_usb_controller_tab[type]->controller-> ++ qemu_set_freq(QEMU_USB_LAZY_FREQ); ++ qemu_log("Set the controller(type:%d) of freq %d HZ,\n", ++ type, QEMU_USB_LAZY_FREQ); ++ } ++ ++ return 0; ++} ++ + /** + * qemu_clock_ptr: + * @type: type of clock +-- +2.27.0 + diff --git a/hw-vfio-add-device-hct-based-on-vfio.patch b/hw-vfio-add-device-hct-based-on-vfio.patch new file mode 100644 index 0000000000000000000000000000000000000000..bd0c9cf048e3be4732bea080a11c1b1c9fb11508 --- /dev/null +++ b/hw-vfio-add-device-hct-based-on-vfio.patch @@ -0,0 +1,593 @@ +From ebafa8d737b5f08e787803375d6e942ecdaef1a9 Mon Sep 17 00:00:00 2001 +From: Yabin Li +Date: Fri, 4 Aug 2023 21:09:08 +0800 +Subject: [PATCH] hw/vfio: add device hct based on vfio. 
+ +add hct device based on vfio, used to simulate ccp devices + +Signed-off-by: Yabin Li +Signed-off-by: yangdepei +--- + hw/vfio/Kconfig | 6 + + hw/vfio/hct.c | 543 ++++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/meson.build | 1 + + 3 files changed, 550 insertions(+) + create mode 100644 hw/vfio/hct.c + +diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig +index 7cdba0560a..5f0d3c2d2b 100644 +--- a/hw/vfio/Kconfig ++++ b/hw/vfio/Kconfig +@@ -41,3 +41,9 @@ config VFIO_IGD + bool + default y if PC_PCI + depends on VFIO_PCI ++ ++config VFIO_HCT ++ bool ++ default y ++ select VFIO ++ depends on LINUX && PCI +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +new file mode 100644 +index 0000000000..476e86c61d +--- /dev/null ++++ b/hw/vfio/hct.c +@@ -0,0 +1,543 @@ ++/* ++ * vfio based mediated ccp(hct) assignment support ++ * ++ * Copyright 2023 HYGON Corp. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "qemu/osdep.h" ++#include "qemu/queue.h" ++#include "qemu/main-loop.h" ++#include "qemu/log.h" ++#include "trace.h" ++#include "hw/pci/pci.h" ++#include "hw/vfio/pci.h" ++#include "qemu/range.h" ++#include "sysemu/kvm.h" ++#include "hw/pci/msi.h" ++#include "qemu/error-report.h" ++#include "qapi/error.h" ++#include "hw/qdev-properties.h" ++ ++#define MAX_CCP_CNT 16 ++#define PAGE_SIZE 4096 ++#define HCT_SHARED_MEMORY_SIZE (PAGE_SIZE * MAX_CCP_CNT) ++#define CCP_INDEX_BYTES 4 ++#define PATH_MAX 4096 ++#define TYPE_HCT_DEV "hct" ++#define PCI_HCT_DEV(obj) OBJECT_CHECK(HCTDevState, (obj), TYPE_HCT_DEV) ++#define HCT_MMIO_SIZE (1 << 20) ++#define HCT_MAX_PASID (1 << 8) ++ ++#define PCI_VENDOR_ID_HYGON_CCP 0x1d94 ++#define PCI_DEVICE_ID_HYGON_CCP 0x1468 ++ ++#define HCT_SHARE_DEV "/dev/hct_share" ++ ++#define HCT_VERSION_STRING "0.2" ++#define DEF_VERSION_STRING "0.1" ++#define VERSION_SIZE 16 ++ ++#define HCT_SHARE_IOC_TYPE 'C' ++#define HCT_SHARE_OP_TYPE 0x01 ++#define HCT_SHARE_OP _IOWR(HCT_SHARE_IOC_TYPE, \ ++ HCT_SHARE_OP_TYPE, \ ++ struct hct_dev_ctrl) ++#define HCT_SHARE_OP_DMA_MAP 0x01 ++#define HCT_SHARE_OP_GET_ID 0x03 ++#define HCT_SHARE_OP_GET_PASID 0x04 ++#define HCT_SHARE_OP_DMA_UNMAP 0x05 ++#define HCT_SHARE_OP_GET_VERSION 0x06 ++ ++/* BARS */ ++#define HCT_REG_BAR_IDX 2 ++#define HCT_SHARED_BAR_IDX 3 ++#define HCT_PASID_BAR_IDX 4 ++ ++#define PASID_OFFSET 40 ++ ++static volatile struct hct_data { ++ int init; ++ int hct_fd; ++ unsigned long pasid; ++ uint8_t *pasid_memory; ++ uint8_t *hct_shared_memory; ++ uint8_t ccp_index[MAX_CCP_CNT]; ++ uint8_t ccp_cnt; ++} hct_data; ++ ++typedef struct SharedDevice { ++ PCIDevice dev; ++ int shared_memory_offset; ++} SharedDevice; ++ ++typedef struct HctDevState { ++ SharedDevice sdev; ++ VFIODevice vdev; ++ MemoryRegion mmio; ++ MemoryRegion shared; ++ MemoryRegion pasid; ++ void *maps[PCI_NUM_REGIONS]; ++} HCTDevState; ++ ++struct 
hct_dev_ctrl { ++ unsigned char op; ++ unsigned char rsvd[3]; ++ union { ++ unsigned char version[VERSION_SIZE]; ++ struct { ++ unsigned long vaddr; ++ unsigned long iova; ++ unsigned long size; ++ }; ++ unsigned int id; ++ }; ++}; ++ ++static int pasid_get_and_init(HCTDevState *state) ++{ ++ struct hct_dev_ctrl ctrl; ++ int ret; ++ ++ ctrl.op = HCT_SHARE_OP_GET_PASID; ++ ctrl.id = -1; ++ ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); ++ if (ret < 0) { ++ ret = -errno; ++ error_report("GET_PASID fail: %d", -errno); ++ goto out; ++ } ++ ++ *hct_data.pasid_memory = ctrl.id; ++ hct_data.pasid = ctrl.id; ++ ++out: ++ return ret; ++} ++ ++static const MemoryRegionOps hct_mmio_ops = { ++ .endianness = DEVICE_NATIVE_ENDIAN, ++ .valid = ++ { ++ .min_access_size = 4, ++ .max_access_size = 4, ++ }, ++}; ++ ++static void vfio_hct_detach_device(HCTDevState *state) ++{ ++ vfio_detach_device(&state->vdev); ++ g_free(state->vdev.name); ++} ++ ++static void vfio_hct_exit(PCIDevice *dev) ++{ ++ HCTDevState *state = PCI_HCT_DEV(dev); ++ ++ vfio_hct_detach_device(state); ++ ++ if (hct_data.hct_fd) { ++ qemu_close(hct_data.hct_fd); ++ hct_data.hct_fd = 0; ++ } ++} ++ ++static Property vfio_hct_properties[] = { ++ DEFINE_PROP_STRING("sysfsdev", HCTDevState, vdev.sysfsdev), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void vfio_ccp_compute_needs_reset(VFIODevice *vdev) ++{ ++ vdev->needs_reset = false; ++} ++ ++struct VFIODeviceOps vfio_ccp_ops = { ++ .vfio_compute_needs_reset = vfio_ccp_compute_needs_reset, ++}; ++ ++/* create BAR2, BAR3 and BAR4 space for the virtual machine. 
*/ ++static int vfio_hct_region_mmap(HCTDevState *state) ++{ ++ int ret; ++ int i; ++ struct vfio_region_info *info; ++ ++ for (i = 0; i < PCI_ROM_SLOT; i++) { ++ ret = vfio_get_region_info(&state->vdev, i, &info); ++ if (ret) ++ goto out; ++ ++ if (info->size) { ++ state->maps[i] = mmap(NULL, info->size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, state->vdev.fd, info->offset); ++ if (state->maps[i] == MAP_FAILED) { ++ ret = -errno; ++ g_free(info); ++ error_report("vfio mmap fail\n"); ++ goto out; ++ } ++ } ++ g_free(info); ++ } ++ ++ memory_region_init_io(&state->mmio, OBJECT(state), &hct_mmio_ops, state, ++ "hct mmio", HCT_MMIO_SIZE); ++ memory_region_init_ram_device_ptr(&state->mmio, OBJECT(state), "hct mmio", ++ HCT_MMIO_SIZE, ++ state->maps[HCT_REG_BAR_IDX]); ++ ++ memory_region_init_io(&state->shared, OBJECT(state), &hct_mmio_ops, state, ++ "hct shared memory", PAGE_SIZE); ++ memory_region_init_ram_device_ptr( ++ &state->shared, OBJECT(state), "hct shared memory", PAGE_SIZE, ++ (void *)hct_data.hct_shared_memory + ++ state->sdev.shared_memory_offset * PAGE_SIZE); ++ ++ memory_region_init_io(&state->pasid, OBJECT(state), &hct_mmio_ops, state, ++ "hct pasid", PAGE_SIZE); ++ memory_region_init_ram_device_ptr(&state->pasid, OBJECT(state), "hct pasid", ++ PAGE_SIZE, hct_data.pasid_memory); ++ ++ pci_register_bar(&state->sdev.dev, HCT_REG_BAR_IDX, ++ PCI_BASE_ADDRESS_SPACE_MEMORY, &state->mmio); ++ pci_register_bar(&state->sdev.dev, HCT_SHARED_BAR_IDX, ++ PCI_BASE_ADDRESS_SPACE_MEMORY, &state->shared); ++ pci_register_bar(&state->sdev.dev, HCT_PASID_BAR_IDX, ++ PCI_BASE_ADDRESS_SPACE_MEMORY, &state->pasid); ++out: ++ return ret; ++} ++ ++static int hct_check_duplicated_index(int index) ++{ ++ int cnt; ++ for (cnt = 0; cnt < hct_data.ccp_cnt; cnt++) { ++ if (hct_data.ccp_index[cnt] == index) { ++ error_report("many mdev shouldn't be mapped to one ccp in a " ++ "virtual machine!\n"); ++ return -1; ++ } ++ } ++ ++ hct_data.ccp_index[hct_data.ccp_cnt++] = index; ++ return 
0; ++} ++ ++static int hct_get_ccp_index(HCTDevState *state) ++{ ++ char path[PATH_MAX]; ++ char buf[CCP_INDEX_BYTES]; ++ int fd; ++ int ret; ++ int ccp_index; ++ ++ snprintf(path, PATH_MAX, "%s/vendor/id", state->vdev.sysfsdev); ++ fd = qemu_open_old(path, O_RDONLY); ++ if (fd < 0) { ++ error_report("open %s fail\n", path); ++ return -errno; ++ } ++ ++ ret = read(fd, buf, sizeof(buf)); ++ if (ret < 0) { ++ ret = -errno; ++ error_report("read %s fail\n", path); ++ goto out; ++ } ++ ++ if (1 != sscanf(buf, "%d", &ccp_index)) { ++ ret = -errno; ++ error_report("format addr %s fail\n", buf); ++ goto out; ++ } ++ ++ if (!hct_check_duplicated_index(ccp_index)) { ++ state->sdev.shared_memory_offset = ccp_index; ++ } else { ++ ret = -1; ++ } ++ ++out: ++ qemu_close(fd); ++ return ret; ++} ++ ++static int hct_api_version_check(void) ++{ ++ struct hct_dev_ctrl ctrl; ++ int ret; ++ ++ ctrl.op = HCT_SHARE_OP_GET_VERSION; ++ memcpy(ctrl.version, DEF_VERSION_STRING, sizeof(DEF_VERSION_STRING)); ++ ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); ++ if (ret < 0) { ++ error_report("ret %d, errno %d: fail to get hct.ko version, please " ++ "update hct.ko to version 0.2.\n", ++ ret, errno); ++ return -1; ++ } else if (memcmp(ctrl.version, HCT_VERSION_STRING, ++ sizeof(HCT_VERSION_STRING)) < 0) { ++ error_report("The API version %s is larger than hct.ko version %s, " ++ "please update hct.ko to version 0.2\n", ++ HCT_VERSION_STRING, ctrl.version); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int hct_shared_memory_init(void) ++{ ++ int ret = 0; ++ ++ hct_data.hct_shared_memory = ++ mmap(NULL, HCT_SHARED_MEMORY_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, ++ hct_data.hct_fd, 0); ++ if (hct_data.hct_shared_memory == MAP_FAILED) { ++ ret = -errno; ++ error_report("map hct shared memory fail\n"); ++ goto out; ++ } ++ ++out: ++ return ret; ++} ++ ++static void hct_listener_region_add(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ struct hct_dev_ctrl ctrl; ++ hwaddr 
iova; ++ Int128 llend, llsize; ++ void *vaddr; ++ int ret; ++ ++ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); ++ llend = int128_make64(section->offset_within_address_space); ++ llend = int128_add(llend, section->size); ++ llend = int128_add(llend, int128_exts64(qemu_real_host_page_mask())); ++ ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ ++ if (!section->mr->ram) { ++ return; ++ } ++ ++ vaddr = memory_region_get_ram_ptr(section->mr) + ++ section->offset_within_region + ++ (iova - section->offset_within_address_space); ++ llsize = int128_sub(llend, int128_make64(iova)); ++ ++ ctrl.op = HCT_SHARE_OP_DMA_MAP; ++ ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); ++ ctrl.vaddr = (uint64_t)vaddr; ++ ctrl.size = llsize; ++ ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); ++ if (ret < 0) ++ error_report("VFIO_MAP_DMA: %d, iova=%lx", -errno, iova); ++} ++ ++static void hct_listener_region_del(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ struct hct_dev_ctrl ctrl; ++ hwaddr iova; ++ Int128 llend, llsize; ++ int ret; ++ ++ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); ++ llend = int128_make64(section->offset_within_address_space); ++ llend = int128_add(llend, section->size); ++ llend = int128_add(llend, int128_exts64(qemu_real_host_page_mask())); ++ ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ ++ if (!section->mr->ram) { ++ return; ++ } ++ ++ llsize = int128_sub(llend, int128_make64(iova)); ++ ++ ctrl.op = HCT_SHARE_OP_DMA_UNMAP; ++ ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); ++ ctrl.size = llsize; ++ ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); ++ if (ret < 0) ++ error_report("VFIO_UNMAP_DMA: %d", -errno); ++} ++ ++static MemoryListener hct_memory_listener = { ++ .region_add = hct_listener_region_add, ++ .region_del = hct_listener_region_del, ++}; ++ ++static void hct_data_uninit(HCTDevState *state) ++{ ++ if (hct_data.hct_fd) { ++ 
qemu_close(hct_data.hct_fd); ++ hct_data.hct_fd = 0; ++ } ++ ++ if (hct_data.pasid) { ++ hct_data.pasid = 0; ++ } ++ ++ if (hct_data.pasid_memory) { ++ munmap(hct_data.pasid_memory, PAGE_SIZE); ++ hct_data.pasid_memory = NULL; ++ } ++ ++ if (hct_data.hct_shared_memory) { ++ munmap((void *)hct_data.hct_shared_memory, HCT_SHARED_MEMORY_SIZE); ++ hct_data.hct_shared_memory = NULL; ++ } ++ ++ memory_listener_unregister(&hct_memory_listener); ++} ++ ++static int hct_data_init(HCTDevState *state) ++{ ++ int ret; ++ ++ if (hct_data.init == 0) { ++ ++ hct_data.hct_fd = qemu_open_old(HCT_SHARE_DEV, O_RDWR); ++ if (hct_data.hct_fd < 0) { ++ error_report("fail to open %s, errno %d.", HCT_SHARE_DEV, errno); ++ ret = -errno; ++ goto out; ++ } ++ ++ /* The hct.ko version number needs not to be less than 0.2. */ ++ ret = hct_api_version_check(); ++ if (ret) ++ goto out; ++ ++ /* assign a page to the virtual BAR3 of each CCP. */ ++ ret = hct_shared_memory_init(); ++ if (ret) ++ goto out; ++ ++ hct_data.pasid_memory = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ if (hct_data.pasid_memory < 0) ++ goto unmap_shared_memory_exit; ++ ++ /* assign a unique pasid to each virtual machine. */ ++ ret = pasid_get_and_init(state); ++ if (ret < 0) ++ goto unmap_pasid_memory_exit; ++ ++ /* perform DMA_MAP and DMA_UNMAP operations on all memories of the ++ * virtual machine. 
*/ ++ memory_listener_register(&hct_memory_listener, &address_space_memory); ++ ++ hct_data.init = 1; ++ } ++ ++ return hct_get_ccp_index(state); ++ ++unmap_pasid_memory_exit: ++ munmap(hct_data.pasid_memory, PAGE_SIZE); ++ ++unmap_shared_memory_exit: ++ munmap((void *)hct_data.hct_shared_memory, HCT_SHARED_MEMORY_SIZE); ++ ++out: ++ return ret; ++} ++ ++/* When device is loaded */ ++static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp) ++{ ++ int ret; ++ char *mdevid; ++ Error *err = NULL; ++ HCTDevState *state = PCI_HCT_DEV(pci_dev); ++ ++ /* parsing mdev device name from startup scripts */ ++ mdevid = g_path_get_basename(state->vdev.sysfsdev); ++ state->vdev.name = g_strdup_printf("%s", mdevid); ++ ++ ret = hct_data_init(state); ++ if (ret < 0) { ++ g_free(state->vdev.name); ++ goto out; ++ } ++ ++ ret = vfio_attach_device(state->vdev.name, &state->vdev, ++ pci_device_iommu_address_space(pci_dev), &err); ++ ++ if (ret) { ++ error_report("attach device failed, name = %s", state->vdev.name); ++ goto data_uninit_out; ++ } ++ ++ state->vdev.ops = &vfio_ccp_ops; ++ state->vdev.dev = &state->sdev.dev.qdev; ++ ++ ret = vfio_hct_region_mmap(state); ++ if (ret < 0) ++ goto detach_device_out; ++ ++ return; ++ ++detach_device_out: ++ vfio_hct_detach_device(state); ++ ++data_uninit_out: ++ hct_data_uninit(state); ++ ++out: ++ return; ++} ++ ++static void hct_dev_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); ++ ++ dc->desc = "HCT Device"; ++ device_class_set_props(dc, vfio_hct_properties); ++ ++ pdc->realize = vfio_hct_realize; ++ pdc->exit = vfio_hct_exit; ++ pdc->vendor_id = PCI_VENDOR_ID_HYGON_CCP; ++ pdc->device_id = PCI_DEVICE_ID_HYGON_CCP; ++ pdc->class_id = PCI_CLASS_CRYPT_OTHER; ++ set_bit(DEVICE_CATEGORY_MISC, dc->categories); ++ ++ return; ++} ++ ++static const TypeInfo pci_hct_info = { ++ .name = TYPE_HCT_DEV, ++ .parent = TYPE_PCI_DEVICE, ++ .instance_size = 
sizeof(HCTDevState), ++ .class_init = hct_dev_class_init, ++ .interfaces = ++ (InterfaceInfo[]){ ++ {INTERFACE_CONVENTIONAL_PCI_DEVICE}, ++ {}, ++ }, ++}; ++ ++static void hct_register_types(void) { ++ type_register_static(&pci_hct_info); ++} ++ ++type_init(hct_register_types); +diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build +index 2a6912c940..b1db4c8605 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -17,5 +17,6 @@ vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) + vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) + vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) + vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) ++vfio_ss.add(when: 'CONFIG_VFIO_HCT', if_true: files('hct.c')) + + specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) +-- +2.41.0.windows.1 + diff --git a/hw-vfio-common-trace-vfio_connect_container-operatio.patch b/hw-vfio-common-trace-vfio_connect_container-operatio.patch deleted file mode 100644 index bd952359250359770ea8d51711e88be943ee2c72..0000000000000000000000000000000000000000 --- a/hw-vfio-common-trace-vfio_connect_container-operatio.patch +++ /dev/null @@ -1,53 +0,0 @@ -From b107e6ec2a5a34e0ba95345a89dcf5f505ad9da4 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 22 Feb 2021 10:13:55 -0500 -Subject: [PATCH] hw/vfio/common: trace vfio_connect_container operations - -We currently trace vfio_disconnect_container() but we do not trace -the container <-> group creation, which can be useful to understand -the VFIO topology. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 3 +++ - hw/vfio/trace-events | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 206fb83e28..fefa2ccfdf 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1848,6 +1848,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - QLIST_FOREACH(container, &space->containers, next) { - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - group->container = container; -+ trace_vfio_connect_existing_container(group->groupid, -+ container->fd); - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - vfio_kvm_device_add_group(group); - return 0; -@@ -1881,6 +1883,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - if (ret) { - goto free_container_exit; - } -+ trace_vfio_connect_new_container(group->groupid, container->fd); - - switch (container->iommu_type) { - case VFIO_TYPE1v2_IOMMU: -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 575ebde6e0..561dc6e758 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -102,6 +102,8 @@ vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t si - vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 - vfio_disconnect_container(int fd) "close container->fd=%d" -+vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d" -+vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d" - vfio_put_group(int fd) "close group->fd=%d" - vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" - vfio_put_base_device(int fd) "close vdev->fd=%d" --- -2.27.0 - diff --git 
a/hw-vfio-hct-fix-ccp_index-error-caused-by-uninitiali.patch b/hw-vfio-hct-fix-ccp_index-error-caused-by-uninitiali.patch new file mode 100644 index 0000000000000000000000000000000000000000..22e8907408273539973738bf0b80f4c8c6b515b0 --- /dev/null +++ b/hw-vfio-hct-fix-ccp_index-error-caused-by-uninitiali.patch @@ -0,0 +1,28 @@ +From 360bd43ff3c4e4938ee8af1a5ccf981152f7ca95 Mon Sep 17 00:00:00 2001 +From: yangdepei +Date: Mon, 26 Aug 2024 15:40:25 +0800 +Subject: [PATCH] hw/vfio/hct: fix ccp_index error caused by uninitialized buf + +Signed-off-by: yangdepei +--- + hw/vfio/hct.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index 790bb78439..9374e95e85 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -235,8 +235,8 @@ static int hct_check_duplicated_index(int index) + + static int hct_get_ccp_index(HCTDevState *state) + { +- char path[PATH_MAX]; +- char buf[CCP_INDEX_BYTES]; ++ char path[PATH_MAX] = {0}; ++ char buf[CCP_INDEX_BYTES] = {0}; + int fd; + int ret; + int ccp_index; +-- +2.41.0.windows.1 + diff --git a/hw-vfio-hct-qemu-startup-terminate-once-error-happen.patch b/hw-vfio-hct-qemu-startup-terminate-once-error-happen.patch new file mode 100644 index 0000000000000000000000000000000000000000..1609833876b8408382051ba941794705af42b843 --- /dev/null +++ b/hw-vfio-hct-qemu-startup-terminate-once-error-happen.patch @@ -0,0 +1,79 @@ +From 32855e315c3050f09388f1335c0869bba065fbae Mon Sep 17 00:00:00 2001 +From: yangdepei +Date: Fri, 27 Sep 2024 17:08:08 +0800 +Subject: [PATCH] hw/vfio/hct: qemu startup terminate once error happened in + hct + +Signed-off-by: yangdepei +--- + hw/vfio/hct.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index 9374e95e85..7fd3977182 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -136,7 +136,9 @@ static const MemoryRegionOps hct_mmio_ops = { + static void vfio_hct_detach_device(HCTDevState 
*state) + { + vfio_detach_device(&state->vdev); +- g_free(state->vdev.name); ++ ++ if (state->vdev.name) ++ g_free(state->vdev.name); + } + + static void vfio_hct_exit(PCIDevice *dev) +@@ -413,7 +415,6 @@ static int hct_data_init(HCTDevState *state) + int ret; + + if (hct_data.init == 0) { +- + hct_data.hct_fd = qemu_open_old(HCT_SHARE_DEV, O_RDWR); + if (hct_data.hct_fd < 0) { + error_report("fail to open %s, errno %d.", HCT_SHARE_DEV, errno); +@@ -465,7 +466,6 @@ static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp) + { + int ret; + char *mdevid; +- Error *err = NULL; + HCTDevState *state = PCI_HCT_DEV(pci_dev); + + /* parsing mdev device name from startup scripts */ +@@ -475,14 +475,18 @@ static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp) + ret = hct_data_init(state); + if (ret < 0) { + g_free(state->vdev.name); ++ state->vdev.name = NULL; ++ error_setg(errp, "hct data init failed"); + goto out; + } + + ret = vfio_attach_device(state->vdev.name, &state->vdev, +- pci_device_iommu_address_space(pci_dev), &err); ++ pci_device_iommu_address_space(pci_dev), errp); + + if (ret) { +- error_report("attach device failed, name = %s", state->vdev.name); ++ g_free(state->vdev.name); ++ state->vdev.name = NULL; ++ error_setg(errp, "attach device failed, name = %s", state->vdev.name); + goto data_uninit_out; + } + +@@ -491,7 +495,12 @@ static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp) + + ret = vfio_hct_region_mmap(state); + if (ret < 0) ++ { ++ g_free(state->vdev.name); ++ state->vdev.name = NULL; ++ error_setg(errp, "region mmap failed, name = %s", state->vdev.name); + goto detach_device_out; ++ } + + return; + +-- +2.41.0.windows.1 + diff --git a/hw-vfio-hct-update-support-ccp-count-to-48.patch b/hw-vfio-hct-update-support-ccp-count-to-48.patch new file mode 100644 index 0000000000000000000000000000000000000000..57e394762bfdc95b2a94a51ee5cc863bc932f160 --- /dev/null +++ b/hw-vfio-hct-update-support-ccp-count-to-48.patch @@ -0,0 +1,57 @@ 
+From 3af7045d3aea901d366f4f6dee51e70998351698 Mon Sep 17 00:00:00 2001 +From: Yabin Li +Date: Tue, 23 Apr 2024 15:38:48 +0800 +Subject: [PATCH] hw/vfio/hct: update support ccp count to 48. + +Signed-off-by: Yabin Li +Signed-off-by: yangdepei +--- + hw/vfio/hct.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index 476e86c61d..790bb78439 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -28,7 +28,7 @@ + #include "qapi/error.h" + #include "hw/qdev-properties.h" + +-#define MAX_CCP_CNT 16 ++#define MAX_CCP_CNT 48 + #define PAGE_SIZE 4096 + #define HCT_SHARED_MEMORY_SIZE (PAGE_SIZE * MAX_CCP_CNT) + #define CCP_INDEX_BYTES 4 +@@ -43,7 +43,7 @@ + + #define HCT_SHARE_DEV "/dev/hct_share" + +-#define HCT_VERSION_STRING "0.2" ++#define HCT_VERSION_STRING "0.5" + #define DEF_VERSION_STRING "0.1" + #define VERSION_SIZE 16 + +@@ -281,15 +281,14 @@ static int hct_api_version_check(void) + memcpy(ctrl.version, DEF_VERSION_STRING, sizeof(DEF_VERSION_STRING)); + ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); + if (ret < 0) { +- error_report("ret %d, errno %d: fail to get hct.ko version, please " +- "update hct.ko to version 0.2.\n", +- ret, errno); ++ error_report("ret %d, errno %d: fail to get hct.ko version.\n", ret, ++ errno); + return -1; + } else if (memcmp(ctrl.version, HCT_VERSION_STRING, + sizeof(HCT_VERSION_STRING)) < 0) { +- error_report("The API version %s is larger than hct.ko version %s, " +- "please update hct.ko to version 0.2\n", +- HCT_VERSION_STRING, ctrl.version); ++ error_report("The hct.ko version is %s, please upgrade to version %s " ++ "or higher.\n", ++ ctrl.version, HCT_VERSION_STRING); + return -1; + } + +-- +2.41.0.windows.1 + diff --git a/hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch b/hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch new file mode 100644 index 0000000000000000000000000000000000000000..6355641a134dd31a44259e069840adc62d19c53a --- /dev/null +++ 
b/hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch @@ -0,0 +1,67 @@ +From 8c1ad2043705184da00d39250402a70f403d14a7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:11 +0200 +Subject: [PATCH] hw/virtio: Introduce virtio_bh_new_guarded() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce virtio_bh_new_guarded(), similar to qemu_bh_new_guarded() +but using the transport memory guard, instead of the device one +(there can only be one virtio device per virtio bus). + +Inspired-by: Gerd Hoffmann +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Message-Id: <20240409105537.18308-2-philmd@linaro.org> +--- + hw/virtio/virtio.c | 10 ++++++++++ + include/hw/virtio/virtio.h | 7 +++++++ + 2 files changed, 17 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d00effe4d5..202aae868e 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -4148,3 +4148,13 @@ static void virtio_register_types(void) + } + + type_init(virtio_register_types) ++ ++QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev, ++ QEMUBHFunc *cb, void *opaque, ++ const char *name) ++{ ++ DeviceState *transport = qdev_get_parent_bus(dev)->parent; ++ ++ return qemu_bh_new_full(cb, opaque, name, ++ &transport->mem_reentrancy_guard); ++} +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index e612441357..60494aed62 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -22,6 +22,7 @@ + #include "standard-headers/linux/virtio_config.h" + #include "standard-headers/linux/virtio_ring.h" + #include "qom/object.h" ++#include "block/aio.h" + + /* + * A guest should never accept this. 
It implies negotiation is broken +@@ -510,4 +511,10 @@ static inline bool virtio_device_disabled(VirtIODevice *vdev) + bool virtio_legacy_allowed(VirtIODevice *vdev); + bool virtio_legacy_check_disabled(VirtIODevice *vdev); + ++QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev, ++ QEMUBHFunc *cb, void *opaque, ++ const char *name); ++#define virtio_bh_new_guarded(dev, cb, opaque) \ ++ virtio_bh_new_guarded_full((dev), (cb), (opaque), (stringify(cb))) ++ + #endif +-- +2.27.0 + diff --git a/hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch b/hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch new file mode 100644 index 0000000000000000000000000000000000000000..3b4fc6c0d7ba401daf46c8326b9414f42054ae59 --- /dev/null +++ b/hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch @@ -0,0 +1,43 @@ +From edb30c972ba68b03cc5febefc880698573a17b04 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:41 +0200 +Subject: [PATCH] hw/virtio/virtio-crypto: Protect from DMA re-entrancy + bugs(CVE-2024-3446) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Replace qemu_bh_new_guarded() by virtio_bh_new_guarded() +so the bus and device use the same guard. Otherwise the +DMA-reentrancy protection can be bypassed. + +Fixes: CVE-2024-3446 +Cc: qemu-stable@nongnu.org +Suggested-by: Alexander Bulekov +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. 
Tsirkin +Message-Id: <20240409105537.18308-5-philmd@linaro.org> +--- + hw/virtio/virtio-crypto.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 0e2cc8d5a8..4aaced74be 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1080,8 +1080,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = +- qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], +- &dev->mem_reentrancy_guard); ++ virtio_bh_new_guarded(dev, virtio_crypto_dataq_bh, ++ &vcrypto->vqs[i]); + vcrypto->vqs[i].vcrypto = vcrypto; + } + +-- +2.27.0 + diff --git a/hw-virtio-virtio-pci-Support-shadow-device-for-virti.patch b/hw-virtio-virtio-pci-Support-shadow-device-for-virti.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6942768824db06771806f2bc35e4533456b5680 --- /dev/null +++ b/hw-virtio-virtio-pci-Support-shadow-device-for-virti.patch @@ -0,0 +1,65 @@ +From e4c28afade86b8533b46bc87a56a8a0f32ab191a Mon Sep 17 00:00:00 2001 +From: Jia Qingtong +Date: Mon, 16 Jun 2025 17:24:13 +0800 +Subject: [PATCH] hw/virtio/virtio-pci:Support shadow device for + virtio-net/blk/scsi devices + +Currently we only support shadow device for "virtio-net", now let's +extend this feature to support "virtio-blk" and "virtio-scsi" devices. 
+ +Signed-off-by: Yanan Wang +Signed-off-by: Jia Qingtong +--- + hw/virtio/virtio-pci.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 558471307a..13220c258d 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1020,6 +1020,15 @@ int __attribute__((weak)) kvm_delete_shadow_device(PCIDevice *dev) + } + #endif + ++#ifdef __aarch64__ ++static bool shadow_device_supported(VirtIODevice *vdev) ++{ ++ return !strcmp(vdev->name, "virtio-net") || ++ !strcmp(vdev->name, "virtio-blk") || ++ !strcmp(vdev->name, "virtio-scsi"); ++} ++#endif ++ + static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; +@@ -1027,7 +1036,7 @@ static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + #ifdef __aarch64__ +- if (!strcmp(vdev->name, "virtio-net")) { ++ if (shadow_device_supported(vdev)) { + kvm_create_shadow_device(&proxy->pci_dev); + } + #endif +@@ -1044,7 +1053,7 @@ static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + virtio_pci_commit_route_changes(vdev); + + #ifdef __aarch64__ +- if (!strcmp(vdev->name, "virtio-net") && ret != 0) { ++ if (shadow_device_supported(vdev) && ret != 0) { + kvm_delete_shadow_device(&proxy->pci_dev); + } + #endif +@@ -1093,7 +1102,7 @@ static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) + } + + #ifdef __aarch64__ +- if (!strcmp(vdev->name, "virtio-net")) { ++ if (shadow_device_supported(vdev)) { + kvm_delete_shadow_device(&proxy->pci_dev); + } + #endif +-- +2.33.0 + diff --git a/hw-xen-Fix-xen_bus_realize-error-handling.patch b/hw-xen-Fix-xen_bus_realize-error-handling.patch new file mode 100644 index 0000000000000000000000000000000000000000..52a2023e0240545f3f95ebf0a9ce05570ebb017e --- /dev/null +++ b/hw-xen-Fix-xen_bus_realize-error-handling.patch @@ -0,0 +1,43 @@ +From 
5eb0bb1f8ce9835b368e78d414ff6136c77ef94b Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Tue, 8 Apr 2025 06:51:26 -0400 +Subject: [PATCH] hw/xen: Fix xen_bus_realize() error handling + +cherry-pick from de7b18083bfed4e1a01bb40b4ad050c47d2011fa + +The Error ** argument must be NULL, &error_abort, &error_fatal, or a +pointer to a variable containing NULL. Passing an argument of the +latter kind twice without clearing it in between is wrong: if the +first call sets an error, it no longer points to NULL for the second +call. + +xen_bus_realize() is wrong that way: it passes &local_err to +xs_node_watch() in a loop. If this fails in more than one iteration, +it can trip error_setv()'s assertion. + +Fix by clearing @local_err. + +Fixes: c4583c8c394e (xen-bus: reduce scope of backend watch) +Signed-off-by: Markus Armbruster +Message-ID: <20250314143500.2449658-2-armbru@redhat.com> +Reviewed-by: Stefano Stabellini +Signed-off-by: qihao_yewu +--- + hw/xen/xen-bus.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c +index 4973e7d9c9..c10b089914 100644 +--- a/hw/xen/xen-bus.c ++++ b/hw/xen/xen-bus.c +@@ -352,6 +352,7 @@ static void xen_bus_realize(BusState *bus, Error **errp) + error_reportf_err(local_err, + "failed to set up '%s' enumeration watch: ", + type[i]); ++ local_err = NULL; + } + + g_free(node); +-- +2.41.0.windows.1 + diff --git a/hw-xhci-check-return-value-of-usb_packet_map.patch b/hw-xhci-check-return-value-of-usb_packet_map.patch deleted file mode 100644 index fd81478de3a588852232349f483bbf16dd403034..0000000000000000000000000000000000000000 --- a/hw-xhci-check-return-value-of-usb_packet_map.patch +++ /dev/null @@ -1,31 +0,0 @@ -From e43f0019b0aff881c562c8d2428bce6b3d55845c Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Fri, 18 Sep 2020 11:08:28 +0800 -Subject: [PATCH] hw: xhci: check return value of 'usb_packet_map' - -Currently we don't check the return value of 'usb_packet_map', -this will cause an NAF issue. 
This is LP#1891341. -Following is the reproducer provided in: --->https://bugs.launchpad.net/qemu/+bug/1891341 - -This patch fixes this. - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index a21485fe..3b25abca 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -1614,7 +1614,10 @@ static int xhci_setup_packet(XHCITransfer *xfer) - xhci_xfer_create_sgl(xfer, dir == USB_TOKEN_IN); /* Also sets int_req */ - usb_packet_setup(&xfer->packet, dir, ep, xfer->streamid, - xfer->trbs[0].addr, false, xfer->int_req); -- usb_packet_map(&xfer->packet, &xfer->sgl); -+ if (usb_packet_map(&xfer->packet, &xfer->sgl)) { -+ qemu_sglist_destroy(&xfer->sgl); -+ return -1; -+ } - DPRINTF("xhci: setup packet pid 0x%x addr %d ep %d\n", - xfer->packet.pid, ep->dev->addr, ep->nr); - return 0; --- -2.23.0 - diff --git a/i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch b/i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch deleted file mode 100644 index dfa4a7064590348b9353dee3515e9682d926cd3d..0000000000000000000000000000000000000000 --- a/i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch +++ /dev/null @@ -1,44 +0,0 @@ -From a6206163d42156cb9de290f914c6883c77b012b9 Mon Sep 17 00:00:00 2001 -From: Sebastian Andrzej Siewior -Date: Wed, 25 Sep 2019 23:49:48 +0200 -Subject: [PATCH] i386: Add CPUID bit for CLZERO and XSAVEERPTR - -The CPUID bits CLZERO and XSAVEERPTR are availble on AMD's ZEN platform -and could be passed to the guest. 
- -Signed-off-by: Sebastian Andrzej Siewior -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 2 ++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index f09612f9da..e65f372f25 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1134,7 +1134,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - [FEAT_8000_0008_EBX] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { -- NULL, NULL, NULL, NULL, -+ "clzero", NULL, "xsaveerptr", NULL, - NULL, NULL, NULL, NULL, - NULL, "wbnoinvd", NULL, NULL, - "ibpb", NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 7ff8ddd464..24d489db0f 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -696,6 +696,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; - - #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) /* AVX512 BFloat16 Instruction */ - -+#define CPUID_8000_0008_EBX_CLZERO (1U << 0) /* CLZERO instruction */ -+#define CPUID_8000_0008_EBX_XSAVEERPTR (1U << 2) /* Always save/restore FP error pointers */ - #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) /* Write back and - do not invalidate cache */ - #define CPUID_8000_0008_EBX_IBPB (1U << 12) /* Indirect Branch Prediction Barrier */ --- -2.27.0 - diff --git a/i386-Add-MSR-feature-bit-for-MDS-NO.patch b/i386-Add-MSR-feature-bit-for-MDS-NO.patch deleted file mode 100644 index 9a24836e3bf8e9084dea784deec9f75b50f96633..0000000000000000000000000000000000000000 --- a/i386-Add-MSR-feature-bit-for-MDS-NO.patch +++ /dev/null @@ -1,34 +0,0 @@ -From aaa6c86f46232c68f6846b2da859e4e0b8198664 Mon Sep 17 00:00:00 2001 -From: Cathy Zhang -Date: Tue, 22 Oct 2019 15:35:26 +0800 -Subject: [PATCH] i386: Add MSR feature bit for MDS-NO - -Define MSR_ARCH_CAP_MDS_NO in the IA32_ARCH_CAPABILITIES MSR to allow -CPU models to report the feature when host supports it. 
- -Signed-off-by: Cathy Zhang -Reviewed-by: Xiaoyao Li -Reviewed-by: Tao Xu -Message-Id: <1571729728-23284-2-git-send-email-cathy.zhang@intel.com> -Signed-off-by: Eduardo Habkost - -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 488b4dc778..9ef868eb71 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -747,6 +747,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; - #define MSR_ARCH_CAP_RSBA (1U << 2) - #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) - #define MSR_ARCH_CAP_SSB_NO (1U << 4) -+#define MSR_ARCH_CAP_MDS_NO (1U << 5) - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - --- -2.27.0 - diff --git a/i386-Add-macro-for-stibp.patch b/i386-Add-macro-for-stibp.patch deleted file mode 100644 index bf53f56757197ffdc94f388b01800a82d32aed4a..0000000000000000000000000000000000000000 --- a/i386-Add-macro-for-stibp.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 67f68f735af6b1ba829689af2e021bba97e7132a Mon Sep 17 00:00:00 2001 -From: Cathy Zhang -Date: Tue, 22 Oct 2019 15:35:27 +0800 -Subject: [PATCH] i386: Add macro for stibp - -stibp feature is already added through the following commit. -https://github.com/qemu/qemu/commit/0e8916582991b9fd0b94850a8444b8b80d0a0955 - -Add a macro for it to allow CPU models to report it when host supports. 
- -Signed-off-by: Cathy Zhang -Reviewed-by: Xiaoyao Li -Reviewed-by: Tao Xu -Message-Id: <1571729728-23284-3-git-send-email-cathy.zhang@intel.com> -Signed-off-by: Eduardo Habkost - -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 9ef868eb71..58d8c48964 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -689,6 +689,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ - #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */ -+#define CPUID_7_0_EDX_STIBP (1U << 27) /* Single Thread Indirect Branch Predictors */ - #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) /*Arch Capabilities*/ - #define CPUID_7_0_EDX_CORE_CAPABILITY (1U << 30) /*Core Capability*/ - #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ --- -2.27.0 - diff --git a/i386-Add-new-CPU-model-Cooperlake.patch b/i386-Add-new-CPU-model-Cooperlake.patch deleted file mode 100644 index 60d249fdcd743a6c57c3d1c7051de82c0e1fefe4..0000000000000000000000000000000000000000 --- a/i386-Add-new-CPU-model-Cooperlake.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 8e9eb2f71396e3293d9ba9b1cfaf5f1487f1d475 Mon Sep 17 00:00:00 2001 -From: Cathy Zhang -Date: Tue, 22 Oct 2019 15:35:28 +0800 -Subject: [PATCH] i386: Add new CPU model Cooperlake - -Cooper Lake is intel's successor to Cascade Lake, the new -CPU model inherits features from Cascadelake-Server, while -add one platform associated new feature: AVX512_BF16. Meanwhile, -add STIBP for speculative execution. 
- -Signed-off-by: Cathy Zhang -Reviewed-by: Xiaoyao Li -Reviewed-by: Tao Xu -Message-Id: <1571729728-23284-4-git-send-email-cathy.zhang@intel.com> -Reviewed-by: Bruce Rogers -Signed-off-by: Eduardo Habkost - -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 60 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1ade90c28b..5329d73316 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2378,6 +2378,66 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .name = "Cooperlake", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_INTEL, -+ .family = 6, -+ .model = 85, -+ .stepping = 10, -+ .features[FEAT_1_EDX] = -+ CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | -+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | -+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | -+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | -+ CPUID_DE | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | -+ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | -+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | -+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | -+ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | -+ CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | -+ CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | -+ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | -+ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | -+ CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_AVX512F | 
CPUID_7_0_EBX_AVX512DQ | -+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | -+ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, -+ .features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_PKU | -+ CPUID_7_0_ECX_AVX512VNNI, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_STIBP | -+ CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, -+ .features[FEAT_ARCH_CAPABILITIES] = -+ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | -+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, -+ .features[FEAT_7_1_EAX] = -+ CPUID_7_1_EAX_AVX512_BF16, -+ /* -+ * Missing: XSAVES (not supported by some Linux versions, -+ * including v4.1 to v4.12). -+ * KVM doesn't yet expose any XSAVES state save component, -+ * and the only one defined in Skylake (processor tracing) -+ * probably will block migration anyway. -+ */ -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .xlevel = 0x80000008, -+ .model_id = "Intel Xeon Processor (Cooperlake)", -+ }, - { - .name = "Icelake-Client", - .level = 0xd, --- -2.27.0 - diff --git a/i386-Resolve-CPU-models-to-v1-by-default.patch b/i386-Resolve-CPU-models-to-v1-by-default.patch deleted file mode 100644 index f8c6315866df7028a1135869a7aa44234eb07b44..0000000000000000000000000000000000000000 --- a/i386-Resolve-CPU-models-to-v1-by-default.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 6a5e994c1dec959143f6d3f83169a7adcb173fc4 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Thu, 5 Dec 2019 19:33:39 -0300 -Subject: [PATCH] i386: Resolve CPU models to v1 by default - -When using `query-cpu-definitions` using `-machine none`, -QEMU is resolving all CPU models to their latest versions. The -actual CPU model version being used by another machine type (e.g. -`pc-q35-4.0`) might be different. - -In theory, this was OK because the correct CPU model -version is returned when using the correct `-machine` argument. 
- -Except that in practice, this breaks libvirt expectations: -libvirt always use `-machine none` when checking if a CPU model -is runnable, because runnability is not expected to be affected -when the machine type is changed. - -For example, when running on a Haswell host without TSX, -Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, -`query-cpu-definitions` says Haswell is runnable if using -`-machine none`, but Haswell is actually not runnable using any -of the `pc-*` machine types (because they resolve Haswell to -Haswell-v1). In other words, we're breaking the "runnability -guarantee" we promised to not break for a few releases (see -qemu-deprecated.texi). - -To address this issue, change the default CPU model version to v1 -on all machine types, so we make `query-cpu-definitions` output -when using `-machine none` match the results when using `pc-*`. -This will change in the future (the plan is to always return the -latest CPU model version if using `-machine none`), but only -after giving libvirt the opportunity to adapt. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 -Signed-off-by: Eduardo Habkost -Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> -Signed-off-by: Eduardo Habkost ---- - qemu-deprecated.texi | 8 ++++++++ - target/i386/cpu.c | 8 +++++++- - 2 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi -index fff07bb2a3..719ac23d72 100644 ---- a/qemu-deprecated.texi -+++ b/qemu-deprecated.texi -@@ -331,3 +331,11 @@ existing CPU models. Management software that needs runnability - guarantees must resolve the CPU model aliases using te - ``alias-of'' field returned by the ``query-cpu-definitions'' QMP - command. -+ -+While those guarantees are kept, the return value of -+``query-cpu-definitions'' will have existing CPU model aliases -+point to a version that doesn't break runnability guarantees -+(specifically, version 1 of those CPU models). 
In future QEMU -+versions, aliases will point to newer CPU model versions -+depending on the machine type, so management software must -+resolve CPU model aliases before starting a virtual machine. -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index e0f3a2dd99..22e0e89718 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3933,7 +3933,13 @@ static PropValue tcg_default_props[] = { - }; - - --X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; -+/* -+ * We resolve CPU model aliases using -v1 when using "-machine -+ * none", but this is just for compatibility while libvirt isn't -+ * adapted to resolve CPU model versions before creating VMs. -+ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. -+ */ -+X86CPUVersion default_cpu_version = 1; - - void x86_cpu_set_default_version(X86CPUVersion version) - { --- -2.27.0 - diff --git a/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch b/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3a119f754fc931c7f1b0fe716e91fe81cf5b3c6 --- /dev/null +++ b/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch @@ -0,0 +1,65 @@ +From ff43e9201aba8f4047e6fd5edb93a4861cc8fed2 Mon Sep 17 00:00:00 2001 +From: Yanan Wang +Date: Thu, 28 Mar 2024 18:57:56 +0800 +Subject: [PATCH] i386: cache passthrough: Update AMD 8000_001D.EAX[25:14] + based on vCPU topo + +On AMD target, when host cache passthrough is disabled we will +emulate the guest caches with default values and initialize the +shared cpu list of the caches based on vCPU topology. However +when host cache passthrough is enabled, the shared cpu list is +consistent with host regardless what the vCPU topology is. 
+ +For example, when cache passthrough is enabled, running a guest +with vThreads=1 on a host with pThreads=2, we will get that there +are every *two* logical vCPUs sharing a L1/L2 cache, which is not +consistent with the vCPU topology (vThreads=1). + +So let's reinitialize BITs[25:14] of AMD CPUID 8000_001D.EAX +based on the actual vCPU topology instead of host pCPU topology. + +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang +--- + target/i386/cpu.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f94405c02b..491cf40cc7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6597,9 +6597,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + break; + case 0x8000001D: ++ /* Populate AMD Processor Cache Information */ + *eax = 0; + if (cpu->cache_info_passthrough) { + x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); ++ ++ /* ++ * Clear BITs[25:14] and then update them based on the guest ++ * vCPU topology, like what we do in encode_cache_cpuid8000001d ++ * when cache_info_passthrough is not enabled. 
++ */ ++ *eax &= ~0x03FFC000; ++ switch (count) { ++ case 0: /* L1 dcache info */ ++ case 1: /* L1 icache info */ ++ case 2: /* L2 cache info */ ++ *eax |= ((topo_info.threads_per_core - 1) << 14); ++ break; ++ case 3: /* L3 cache info */ ++ *eax |= ((topo_info.cores_per_die * ++ topo_info.threads_per_core - 1) << 14); ++ break; ++ default: /* end of info */ ++ *eax = *ebx = *ecx = *edx = 0; ++ break; ++ } + break; + } + switch (count) { +-- +2.27.0 + diff --git a/i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch b/i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch new file mode 100644 index 0000000000000000000000000000000000000000..8549070247aefc0f6091388411438386d580b2ec --- /dev/null +++ b/i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch @@ -0,0 +1,38 @@ +From c952c9acfab98a83122b4e6d406f4a7a0dfe871f Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Mon, 15 Jan 2024 04:13:24 -0500 +Subject: [PATCH] i386/cpu: Clear FEAT_XSAVE_XSS_LO/HI leafs when + CPUID_EXT_XSAVE is not available + +commit 81f5cad3858f27623b1b14467926032d229b76cc upstream. + +Leaf FEAT_XSAVE_XSS_LO and FEAT_XSAVE_XSS_HI also need to be cleared +when CPUID_EXT_XSAVE is not set. 
+ +Fixes: 301e90675c3f ("target/i386: Enable support for XSAVES based features") +Signed-off-by: Xiaoyao Li +Reviewed-by: Yang Weijiang +Message-ID: <20240115091325.1904229-2-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cd16cb893d..8b9ef218d3 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6927,6 +6927,8 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + env->features[FEAT_XSAVE_XCR0_LO] = 0; + env->features[FEAT_XSAVE_XCR0_HI] = 0; ++ env->features[FEAT_XSAVE_XSS_LO] = 0; ++ env->features[FEAT_XSAVE_XSS_HI] = 0; + return; + } + +-- +2.27.0 + diff --git a/i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch b/i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch deleted file mode 100644 index 3e42e71e5d9e2d038d4540fa0b57b87adcbf92d7..0000000000000000000000000000000000000000 --- a/i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch +++ /dev/null @@ -1,33 +0,0 @@ -From e6f3e08acd55d13cbb154ff8abb1b3c2ed658285 Mon Sep 17 00:00:00 2001 -From: Xiaoyao Li -Date: Tue, 14 Jul 2020 01:44:36 +0800 -Subject: [PATCH] i386/cpu: Don't add unavailable_features to - env->user_features - -Features unavailable due to absent of their dependent features should -not be added to env->user_features. env->user_features only contains the -feature explicity specified with -feature/+feature by user. 
- -Fixes: 99e24dbdaa68 ("target/i386: introduce generic feature dependency mechanism") -Signed-off-by: Xiaoyao Li -Message-Id: <20200713174436.41070-3-xiaoyao.li@intel.com> -Signed-off-by: Eduardo Habkost ---- - target/i386/cpu.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6f27a5170a..e0f3a2dd99 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6173,7 +6173,6 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - unavailable_features & env->user_features[d->to.index], - "This feature depends on other features that were not requested"); - -- env->user_features[d->to.index] |= unavailable_features; - env->features[d->to.index] &= ~unavailable_features; - } - } --- -2.27.0 - diff --git a/i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch b/i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch new file mode 100644 index 0000000000000000000000000000000000000000..612d46547a6e221c90f8f826f78226593f529373 --- /dev/null +++ b/i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch @@ -0,0 +1,42 @@ +From 26ddb3428182503b28ac87cad7543eb241a9d353 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Mon, 15 Jan 2024 04:13:25 -0500 +Subject: [PATCH] i386/cpu: Mask with XCR0/XSS mask for FEAT_XSAVE_XCR0_HI and + FEAT_XSAVE_XSS_HI leafs + +commit a11a365159b944e05be76f3ec3b98c8b38cb70fd upstream. + +The value of FEAT_XSAVE_XCR0_HI leaf and FEAT_XSAVE_XSS_HI leaf also +need to be masked by XCR0 and XSS mask respectively, to make it +logically correct. 
+ +Fixes: 301e90675c3f ("target/i386: Enable support for XSAVES based features") +Signed-off-by: Xiaoyao Li +Reviewed-by: Yang Weijiang +Message-ID: <20240115091325.1904229-3-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 8b9ef218d3..a66e5a357b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6947,9 +6947,9 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + } + + env->features[FEAT_XSAVE_XCR0_LO] = mask & CPUID_XSTATE_XCR0_MASK; +- env->features[FEAT_XSAVE_XCR0_HI] = mask >> 32; ++ env->features[FEAT_XSAVE_XCR0_HI] = (mask & CPUID_XSTATE_XCR0_MASK) >> 32; + env->features[FEAT_XSAVE_XSS_LO] = mask & CPUID_XSTATE_XSS_MASK; +- env->features[FEAT_XSAVE_XSS_HI] = mask >> 32; ++ env->features[FEAT_XSAVE_XSS_HI] = (mask & CPUID_XSTATE_XSS_MASK) >> 32; + } + + /***** Steps involved on loading and filtering CPUID data +-- +2.27.0 + diff --git a/i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch b/i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch new file mode 100644 index 0000000000000000000000000000000000000000..37ee5b31f618e36974d8a9a7efd8e0d928eada52 --- /dev/null +++ b/i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch @@ -0,0 +1,38 @@ +From 576170252c3cbd79ed918f688d088f1ccd15602a Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 24 Jan 2024 21:40:14 -0500 +Subject: [PATCH] i386/cpuid: Decrease cpuid_i when skipping CPUID leaf 1F + +commit 10f92799af8ba3c3cef2352adcd4780f13fbab31 upstream. + +Existing code misses a decrement of cpuid_i when skip leaf 0x1F. +There's a blank CPUID entry(with leaf, subleaf as 0, and all fields +stuffed 0s) left in the CPUID array. + +It conflicts with correct CPUID leaf 0. 
+ +Signed-off-by: Xiaoyao Li +Reviewed-by:Yang Weijiang +Message-ID: <20240125024016.2521244-2-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 4ce80555b4..e68eb8f5e6 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1914,6 +1914,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + case 0x1f: + if (env->nr_dies < 2) { ++ cpuid_i--; + break; + } + /* fallthrough */ +-- +2.27.0 + diff --git a/i386-cpuid-Move-leaf-7-to-correct-group.patch b/i386-cpuid-Move-leaf-7-to-correct-group.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd217f55e0aae3a2a205dd6523a336e7b70cf453 --- /dev/null +++ b/i386-cpuid-Move-leaf-7-to-correct-group.patch @@ -0,0 +1,50 @@ +From bf3d3ecf9ff5808d1f03e83a363c8295f7abad76 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 24 Jan 2024 21:40:16 -0500 +Subject: [PATCH] i386/cpuid: Move leaf 7 to correct group + +commit 0729857c707535847d7fe31d3d91eb8b2a118e3c upstream. + +CPUID leaf 7 was grouped together with SGX leaf 0x12 by commit +b9edbadefb9e ("i386: Propagate SGX CPUID sub-leafs to KVM") by mistake. + +SGX leaf 0x12 has its specific logic to check if subleaf (starting from 2) +is valid or not by checking the bit 0:3 of corresponding EAX is 1 or +not. + +Leaf 7 follows the logic that EAX of subleaf 0 enumerates the maximum +valid subleaf. 
+ +Fixes: b9edbadefb9e ("i386: Propagate SGX CPUID sub-leafs to KVM") +Signed-off-by: Xiaoyao Li +Message-ID: <20240125024016.2521244-4-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index e68eb8f5e6..a0bc9ea7b1 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1955,7 +1955,6 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; +- case 0x7: + case 0x12: + for (j = 0; ; j++) { + c->function = i; +@@ -1975,6 +1974,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; ++ case 0x7: + case 0x14: + case 0x1d: + case 0x1e: { +-- +2.27.0 + diff --git a/i386-cpuid-Remove-subleaf-constraint-on-CPUID-leaf-1.patch b/i386-cpuid-Remove-subleaf-constraint-on-CPUID-leaf-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..2443f3bcf5e64e15ba083f5ad005de61cfae58a9 --- /dev/null +++ b/i386-cpuid-Remove-subleaf-constraint-on-CPUID-leaf-1.patch @@ -0,0 +1,38 @@ +From 0d5ac4f36208eadbb922f552ba1b762f5bd0c3a6 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 24 Jan 2024 21:40:15 -0500 +Subject: [PATCH] i386/cpuid: Remove subleaf constraint on CPUID leaf 1F + +commit a3b5376521a0de898440e8d0942b54e628f0949f upstream. + +No such constraint that subleaf index needs to be less than 64. 
+ +Intel-SIG: commit a3b5376521a0 i386/cpuid: Remove subleaf constraint on CPUID leaf 1F + +Signed-off-by: Xiaoyao Li +Reviewed-by:Yang Weijiang +Message-ID: <20240125024016.2521244-3-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index ce96ed9158..850104f6b5 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1928,10 +1928,6 @@ int kvm_arch_init_vcpu(CPUState *cs) + break; + } + +- if (i == 0x1f && j == 64) { +- break; +- } +- + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; +-- +2.41.0.windows.1 + diff --git a/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch b/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch new file mode 100644 index 0000000000000000000000000000000000000000..133577de23a77bfa7915097451864fb1ef639af6 --- /dev/null +++ b/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch @@ -0,0 +1,42 @@ +From 06fc5eb48668a1c83e6a4e76c1a71403917b1835 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 20:33:47 +0800 +Subject: [PATCH] i6300esb watchdog: bugfix: Add a runstate transition + +QEMU will abort() for the reasons now: + + invalid runstate transition: 'prelaunch' -> 'postmigrate' + Aborted + +This happens when: + |<- watchdog timeout happened, then sets reset_requested to + | SHUTDOWN_CAUSE_GUEST_RESET; + |<- hot-migration thread sets vm state to RUN_STATE_FINISH_MIGRATE + | before the last time of migration; + |<- main thread gets the change of reset_requested and triggers + | reset, then sets vm state to RUN_STATE_PRELAUNCH; + |<- hot-migration thread sets vm state to RUN_STATE_POSTMIGRATE. + +Then 'prelaunch' -> 'postmigrate' runstate transition will happen. +It is legal so add this transition to runstate_transitions_def. 
+ +Signed-off-by: Jinhua Cao +--- + system/runstate.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/system/runstate.c b/system/runstate.c +index ea9d6c2a32..9d3f627fee 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -116,6 +116,7 @@ static const RunStateTransition runstate_transitions_def[] = { + { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, + { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, ++ { RUN_STATE_PRELAUNCH, RUN_STATE_POSTMIGRATE }, + + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, +-- +2.27.0 + diff --git a/ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch b/ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch deleted file mode 100644 index 9570b46b755e06705212253195a6605d738db350..0000000000000000000000000000000000000000 --- a/ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch +++ /dev/null @@ -1,89 +0,0 @@ -From ed78352a59ea7acf7520d4d47a96b9911bae7fc3 Mon Sep 17 00:00:00 2001 -From: Alexander Popov -Date: Mon, 23 Dec 2019 20:51:16 +0300 -Subject: [PATCH] ide: Fix incorrect handling of some PRDTs in ide_dma_cb() - -The commit a718978ed58a from July 2015 introduced the assertion which -implies that the size of successful DMA transfers handled in ide_dma_cb() -should be multiple of 512 (the size of a sector). But guest systems can -initiate DMA transfers that don't fit this requirement. - -For fixing that let's check the number of bytes prepared for the transfer -by the prepare_buf() handler. The code in ide_dma_cb() must behave -according to the Programming Interface for Bus Master IDE Controller -(Revision 1.0 5/16/94): -1. If PRDs specified a smaller size than the IDE transfer - size, then the Interrupt and Active bits in the Controller - status register are not set (Error Condition). -2. 
If the size of the physical memory regions was equal to - the IDE device transfer size, the Interrupt bit in the - Controller status register is set to 1, Active bit is set to 0. -3. If PRDs specified a larger size than the IDE transfer size, - the Interrupt and Active bits in the Controller status register - are both set to 1. - -Signed-off-by: Alexander Popov -Reviewed-by: Kevin Wolf -Message-id: 20191223175117.508990-2-alex.popov@linux.com -Signed-off-by: John Snow - -diff --git a/hw/ide/core.c b/hw/ide/core.c -index 754ff4dc34..80000eb766 100644 ---- a/hw/ide/core.c -+++ b/hw/ide/core.c -@@ -849,6 +849,7 @@ static void ide_dma_cb(void *opaque, int ret) - int64_t sector_num; - uint64_t offset; - bool stay_active = false; -+ int32_t prep_size = 0; - - if (ret == -EINVAL) { - ide_dma_error(s); -@@ -863,13 +864,15 @@ static void ide_dma_cb(void *opaque, int ret) - } - } - -- n = s->io_buffer_size >> 9; -- if (n > s->nsector) { -- /* The PRDs were longer than needed for this request. Shorten them so -- * we don't get a negative remainder. The Active bit must remain set -- * after the request completes. */ -+ if (s->io_buffer_size > s->nsector * 512) { -+ /* -+ * The PRDs were longer than needed for this request. -+ * The Active bit must remain set after the request completes. -+ */ - n = s->nsector; - stay_active = true; -+ } else { -+ n = s->io_buffer_size >> 9; - } - - sector_num = ide_get_sector(s); -@@ -892,9 +895,20 @@ static void ide_dma_cb(void *opaque, int ret) - n = s->nsector; - s->io_buffer_index = 0; - s->io_buffer_size = n * 512; -- if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size) < 512) { -- /* The PRDs were too short. Reset the Active bit, but don't raise an -- * interrupt. 
*/ -+ prep_size = s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size); -+ /* prepare_buf() must succeed and respect the limit */ -+ assert(prep_size >= 0 && prep_size <= n * 512); -+ -+ /* -+ * Now prep_size stores the number of bytes in the sglist, and -+ * s->io_buffer_size stores the number of bytes described by the PRDs. -+ */ -+ -+ if (prep_size < n * 512) { -+ /* -+ * The PRDs are too short for this request. Error condition! -+ * Reset the Active bit and don't raise the interrupt. -+ */ - s->status = READY_STAT | SEEK_STAT; - dma_buf_commit(s, 0); - goto eot; --- -2.23.0 - diff --git a/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch index 97824d4d27236961e5a9c3e3f4242e73d1334d44..5aaf01663e077c1336c79a115a433a15c53002e8 100644 --- a/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch +++ b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch @@ -1,4 +1,4 @@ -From c7fd5f3841f14c24e442fb6968c9f2d9e016f28a Mon Sep 17 00:00:00 2001 +From 6689eebbb520dc75bc65e0914c4e05e40a4efc1d Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Mon, 21 Jun 2021 09:22:35 +0800 Subject: [PATCH] ide: ahci: add check to avoid null dereference @@ -14,15 +14,17 @@ Reported-by: Bugs SysSec Signed-off-by: Prasad J Pandit Signed-off-by: Jiajie Li +Signed-off-by: Yan Wang +Signed-off-by: Adttil --- hw/ide/ahci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c -index 6aaf66534a..a7be0ae4fe 100644 +index afdc44b8e0..8062e1743c 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c -@@ -1455,8 +1455,10 @@ static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes) +@@ -1519,8 +1519,10 @@ static void ahci_commit_buf(const IDEDMA *dma, uint32_t tx_bytes) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -34,7 +36,7 @@ index 6aaf66534a..a7be0ae4fe 100644 + } } - static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) + static int ahci_dma_rw_buf(const 
IDEDMA *dma, bool is_write) -- 2.27.0 diff --git a/ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch b/ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch deleted file mode 100644 index da58bb9cc28d6e193d7b55ba530768d69a04324a..0000000000000000000000000000000000000000 --- a/ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 5209fbd340efe3fa7f8ea82f671db2fa04dda19b Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 23 Feb 2021 15:20:03 +0800 -Subject: [PATCH] ide:atapi: check io_buffer_index in ide_atapi_cmd_reply_end - -Fix CVE-2020-29443 - -During data transfer via packet command in 'ide_atapi_cmd_reply_end' -'s->io_buffer_index' could exceed the 's->io_buffer' length, leading -to OOB access issue. Add check to avoid it. - ... - #9 ahci_pio_transfer ../hw/ide/ahci.c:1383 - #10 ide_transfer_start_norecurse ../hw/ide/core.c:553 - #11 ide_atapi_cmd_reply_end ../hw/ide/atapi.c:284 - #12 ide_atapi_cmd_read_pio ../hw/ide/atapi.c:329 - #13 ide_atapi_cmd_read ../hw/ide/atapi.c:442 - #14 cmd_read ../hw/ide/atapi.c:988 - #15 ide_atapi_cmd ../hw/ide/atapi.c:1352 - #16 ide_transfer_start ../hw/ide/core.c:561 - #17 cmd_packet ../hw/ide/core.c:1729 - #18 ide_exec_cmd ../hw/ide/core.c:2107 - #19 handle_reg_h2d_fis ../hw/ide/ahci.c:1267 - #20 handle_cmd ../hw/ide/ahci.c:1318 - #21 check_cmd ../hw/ide/ahci.c:592 - #22 ahci_port_write ../hw/ide/ahci.c:373 - #23 ahci_mem_write ../hw/ide/ahci.c:513 - -Reported-by: Wenxiang Qian -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/ide/atapi.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c -index 1b0f66cc08..fc9dc87f03 100644 ---- a/hw/ide/atapi.c -+++ b/hw/ide/atapi.c -@@ -300,6 +300,9 @@ void ide_atapi_cmd_reply_end(IDEState *s) - s->packet_transfer_size -= size; - s->elementary_transfer_size -= size; - s->io_buffer_index += size; -+ if (s->io_buffer_index > s->io_buffer_total_len) { -+ return; -+ } - - /* 
Some adapters process PIO data right away. In that case, we need - * to avoid mutual recursion between ide_transfer_start --- -2.27.0 - diff --git a/ide-fix-leak-from-qemu_allocate_irqs.patch b/ide-fix-leak-from-qemu_allocate_irqs.patch deleted file mode 100644 index dce6e906ce92e7e303d2198d7a612905ca0632c1..0000000000000000000000000000000000000000 --- a/ide-fix-leak-from-qemu_allocate_irqs.patch +++ /dev/null @@ -1,28 +0,0 @@ -From df35f8fe2687df32cb65f6a03b8dd80314cc4c53 Mon Sep 17 00:00:00 2001 -From: lizhengui -Date: Wed, 9 Sep 2020 15:00:08 +0800 -Subject: [PATCH] ide: fix leak from qemu_allocate_irqs - -The array returned by qemu_allocate_irqs is malloced, free it. - -Signed-off-by: Paolo Bonzini -Reviewed-by: Thomas Huth ---- - hw/ide/cmd646.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c -index ed23aabf..a149cd6c 100644 ---- a/hw/ide/cmd646.c -+++ b/hw/ide/cmd646.c -@@ -299,6 +299,7 @@ static void pci_cmd646_ide_realize(PCIDevice *dev, Error **errp) - d->bmdma[i].bus = &d->bus[i]; - ide_register_restart_cb(&d->bus[i]); - } -+ g_free(irq); - - vmstate_register(DEVICE(dev), 0, &vmstate_ide_pci, d); - qemu_register_reset(cmd646_reset, d); --- -2.19.1 - diff --git a/imx7-ccm-add-digprog-mmio-write-method.patch b/imx7-ccm-add-digprog-mmio-write-method.patch deleted file mode 100644 index b68bf028b9b3e9bc2fe2d1838f6d3cf64dce7866..0000000000000000000000000000000000000000 --- a/imx7-ccm-add-digprog-mmio-write-method.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 5979338f8fb4562f7af32c58b7e7542d7396954e Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:29:28 +0800 -Subject: [PATCH] imx7-ccm: add digprog mmio write method - -fix CVE-2020-15469 - -Add digprog mmio write method to avoid assert failure during -initialisation. 
- -Reviewed-by: Li Qiang -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/misc/imx7_ccm.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/misc/imx7_ccm.c b/hw/misc/imx7_ccm.c -index d9bdcf1027..831311a7c8 100644 ---- a/hw/misc/imx7_ccm.c -+++ b/hw/misc/imx7_ccm.c -@@ -130,8 +130,15 @@ static const struct MemoryRegionOps imx7_set_clr_tog_ops = { - }, - }; - -+static void imx7_digprog_write(void *opaque, hwaddr addr, -+ uint64_t data, unsigned size) -+{ -+ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); -+} -+ - static const struct MemoryRegionOps imx7_digprog_ops = { - .read = imx7_set_clr_tog_read, -+ .write = imx7_digprog_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .impl = { - .min_access_size = 4, --- -2.27.0 - diff --git a/include-Make-headers-more-self-contained.patch b/include-Make-headers-more-self-contained.patch deleted file mode 100644 index 565471c8ce67ec70b0bb5691f66cba384b8a1202..0000000000000000000000000000000000000000 --- a/include-Make-headers-more-self-contained.patch +++ /dev/null @@ -1,1551 +0,0 @@ -From 1b6a1ef572411efee7cbf1b65aeb15c704b997cc Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Mon, 12 Aug 2019 07:23:31 +0200 -Subject: [PATCH] include: Make headers more self-contained -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Back in 2016, we discussed[1] rules for headers, and these were -generally liked: - -1. Have a carefully curated header that's included everywhere first. We - got that already thanks to Peter: osdep.h. - -2. Headers should normally include everything they need beyond osdep.h. - If exceptions are needed for some reason, they must be documented in - the header. If all that's needed from a header is typedefs, put - those into qemu/typedefs.h instead of including the header. - -3. Cyclic inclusion is forbidden. - -This patch gets include/ closer to obeying 2. 
- -It's actually extracted from my "[RFC] Baby steps towards saner -headers" series[2], which demonstrates a possible path towards -checking 2 automatically. It passes the RFC test there. - -[1] Message-ID: <87h9g8j57d.fsf@blackfin.pond.sub.org> - https://lists.nongnu.org/archive/html/qemu-devel/2016-03/msg03345.html -[2] Message-Id: <20190711122827.18970-1-armbru@redhat.com> - https://lists.nongnu.org/archive/html/qemu-devel/2019-07/msg02715.html - -Signed-off-by: Markus Armbruster -Reviewed-by: Alistair Francis -Message-Id: <20190812052359.30071-2-armbru@redhat.com> -Tested-by: Philippe Mathieu-Daudé ---- - include/block/raw-aio.h | 2 ++ - include/block/write-threshold.h | 2 ++ - include/disas/disas.h | 1 + - include/exec/cputlb.h | 3 +++ - include/exec/exec-all.h | 1 + - include/exec/ioport.h | 2 ++ - include/exec/memory-internal.h | 2 ++ - include/exec/ram_addr.h | 1 + - include/exec/softmmu-semi.h | 2 ++ - include/exec/tb-hash.h | 2 ++ - include/exec/user/thunk.h | 2 ++ - include/fpu/softfloat-macros.h | 2 ++ - include/hw/acpi/pci.h | 3 +++ - include/hw/acpi/tco.h | 3 +++ - include/hw/adc/stm32f2xx_adc.h | 2 ++ - include/hw/arm/allwinner-a10.h | 1 + - include/hw/arm/aspeed_soc.h | 1 + - include/hw/arm/bcm2836.h | 1 + - include/hw/arm/exynos4210.h | 3 +-- - include/hw/arm/fsl-imx25.h | 1 + - include/hw/arm/fsl-imx31.h | 1 + - include/hw/arm/sharpsl.h | 3 +++ - include/hw/arm/xlnx-zynqmp.h | 1 + - include/hw/block/fdc.h | 2 ++ - include/hw/block/flash.h | 1 + - include/hw/char/escc.h | 1 + - include/hw/char/xilinx_uartlite.h | 2 ++ - include/hw/core/generic-loader.h | 1 + - include/hw/cris/etraxfs.h | 1 + - include/hw/cris/etraxfs_dma.h | 3 +++ - include/hw/display/i2c-ddc.h | 1 + - include/hw/empty_slot.h | 2 ++ - include/hw/gpio/bcm2835_gpio.h | 1 + - include/hw/i2c/aspeed_i2c.h | 2 ++ - include/hw/i386/apic_internal.h | 1 + - include/hw/i386/ioapic_internal.h | 1 + - include/hw/intc/allwinner-a10-pic.h | 2 ++ - include/hw/intc/heathrow_pic.h | 2 ++ - 
include/hw/intc/mips_gic.h | 1 + - include/hw/isa/vt82c686.h | 2 ++ - include/hw/mips/cps.h | 1 + - include/hw/misc/macio/cuda.h | 2 ++ - include/hw/misc/macio/gpio.h | 3 +++ - include/hw/misc/macio/macio.h | 2 ++ - include/hw/misc/macio/pmu.h | 3 +++ - include/hw/misc/mips_cmgcr.h | 2 ++ - include/hw/misc/mips_cpc.h | 2 ++ - include/hw/misc/pvpanic.h | 3 +++ - include/hw/net/allwinner_emac.h | 1 + - include/hw/net/lance.h | 1 + - include/hw/nvram/chrp_nvram.h | 2 ++ - include/hw/pci-host/sabre.h | 2 ++ - include/hw/pci-host/uninorth.h | 2 +- - include/hw/pci/pcie_aer.h | 1 + - include/hw/ppc/pnv_core.h | 1 + - include/hw/ppc/ppc4xx.h | 4 ++++ - include/hw/ppc/spapr_irq.h | 3 +++ - include/hw/ppc/spapr_vio.h | 1 + - include/hw/ppc/spapr_xive.h | 2 ++ - include/hw/ppc/xive_regs.h | 3 +++ - include/hw/riscv/boot.h | 2 ++ - include/hw/riscv/riscv_hart.h | 3 +++ - include/hw/riscv/sifive_clint.h | 2 ++ - include/hw/riscv/sifive_e.h | 1 + - include/hw/riscv/sifive_plic.h | 2 +- - include/hw/riscv/sifive_prci.h | 2 ++ - include/hw/riscv/sifive_test.h | 2 ++ - include/hw/riscv/sifive_u.h | 1 + - include/hw/riscv/sifive_uart.h | 3 +++ - include/hw/riscv/spike.h | 3 +++ - include/hw/riscv/virt.h | 3 +++ - include/hw/s390x/ap-device.h | 3 +++ - include/hw/s390x/css-bridge.h | 3 ++- - include/hw/s390x/css.h | 1 + - include/hw/s390x/tod.h | 2 +- - include/hw/semihosting/console.h | 2 ++ - include/hw/sh4/sh_intc.h | 1 + - include/hw/sparc/sparc64.h | 2 ++ - include/hw/ssi/aspeed_smc.h | 1 + - include/hw/ssi/xilinx_spips.h | 1 + - include/hw/timer/allwinner-a10-pit.h | 1 + - include/hw/timer/i8254_internal.h | 1 + - include/hw/timer/m48t59.h | 2 ++ - include/hw/timer/mc146818rtc_regs.h | 2 ++ - include/hw/timer/xlnx-zynqmp-rtc.h | 1 + - include/hw/virtio/virtio-access.h | 1 + - include/hw/virtio/virtio-gpu-bswap.h | 1 + - include/hw/virtio/virtio-rng.h | 1 + - include/hw/watchdog/wdt_aspeed.h | 1 + - include/libdecnumber/decNumberLocal.h | 1 + - include/migration/cpu.h | 3 +++ - 
include/monitor/hmp-target.h | 2 ++ - include/qemu/atomic128.h | 2 ++ - include/qemu/ratelimit.h | 2 ++ - include/qemu/thread-win32.h | 2 +- - include/sysemu/balloon.h | 1 + - include/sysemu/cryptodev-vhost-user.h | 3 +++ - include/sysemu/hvf.h | 1 + - include/sysemu/iothread.h | 1 + - include/sysemu/kvm_int.h | 2 ++ - include/sysemu/memory_mapping.h | 2 ++ - include/sysemu/xen-mapcache.h | 2 ++ - include/ui/egl-helpers.h | 3 +++ - include/ui/input.h | 1 + - include/ui/spice-display.h | 1 + - target/hppa/cpu.h | 2 +- - 106 files changed, 183 insertions(+), 8 deletions(-) - -diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h -index 0cb7cc74a2..4629f24d08 100644 ---- a/include/block/raw-aio.h -+++ b/include/block/raw-aio.h -@@ -12,9 +12,11 @@ - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ -+ - #ifndef QEMU_RAW_AIO_H - #define QEMU_RAW_AIO_H - -+#include "block/aio.h" - #include "qemu/coroutine.h" - #include "qemu/iov.h" - -diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h -index 80d8aab5d0..c646f267a4 100644 ---- a/include/block/write-threshold.h -+++ b/include/block/write-threshold.h -@@ -9,9 +9,11 @@ - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. 
- */ -+ - #ifndef BLOCK_WRITE_THRESHOLD_H - #define BLOCK_WRITE_THRESHOLD_H - -+#include "block/block_int.h" - - /* - * bdrv_write_threshold_set: -diff --git a/include/disas/disas.h b/include/disas/disas.h -index 15da511f49..ba47e9197c 100644 ---- a/include/disas/disas.h -+++ b/include/disas/disas.h -@@ -1,6 +1,7 @@ - #ifndef QEMU_DISAS_H - #define QEMU_DISAS_H - -+#include "exec/hwaddr.h" - - #ifdef NEED_CPU_H - #include "cpu.h" -diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h -index 5373188be3..a62cfb28d5 100644 ---- a/include/exec/cputlb.h -+++ b/include/exec/cputlb.h -@@ -16,9 +16,12 @@ - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ -+ - #ifndef CPUTLB_H - #define CPUTLB_H - -+#include "exec/cpu-common.h" -+ - #if !defined(CONFIG_USER_ONLY) - /* cputlb.c */ - void tlb_protect_code(ram_addr_t ram_addr); -diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h -index 16034ee651..135aeaab0d 100644 ---- a/include/exec/exec-all.h -+++ b/include/exec/exec-all.h -@@ -20,6 +20,7 @@ - #ifndef EXEC_ALL_H - #define EXEC_ALL_H - -+#include "cpu.h" - #include "exec/tb-context.h" - #include "sysemu/cpus.h" - -diff --git a/include/exec/ioport.h b/include/exec/ioport.h -index a298b89ce1..97feb296d2 100644 ---- a/include/exec/ioport.h -+++ b/include/exec/ioport.h -@@ -24,6 +24,8 @@ - #ifndef IOPORT_H - #define IOPORT_H - -+#include "exec/memory.h" -+ - #define MAX_IOPORTS (64 * 1024) - #define IOPORTS_MASK (MAX_IOPORTS - 1) - -diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h -index d1a9dd1ec8..ef4fb92371 100644 ---- a/include/exec/memory-internal.h -+++ b/include/exec/memory-internal.h -@@ -20,6 +20,8 @@ - #ifndef MEMORY_INTERNAL_H - #define MEMORY_INTERNAL_H - -+#include "cpu.h" -+ - #ifndef CONFIG_USER_ONLY - static inline AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv) - { -diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h -index 
523440662b..27a164b669 100644 ---- a/include/exec/ram_addr.h -+++ b/include/exec/ram_addr.h -@@ -20,6 +20,7 @@ - #define RAM_ADDR_H - - #ifndef CONFIG_USER_ONLY -+#include "cpu.h" - #include "hw/xen/xen.h" - #include "sysemu/tcg.h" - #include "exec/ramlist.h" -diff --git a/include/exec/softmmu-semi.h b/include/exec/softmmu-semi.h -index 970837992e..fbcae88f4b 100644 ---- a/include/exec/softmmu-semi.h -+++ b/include/exec/softmmu-semi.h -@@ -10,6 +10,8 @@ - #ifndef SOFTMMU_SEMI_H - #define SOFTMMU_SEMI_H - -+#include "cpu.h" -+ - static inline uint64_t softmmu_tget64(CPUArchState *env, target_ulong addr) - { - uint64_t val; -diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h -index 4f3a37d927..805235d321 100644 ---- a/include/exec/tb-hash.h -+++ b/include/exec/tb-hash.h -@@ -20,6 +20,8 @@ - #ifndef EXEC_TB_HASH_H - #define EXEC_TB_HASH_H - -+#include "exec/cpu-defs.h" -+#include "exec/exec-all.h" - #include "qemu/xxhash.h" - - #ifdef CONFIG_SOFTMMU -diff --git a/include/exec/user/thunk.h b/include/exec/user/thunk.h -index 8d3af5a3be..eae2c27f99 100644 ---- a/include/exec/user/thunk.h -+++ b/include/exec/user/thunk.h -@@ -16,10 +16,12 @@ - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ -+ - #ifndef THUNK_H - #define THUNK_H - - #include "cpu.h" -+#include "exec/user/abitypes.h" - - /* types enums definitions */ - -diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h -index c55aa6d174..be83a833ec 100644 ---- a/include/fpu/softfloat-macros.h -+++ b/include/fpu/softfloat-macros.h -@@ -82,6 +82,8 @@ this code that are retained. - #ifndef FPU_SOFTFLOAT_MACROS_H - #define FPU_SOFTFLOAT_MACROS_H - -+#include "fpu/softfloat.h" -+ - /*---------------------------------------------------------------------------- - | Shifts `a' right by the number of bits given in `count'. 
If any nonzero - | bits are shifted off, they are ``jammed'' into the least significant bit of -diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h -index 8bbd32cf45..bf2a3ed0ba 100644 ---- a/include/hw/acpi/pci.h -+++ b/include/hw/acpi/pci.h -@@ -22,9 +22,12 @@ - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . - */ -+ - #ifndef HW_ACPI_PCI_H - #define HW_ACPI_PCI_H - -+#include "hw/acpi/bios-linker-loader.h" -+ - typedef struct AcpiMcfgInfo { - uint64_t base; - uint32_t size; -diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/tco.h -index d19dd59353..726f840cce 100644 ---- a/include/hw/acpi/tco.h -+++ b/include/hw/acpi/tco.h -@@ -6,9 +6,12 @@ - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ -+ - #ifndef HW_ACPI_TCO_H - #define HW_ACPI_TCO_H - -+#include "exec/memory.h" -+#include "migration/vmstate.h" - - /* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */ - #define TCO_TICK_NSEC 600000000LL -diff --git a/include/hw/adc/stm32f2xx_adc.h b/include/hw/adc/stm32f2xx_adc.h -index a72f734eb1..663b79f4f3 100644 ---- a/include/hw/adc/stm32f2xx_adc.h -+++ b/include/hw/adc/stm32f2xx_adc.h -@@ -25,6 +25,8 @@ - #ifndef HW_STM32F2XX_ADC_H - #define HW_STM32F2XX_ADC_H - -+#include "hw/sysbus.h" -+ - #define ADC_SR 0x00 - #define ADC_CR1 0x04 - #define ADC_CR2 0x08 -diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h -index e99fe2ea2e..7182ce5c4b 100644 ---- a/include/hw/arm/allwinner-a10.h -+++ b/include/hw/arm/allwinner-a10.h -@@ -11,6 +11,7 @@ - #include "hw/ide/ahci.h" - - #include "sysemu/sysemu.h" -+#include "target/arm/cpu.h" - - - #define AW_A10_PIC_REG_BASE 0x01c20400 -diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h -index cef605ad6b..976fd6be93 100644 ---- a/include/hw/arm/aspeed_soc.h -+++ b/include/hw/arm/aspeed_soc.h -@@ -22,6 +22,7 @@ 
- #include "hw/ssi/aspeed_smc.h" - #include "hw/watchdog/wdt_aspeed.h" - #include "hw/net/ftgmac100.h" -+#include "target/arm/cpu.h" - - #define ASPEED_SPIS_NUM 2 - #define ASPEED_WDTS_NUM 3 -diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h -index a2cb8454de..97187f72be 100644 ---- a/include/hw/arm/bcm2836.h -+++ b/include/hw/arm/bcm2836.h -@@ -13,6 +13,7 @@ - - #include "hw/arm/bcm2835_peripherals.h" - #include "hw/intc/bcm2836_control.h" -+#include "target/arm/cpu.h" - - #define TYPE_BCM283X "bcm283x" - #define BCM283X(obj) OBJECT_CHECK(BCM283XState, (obj), TYPE_BCM283X) -diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h -index aa137271c0..f0f23b0e9b 100644 ---- a/include/hw/arm/exynos4210.h -+++ b/include/hw/arm/exynos4210.h -@@ -19,13 +19,12 @@ - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . -- * - */ - - #ifndef EXYNOS4210_H - #define EXYNOS4210_H - --#include "exec/memory.h" -+#include "hw/sysbus.h" - #include "target/arm/cpu-qom.h" - - #define EXYNOS4210_NCPUS 2 -diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h -index 3280ab1fb0..241efb52ae 100644 ---- a/include/hw/arm/fsl-imx25.h -+++ b/include/hw/arm/fsl-imx25.h -@@ -27,6 +27,7 @@ - #include "hw/i2c/imx_i2c.h" - #include "hw/gpio/imx_gpio.h" - #include "exec/memory.h" -+#include "target/arm/cpu.h" - - #define TYPE_FSL_IMX25 "fsl,imx25" - #define FSL_IMX25(obj) OBJECT_CHECK(FslIMX25State, (obj), TYPE_FSL_IMX25) -diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h -index e68a81efd7..ac5ca9826a 100644 ---- a/include/hw/arm/fsl-imx31.h -+++ b/include/hw/arm/fsl-imx31.h -@@ -26,6 +26,7 @@ - #include "hw/i2c/imx_i2c.h" - #include "hw/gpio/imx_gpio.h" - #include "exec/memory.h" -+#include "target/arm/cpu.h" - - #define TYPE_FSL_IMX31 "fsl,imx31" - #define FSL_IMX31(obj) OBJECT_CHECK(FslIMX31State, (obj), TYPE_FSL_IMX31) -diff --git a/include/hw/arm/sharpsl.h 
b/include/hw/arm/sharpsl.h -index 5bf6db1fa2..89e168fbff 100644 ---- a/include/hw/arm/sharpsl.h -+++ b/include/hw/arm/sharpsl.h -@@ -3,9 +3,12 @@ - * - * This file is licensed under the GNU GPL. - */ -+ - #ifndef QEMU_SHARPSL_H - #define QEMU_SHARPSL_H - -+#include "exec/hwaddr.h" -+ - #define zaurus_printf(format, ...) \ - fprintf(stderr, "%s: " format, __func__, ##__VA_ARGS__) - -diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h -index 35804ea80a..6cb65e7537 100644 ---- a/include/hw/arm/xlnx-zynqmp.h -+++ b/include/hw/arm/xlnx-zynqmp.h -@@ -32,6 +32,7 @@ - #include "hw/intc/xlnx-zynqmp-ipi.h" - #include "hw/timer/xlnx-zynqmp-rtc.h" - #include "hw/cpu/cluster.h" -+#include "target/arm/cpu.h" - - #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp" - #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \ -diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h -index 8cece84326..f4fe2f471b 100644 ---- a/include/hw/block/fdc.h -+++ b/include/hw/block/fdc.h -@@ -1,6 +1,8 @@ - #ifndef HW_FDC_H - #define HW_FDC_H - -+#include "exec/hwaddr.h" -+#include "hw/irq.h" - #include "qapi/qapi-types-block.h" - - /* fdc.c */ -diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h -index 1acaf7de80..83a75f3170 100644 ---- a/include/hw/block/flash.h -+++ b/include/hw/block/flash.h -@@ -4,6 +4,7 @@ - /* NOR flash devices */ - - #include "exec/memory.h" -+#include "migration/vmstate.h" - - /* pflash_cfi01.c */ - -diff --git a/include/hw/char/escc.h b/include/hw/char/escc.h -index 42aca83611..d5196c53e6 100644 ---- a/include/hw/char/escc.h -+++ b/include/hw/char/escc.h -@@ -3,6 +3,7 @@ - - #include "chardev/char-fe.h" - #include "chardev/char-serial.h" -+#include "hw/sysbus.h" - #include "ui/input.h" - - /* escc.c */ -diff --git a/include/hw/char/xilinx_uartlite.h b/include/hw/char/xilinx_uartlite.h -index 634086b657..99d8bbf405 100644 ---- a/include/hw/char/xilinx_uartlite.h -+++ b/include/hw/char/xilinx_uartlite.h -@@ -15,6 +15,8 @@ - #ifndef 
XILINX_UARTLITE_H - #define XILINX_UARTLITE_H - -+#include "hw/sysbus.h" -+ - static inline DeviceState *xilinx_uartlite_create(hwaddr addr, - qemu_irq irq, - Chardev *chr) -diff --git a/include/hw/core/generic-loader.h b/include/hw/core/generic-loader.h -index dd27c42ab0..9ffce1c5a3 100644 ---- a/include/hw/core/generic-loader.h -+++ b/include/hw/core/generic-loader.h -@@ -19,6 +19,7 @@ - #define GENERIC_LOADER_H - - #include "elf.h" -+#include "hw/qdev-core.h" - - typedef struct GenericLoaderState { - /* */ -diff --git a/include/hw/cris/etraxfs.h b/include/hw/cris/etraxfs.h -index 8da965addb..494222d315 100644 ---- a/include/hw/cris/etraxfs.h -+++ b/include/hw/cris/etraxfs.h -@@ -27,6 +27,7 @@ - - #include "net/net.h" - #include "hw/cris/etraxfs_dma.h" -+#include "hw/sysbus.h" - - /* Instantiate an ETRAXFS Ethernet MAC. */ - static inline DeviceState * -diff --git a/include/hw/cris/etraxfs_dma.h b/include/hw/cris/etraxfs_dma.h -index f6f33e0980..31ae360611 100644 ---- a/include/hw/cris/etraxfs_dma.h -+++ b/include/hw/cris/etraxfs_dma.h -@@ -1,6 +1,9 @@ - #ifndef HW_ETRAXFS_DMA_H - #define HW_ETRAXFS_DMA_H - -+#include "exec/hwaddr.h" -+#include "hw/irq.h" -+ - struct dma_context_metadata { - /* data descriptor md */ - uint16_t metadata; -diff --git a/include/hw/display/i2c-ddc.h b/include/hw/display/i2c-ddc.h -index c29443c5af..1cf53a0c8d 100644 ---- a/include/hw/display/i2c-ddc.h -+++ b/include/hw/display/i2c-ddc.h -@@ -20,6 +20,7 @@ - #define I2C_DDC_H - - #include "hw/display/edid.h" -+#include "hw/i2c/i2c.h" - - /* A simple I2C slave which just returns the contents of its EDID blob. 
*/ - struct I2CDDCState { -diff --git a/include/hw/empty_slot.h b/include/hw/empty_slot.h -index 123a9f8989..cb9a221aa6 100644 ---- a/include/hw/empty_slot.h -+++ b/include/hw/empty_slot.h -@@ -1,6 +1,8 @@ - #ifndef HW_EMPTY_SLOT_H - #define HW_EMPTY_SLOT_H - -+#include "exec/hwaddr.h" -+ - /* empty_slot.c */ - void empty_slot_init(hwaddr addr, uint64_t slot_size); - -diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h -index 9f8e0c720c..b0de0a3c74 100644 ---- a/include/hw/gpio/bcm2835_gpio.h -+++ b/include/hw/gpio/bcm2835_gpio.h -@@ -15,6 +15,7 @@ - #define BCM2835_GPIO_H - - #include "hw/sd/sd.h" -+#include "hw/sysbus.h" - - typedef struct BCM2835GpioState { - SysBusDevice parent_obj; -diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h -index f9020acdef..a2753f0bbb 100644 ---- a/include/hw/i2c/aspeed_i2c.h -+++ b/include/hw/i2c/aspeed_i2c.h -@@ -17,10 +17,12 @@ - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ -+ - #ifndef ASPEED_I2C_H - #define ASPEED_I2C_H - - #include "hw/i2c/i2c.h" -+#include "hw/sysbus.h" - - #define TYPE_ASPEED_I2C "aspeed.i2c" - #define ASPEED_I2C(obj) \ -diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h -index 1209eb483a..b04bdd947f 100644 ---- a/include/hw/i386/apic_internal.h -+++ b/include/hw/i386/apic_internal.h -@@ -24,6 +24,7 @@ - #include "cpu.h" - #include "exec/memory.h" - #include "qemu/timer.h" -+#include "target/i386/cpu-qom.h" - - /* APIC Local Vector Table */ - #define APIC_LVT_TIMER 0 -diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h -index 07002f9662..3d2eec2aa7 100644 ---- a/include/hw/i386/ioapic_internal.h -+++ b/include/hw/i386/ioapic_internal.h -@@ -24,6 +24,7 @@ - - #include "hw/hw.h" - #include "exec/memory.h" -+#include "hw/i386/ioapic.h" - #include "hw/sysbus.h" - #include "qemu/notify.h" - -diff --git a/include/hw/intc/allwinner-a10-pic.h b/include/hw/intc/allwinner-a10-pic.h -index 1d314a70d9..a5895401d1 100644 ---- a/include/hw/intc/allwinner-a10-pic.h -+++ b/include/hw/intc/allwinner-a10-pic.h -@@ -1,6 +1,8 @@ - #ifndef ALLWINNER_A10_PIC_H - #define ALLWINNER_A10_PIC_H - -+#include "hw/sysbus.h" -+ - #define TYPE_AW_A10_PIC "allwinner-a10-pic" - #define AW_A10_PIC(obj) OBJECT_CHECK(AwA10PICState, (obj), TYPE_AW_A10_PIC) - -diff --git a/include/hw/intc/heathrow_pic.h b/include/hw/intc/heathrow_pic.h -index 6c91ec91bb..b163e27ab9 100644 ---- a/include/hw/intc/heathrow_pic.h -+++ b/include/hw/intc/heathrow_pic.h -@@ -26,6 +26,8 @@ - #ifndef HW_INTC_HEATHROW_PIC_H - #define HW_INTC_HEATHROW_PIC_H - -+#include "hw/sysbus.h" -+ - #define TYPE_HEATHROW "heathrow" - #define HEATHROW(obj) OBJECT_CHECK(HeathrowState, (obj), TYPE_HEATHROW) - -diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h -index 902a12b178..8428287bf9 100644 ---- a/include/hw/intc/mips_gic.h -+++ b/include/hw/intc/mips_gic.h -@@ -13,6 +13,7 @@ - - #include "qemu/units.h" - 
#include "hw/timer/mips_gictimer.h" -+#include "hw/sysbus.h" - #include "cpu.h" - /* - * GIC Specific definitions -diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h -index c3c2b6e786..a54c3fe60a 100644 ---- a/include/hw/isa/vt82c686.h -+++ b/include/hw/isa/vt82c686.h -@@ -1,6 +1,8 @@ - #ifndef HW_VT82C686_H - #define HW_VT82C686_H - -+#include "hw/irq.h" -+ - #define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" - - /* vt82c686.c */ -diff --git a/include/hw/mips/cps.h b/include/hw/mips/cps.h -index aab1af926d..a941c55f27 100644 ---- a/include/hw/mips/cps.h -+++ b/include/hw/mips/cps.h -@@ -25,6 +25,7 @@ - #include "hw/intc/mips_gic.h" - #include "hw/misc/mips_cpc.h" - #include "hw/misc/mips_itu.h" -+#include "target/mips/cpu.h" - - #define TYPE_MIPS_CPS "mips-cps" - #define MIPS_CPS(obj) OBJECT_CHECK(MIPSCPSState, (obj), TYPE_MIPS_CPS) -diff --git a/include/hw/misc/macio/cuda.h b/include/hw/misc/macio/cuda.h -index 7dad469142..5768075ac5 100644 ---- a/include/hw/misc/macio/cuda.h -+++ b/include/hw/misc/macio/cuda.h -@@ -26,6 +26,8 @@ - #ifndef CUDA_H - #define CUDA_H - -+#include "hw/misc/mos6522.h" -+ - /* CUDA commands (2nd byte) */ - #define CUDA_WARM_START 0x0 - #define CUDA_AUTOPOLL 0x1 -diff --git a/include/hw/misc/macio/gpio.h b/include/hw/misc/macio/gpio.h -index 2838ae5fde..24a4364b39 100644 ---- a/include/hw/misc/macio/gpio.h -+++ b/include/hw/misc/macio/gpio.h -@@ -26,6 +26,9 @@ - #ifndef MACIO_GPIO_H - #define MACIO_GPIO_H - -+#include "hw/ppc/openpic.h" -+#include "hw/sysbus.h" -+ - #define TYPE_MACIO_GPIO "macio-gpio" - #define MACIO_GPIO(obj) OBJECT_CHECK(MacIOGPIOState, (obj), TYPE_MACIO_GPIO) - -diff --git a/include/hw/misc/macio/macio.h b/include/hw/misc/macio/macio.h -index 970058b6ed..070a694eb5 100644 ---- a/include/hw/misc/macio/macio.h -+++ b/include/hw/misc/macio/macio.h -@@ -27,10 +27,12 @@ - #define MACIO_H - - #include "hw/char/escc.h" -+#include "hw/ide/internal.h" - #include "hw/intc/heathrow_pic.h" - #include 
"hw/misc/macio/cuda.h" - #include "hw/misc/macio/gpio.h" - #include "hw/misc/macio/pmu.h" -+#include "hw/ppc/mac.h" - #include "hw/ppc/mac_dbdma.h" - #include "hw/ppc/openpic.h" - -diff --git a/include/hw/misc/macio/pmu.h b/include/hw/misc/macio/pmu.h -index d10895ba5f..7ef83dee4c 100644 ---- a/include/hw/misc/macio/pmu.h -+++ b/include/hw/misc/macio/pmu.h -@@ -10,6 +10,9 @@ - #ifndef PMU_H - #define PMU_H - -+#include "hw/misc/mos6522.h" -+#include "hw/misc/macio/gpio.h" -+ - /* - * PMU commands - */ -diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h -index c9dfcb4b84..3e6e223273 100644 ---- a/include/hw/misc/mips_cmgcr.h -+++ b/include/hw/misc/mips_cmgcr.h -@@ -10,6 +10,8 @@ - #ifndef MIPS_CMGCR_H - #define MIPS_CMGCR_H - -+#include "hw/sysbus.h" -+ - #define TYPE_MIPS_GCR "mips-gcr" - #define MIPS_GCR(obj) OBJECT_CHECK(MIPSGCRState, (obj), TYPE_MIPS_GCR) - -diff --git a/include/hw/misc/mips_cpc.h b/include/hw/misc/mips_cpc.h -index 72c834e039..3f670578b0 100644 ---- a/include/hw/misc/mips_cpc.h -+++ b/include/hw/misc/mips_cpc.h -@@ -20,6 +20,8 @@ - #ifndef MIPS_CPC_H - #define MIPS_CPC_H - -+#include "hw/sysbus.h" -+ - #define CPC_ADDRSPACE_SZ 0x6000 - - /* CPC blocks offsets relative to base address */ -diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h -index 1ee071a703..ae0c8188ce 100644 ---- a/include/hw/misc/pvpanic.h -+++ b/include/hw/misc/pvpanic.h -@@ -11,9 +11,12 @@ - * See the COPYING file in the top-level directory. 
- * - */ -+ - #ifndef HW_MISC_PVPANIC_H - #define HW_MISC_PVPANIC_H - -+#include "qom/object.h" -+ - #define TYPE_PVPANIC "pvpanic" - - #define PVPANIC_IOPORT_PROP "ioport" -diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h -index 905a43deb4..5013207d15 100644 ---- a/include/hw/net/allwinner_emac.h -+++ b/include/hw/net/allwinner_emac.h -@@ -27,6 +27,7 @@ - #include "net/net.h" - #include "qemu/fifo8.h" - #include "hw/net/mii.h" -+#include "hw/sysbus.h" - - #define TYPE_AW_EMAC "allwinner-emac" - #define AW_EMAC(obj) OBJECT_CHECK(AwEmacState, (obj), TYPE_AW_EMAC) -diff --git a/include/hw/net/lance.h b/include/hw/net/lance.h -index ffdd35c4d7..0357f5f65c 100644 ---- a/include/hw/net/lance.h -+++ b/include/hw/net/lance.h -@@ -31,6 +31,7 @@ - - #include "net/net.h" - #include "hw/net/pcnet.h" -+#include "hw/sysbus.h" - - #define TYPE_LANCE "lance" - #define SYSBUS_PCNET(obj) \ -diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h -index b4f5b2b104..09941a9be4 100644 ---- a/include/hw/nvram/chrp_nvram.h -+++ b/include/hw/nvram/chrp_nvram.h -@@ -18,6 +18,8 @@ - #ifndef CHRP_NVRAM_H - #define CHRP_NVRAM_H - -+#include "qemu/bswap.h" -+ - /* OpenBIOS NVRAM partition */ - typedef struct { - uint8_t signature; -diff --git a/include/hw/pci-host/sabre.h b/include/hw/pci-host/sabre.h -index 9afa4938fd..99b5aefbec 100644 ---- a/include/hw/pci-host/sabre.h -+++ b/include/hw/pci-host/sabre.h -@@ -1,6 +1,8 @@ - #ifndef HW_PCI_HOST_SABRE_H - #define HW_PCI_HOST_SABRE_H - -+#include "hw/pci/pci.h" -+#include "hw/pci/pci_host.h" - #include "hw/sparc/sun4u_iommu.h" - - #define MAX_IVEC 0x40 -diff --git a/include/hw/pci-host/uninorth.h b/include/hw/pci-host/uninorth.h -index 060324536a..9a5cabd4c5 100644 ---- a/include/hw/pci-host/uninorth.h -+++ b/include/hw/pci-host/uninorth.h -@@ -26,7 +26,7 @@ - #define UNINORTH_H - - #include "hw/hw.h" -- -+#include "hw/pci/pci_host.h" - #include "hw/ppc/openpic.h" - - /* UniNorth version */ -diff 
--git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h -index 729a9439c8..502dcd7eba 100644 ---- a/include/hw/pci/pcie_aer.h -+++ b/include/hw/pci/pcie_aer.h -@@ -22,6 +22,7 @@ - #define QEMU_PCIE_AER_H - - #include "hw/hw.h" -+#include "hw/pci/pci_regs.h" - - /* definitions which PCIExpressDevice uses */ - -diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h -index d0926454a9..bfbd2ec42a 100644 ---- a/include/hw/ppc/pnv_core.h -+++ b/include/hw/ppc/pnv_core.h -@@ -21,6 +21,7 @@ - #define PPC_PNV_CORE_H - - #include "hw/cpu/core.h" -+#include "target/ppc/cpu.h" - - #define TYPE_PNV_CORE "powernv-cpu-core" - #define PNV_CORE(obj) \ -diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h -index 39a7ba1ce6..90f8866138 100644 ---- a/include/hw/ppc/ppc4xx.h -+++ b/include/hw/ppc/ppc4xx.h -@@ -25,6 +25,10 @@ - #ifndef PPC4XX_H - #define PPC4XX_H - -+#include "hw/ppc/ppc.h" -+#include "exec/cpu-common.h" -+#include "exec/memory.h" -+ - /* PowerPC 4xx core initialization */ - PowerPCCPU *ppc4xx_init(const char *cpu_model, - clk_setup_t *cpu_clk, clk_setup_t *tb_clk, -diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h -index f965a58f89..cd6e18b05e 100644 ---- a/include/hw/ppc/spapr_irq.h -+++ b/include/hw/ppc/spapr_irq.h -@@ -10,6 +10,9 @@ - #ifndef HW_SPAPR_IRQ_H - #define HW_SPAPR_IRQ_H - -+#include "hw/irq.h" -+#include "target/ppc/cpu-qom.h" -+ - /* - * IRQ range offsets per device type - */ -diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h -index 97951fc6b4..92bfa72caf 100644 ---- a/include/hw/ppc/spapr_vio.h -+++ b/include/hw/ppc/spapr_vio.h -@@ -22,6 +22,7 @@ - * License along with this library; if not, see . 
- */ - -+#include "hw/ppc/spapr.h" - #include "sysemu/dma.h" - - #define TYPE_VIO_SPAPR_DEVICE "vio-spapr-device" -diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h -index 7197144265..a39e672f27 100644 ---- a/include/hw/ppc/spapr_xive.h -+++ b/include/hw/ppc/spapr_xive.h -@@ -10,7 +10,9 @@ - #ifndef PPC_SPAPR_XIVE_H - #define PPC_SPAPR_XIVE_H - -+#include "hw/ppc/spapr_irq.h" - #include "hw/ppc/xive.h" -+#include "sysemu/sysemu.h" - - #define TYPE_SPAPR_XIVE "spapr-xive" - #define SPAPR_XIVE(obj) OBJECT_CHECK(SpaprXive, (obj), TYPE_SPAPR_XIVE) -diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h -index 1a8c5b5e64..b0c68ab5f7 100644 ---- a/include/hw/ppc/xive_regs.h -+++ b/include/hw/ppc/xive_regs.h -@@ -16,6 +16,9 @@ - #ifndef PPC_XIVE_REGS_H - #define PPC_XIVE_REGS_H - -+#include "qemu/bswap.h" -+#include "qemu/host-utils.h" -+ - /* - * Interrupt source number encoding on PowerBUS - */ -diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h -index d56f2ae3eb..1f21c2bef1 100644 ---- a/include/hw/riscv/boot.h -+++ b/include/hw/riscv/boot.h -@@ -20,6 +20,8 @@ - #ifndef RISCV_BOOT_H - #define RISCV_BOOT_H - -+#include "exec/cpu-defs.h" -+ - void riscv_find_and_load_firmware(MachineState *machine, - const char *default_machine_firmware, - hwaddr firmware_load_addr); -diff --git a/include/hw/riscv/riscv_hart.h b/include/hw/riscv/riscv_hart.h -index 0671d88a44..3b52b50571 100644 ---- a/include/hw/riscv/riscv_hart.h -+++ b/include/hw/riscv/riscv_hart.h -@@ -21,6 +21,9 @@ - #ifndef HW_RISCV_HART_H - #define HW_RISCV_HART_H - -+#include "hw/sysbus.h" -+#include "target/riscv/cpu.h" -+ - #define TYPE_RISCV_HART_ARRAY "riscv.hart_array" - - #define RISCV_HART_ARRAY(obj) \ -diff --git a/include/hw/riscv/sifive_clint.h b/include/hw/riscv/sifive_clint.h -index e2865be1d1..ae8286c884 100644 ---- a/include/hw/riscv/sifive_clint.h -+++ b/include/hw/riscv/sifive_clint.h -@@ -20,6 +20,8 @@ - #ifndef HW_SIFIVE_CLINT_H - #define 
HW_SIFIVE_CLINT_H - -+#include "hw/sysbus.h" -+ - #define TYPE_SIFIVE_CLINT "riscv.sifive.clint" - - #define SIFIVE_CLINT(obj) \ -diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h -index d175b24cb2..9c868dd7f9 100644 ---- a/include/hw/riscv/sifive_e.h -+++ b/include/hw/riscv/sifive_e.h -@@ -19,6 +19,7 @@ - #ifndef HW_SIFIVE_E_H - #define HW_SIFIVE_E_H - -+#include "hw/riscv/riscv_hart.h" - #include "hw/riscv/sifive_gpio.h" - - #define TYPE_RISCV_E_SOC "riscv.sifive.e.soc" -diff --git a/include/hw/riscv/sifive_plic.h b/include/hw/riscv/sifive_plic.h -index ce8907f6aa..b0edba2884 100644 ---- a/include/hw/riscv/sifive_plic.h -+++ b/include/hw/riscv/sifive_plic.h -@@ -21,7 +21,7 @@ - #ifndef HW_SIFIVE_PLIC_H - #define HW_SIFIVE_PLIC_H - --#include "hw/irq.h" -+#include "hw/sysbus.h" - - #define TYPE_SIFIVE_PLIC "riscv.sifive.plic" - -diff --git a/include/hw/riscv/sifive_prci.h b/include/hw/riscv/sifive_prci.h -index bd51c4af3c..8b7de134f8 100644 ---- a/include/hw/riscv/sifive_prci.h -+++ b/include/hw/riscv/sifive_prci.h -@@ -19,6 +19,8 @@ - #ifndef HW_SIFIVE_PRCI_H - #define HW_SIFIVE_PRCI_H - -+#include "hw/sysbus.h" -+ - enum { - SIFIVE_PRCI_HFROSCCFG = 0x0, - SIFIVE_PRCI_HFXOSCCFG = 0x4, -diff --git a/include/hw/riscv/sifive_test.h b/include/hw/riscv/sifive_test.h -index 71d4c9fad7..3a603a6ead 100644 ---- a/include/hw/riscv/sifive_test.h -+++ b/include/hw/riscv/sifive_test.h -@@ -19,6 +19,8 @@ - #ifndef HW_SIFIVE_TEST_H - #define HW_SIFIVE_TEST_H - -+#include "hw/sysbus.h" -+ - #define TYPE_SIFIVE_TEST "riscv.sifive.test" - - #define SIFIVE_TEST(obj) \ -diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h -index 892f0eee21..be021ce256 100644 ---- a/include/hw/riscv/sifive_u.h -+++ b/include/hw/riscv/sifive_u.h -@@ -20,6 +20,7 @@ - #define HW_SIFIVE_U_H - - #include "hw/net/cadence_gem.h" -+#include "hw/riscv/riscv_hart.h" - - #define TYPE_RISCV_U_SOC "riscv.sifive.u.soc" - #define RISCV_U_SOC(obj) \ -diff --git 
a/include/hw/riscv/sifive_uart.h b/include/hw/riscv/sifive_uart.h -index c8dc1c57fd..65668825a3 100644 ---- a/include/hw/riscv/sifive_uart.h -+++ b/include/hw/riscv/sifive_uart.h -@@ -20,6 +20,9 @@ - #ifndef HW_SIFIVE_UART_H - #define HW_SIFIVE_UART_H - -+#include "chardev/char-fe.h" -+#include "hw/sysbus.h" -+ - enum { - SIFIVE_UART_TXFIFO = 0, - SIFIVE_UART_RXFIFO = 4, -diff --git a/include/hw/riscv/spike.h b/include/hw/riscv/spike.h -index 641b70da67..03d870363c 100644 ---- a/include/hw/riscv/spike.h -+++ b/include/hw/riscv/spike.h -@@ -19,6 +19,9 @@ - #ifndef HW_RISCV_SPIKE_H - #define HW_RISCV_SPIKE_H - -+#include "hw/riscv/riscv_hart.h" -+#include "hw/sysbus.h" -+ - typedef struct { - /*< private >*/ - SysBusDevice parent_obj; -diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h -index d01a1a85c4..6e5fbe5d3b 100644 ---- a/include/hw/riscv/virt.h -+++ b/include/hw/riscv/virt.h -@@ -19,6 +19,9 @@ - #ifndef HW_RISCV_VIRT_H - #define HW_RISCV_VIRT_H - -+#include "hw/riscv/riscv_hart.h" -+#include "hw/sysbus.h" -+ - typedef struct { - /*< private >*/ - SysBusDevice parent_obj; -diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h -index 765e9082a3..8df9cd2954 100644 ---- a/include/hw/s390x/ap-device.h -+++ b/include/hw/s390x/ap-device.h -@@ -7,9 +7,12 @@ - * your option) any later version. See the COPYING file in the top-level - * directory. 
- */ -+ - #ifndef HW_S390X_AP_DEVICE_H - #define HW_S390X_AP_DEVICE_H - -+#include "hw/qdev-core.h" -+ - #define AP_DEVICE_TYPE "ap-device" - - typedef struct APDevice { -diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h -index 5a0203be5f..f7ed2d9a03 100644 ---- a/include/hw/s390x/css-bridge.h -+++ b/include/hw/s390x/css-bridge.h -@@ -12,8 +12,9 @@ - - #ifndef HW_S390X_CSS_BRIDGE_H - #define HW_S390X_CSS_BRIDGE_H -+ - #include "qom/object.h" --#include "hw/qdev-core.h" -+#include "hw/sysbus.h" - - /* virtual css bridge */ - typedef struct VirtualCssBridge { -diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h -index d033387fba..f46bcafb16 100644 ---- a/include/hw/s390x/css.h -+++ b/include/hw/s390x/css.h -@@ -17,6 +17,7 @@ - #include "hw/s390x/s390_flic.h" - #include "hw/s390x/ioinst.h" - #include "sysemu/kvm.h" -+#include "target/s390x/cpu-qom.h" - - /* Channel subsystem constants. */ - #define MAX_DEVNO 65535 -diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h -index 9c4a6000c3..d71f4ea8a7 100644 ---- a/include/hw/s390x/tod.h -+++ b/include/hw/s390x/tod.h -@@ -12,7 +12,7 @@ - #define HW_S390_TOD_H - - #include "hw/qdev.h" --#include "s390-tod.h" -+#include "target/s390x/s390-tod.h" - - typedef struct S390TOD { - uint8_t high; -diff --git a/include/hw/semihosting/console.h b/include/hw/semihosting/console.h -index cfab572c0c..9be9754bcd 100644 ---- a/include/hw/semihosting/console.h -+++ b/include/hw/semihosting/console.h -@@ -9,6 +9,8 @@ - #ifndef SEMIHOST_CONSOLE_H - #define SEMIHOST_CONSOLE_H - -+#include "cpu.h" -+ - /** - * qemu_semihosting_console_outs: - * @env: CPUArchState -diff --git a/include/hw/sh4/sh_intc.h b/include/hw/sh4/sh_intc.h -index b7c2404334..3d3efde059 100644 ---- a/include/hw/sh4/sh_intc.h -+++ b/include/hw/sh4/sh_intc.h -@@ -1,6 +1,7 @@ - #ifndef SH_INTC_H - #define SH_INTC_H - -+#include "exec/memory.h" - #include "hw/irq.h" - - typedef unsigned char intc_enum; -diff --git 
a/include/hw/sparc/sparc64.h b/include/hw/sparc/sparc64.h -index 21ab79e343..4ced36fb5a 100644 ---- a/include/hw/sparc/sparc64.h -+++ b/include/hw/sparc/sparc64.h -@@ -1,6 +1,8 @@ - #ifndef HW_SPARC_SPARC64_H - #define HW_SPARC_SPARC64_H - -+#include "target/sparc/cpu-qom.h" -+ - #define IVEC_MAX 0x40 - - SPARCCPU *sparc64_cpu_devinit(const char *cpu_type, uint64_t prom_addr); -diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h -index 591279ba1f..aa07dac4fe 100644 ---- a/include/hw/ssi/aspeed_smc.h -+++ b/include/hw/ssi/aspeed_smc.h -@@ -26,6 +26,7 @@ - #define ASPEED_SMC_H - - #include "hw/ssi/ssi.h" -+#include "hw/sysbus.h" - - typedef struct AspeedSegments { - hwaddr addr; -diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h -index a0a0ae7584..6a39b55a7b 100644 ---- a/include/hw/ssi/xilinx_spips.h -+++ b/include/hw/ssi/xilinx_spips.h -@@ -28,6 +28,7 @@ - #include "hw/ssi/ssi.h" - #include "qemu/fifo32.h" - #include "hw/stream.h" -+#include "hw/sysbus.h" - - typedef struct XilinxSPIPS XilinxSPIPS; - -diff --git a/include/hw/timer/allwinner-a10-pit.h b/include/hw/timer/allwinner-a10-pit.h -index c0cc3e2169..871c95b512 100644 ---- a/include/hw/timer/allwinner-a10-pit.h -+++ b/include/hw/timer/allwinner-a10-pit.h -@@ -2,6 +2,7 @@ - #define ALLWINNER_A10_PIT_H - - #include "hw/ptimer.h" -+#include "hw/sysbus.h" - - #define TYPE_AW_A10_PIT "allwinner-A10-timer" - #define AW_A10_PIT(obj) OBJECT_CHECK(AwA10PITState, (obj), TYPE_AW_A10_PIT) -diff --git a/include/hw/timer/i8254_internal.h b/include/hw/timer/i8254_internal.h -index c37a438f82..e611c6f227 100644 ---- a/include/hw/timer/i8254_internal.h -+++ b/include/hw/timer/i8254_internal.h -@@ -27,6 +27,7 @@ - - #include "hw/hw.h" - #include "hw/isa/isa.h" -+#include "hw/timer/i8254.h" - #include "qemu/timer.h" - - typedef struct PITChannelState { -diff --git a/include/hw/timer/m48t59.h b/include/hw/timer/m48t59.h -index 43efc91f56..d3fb50e08c 100644 ---- 
a/include/hw/timer/m48t59.h -+++ b/include/hw/timer/m48t59.h -@@ -1,6 +1,8 @@ - #ifndef HW_M48T59_H - #define HW_M48T59_H - -+#include "exec/hwaddr.h" -+#include "hw/irq.h" - #include "qom/object.h" - - #define TYPE_NVRAM "nvram" -diff --git a/include/hw/timer/mc146818rtc_regs.h b/include/hw/timer/mc146818rtc_regs.h -index c62f17bf2d..bfbb57e570 100644 ---- a/include/hw/timer/mc146818rtc_regs.h -+++ b/include/hw/timer/mc146818rtc_regs.h -@@ -25,6 +25,8 @@ - #ifndef MC146818RTC_REGS_H - #define MC146818RTC_REGS_H - -+#include "qemu/timer.h" -+ - #define RTC_ISA_IRQ 8 - - #define RTC_SECONDS 0 -diff --git a/include/hw/timer/xlnx-zynqmp-rtc.h b/include/hw/timer/xlnx-zynqmp-rtc.h -index 6e9134edf6..97e32322ed 100644 ---- a/include/hw/timer/xlnx-zynqmp-rtc.h -+++ b/include/hw/timer/xlnx-zynqmp-rtc.h -@@ -28,6 +28,7 @@ - #define HW_TIMER_XLNX_ZYNQMP_RTC_H - - #include "hw/register.h" -+#include "hw/sysbus.h" - - #define TYPE_XLNX_ZYNQMP_RTC "xlnx-zynmp.rtc" - -diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h -index bdf58f3119..6818a23a2d 100644 ---- a/include/hw/virtio/virtio-access.h -+++ b/include/hw/virtio/virtio-access.h -@@ -16,6 +16,7 @@ - #ifndef QEMU_VIRTIO_ACCESS_H - #define QEMU_VIRTIO_ACCESS_H - -+#include "exec/hwaddr.h" - #include "hw/virtio/virtio.h" - #include "hw/virtio/virtio-bus.h" - -diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h -index 38d12160f6..203f9e1718 100644 ---- a/include/hw/virtio/virtio-gpu-bswap.h -+++ b/include/hw/virtio/virtio-gpu-bswap.h -@@ -15,6 +15,7 @@ - #define HW_VIRTIO_GPU_BSWAP_H - - #include "qemu/bswap.h" -+#include "standard-headers/linux/virtio_gpu.h" - - static inline void - virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr) -diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h -index 922dce7cac..ff699335e3 100644 ---- a/include/hw/virtio/virtio-rng.h -+++ b/include/hw/virtio/virtio-rng.h -@@ -12,6 +12,7 @@ - #ifndef 
QEMU_VIRTIO_RNG_H - #define QEMU_VIRTIO_RNG_H - -+#include "hw/virtio/virtio.h" - #include "sysemu/rng.h" - #include "sysemu/rng-random.h" - #include "standard-headers/linux/virtio_rng.h" -diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h -index daef0c0e23..8c5691ce20 100644 ---- a/include/hw/watchdog/wdt_aspeed.h -+++ b/include/hw/watchdog/wdt_aspeed.h -@@ -10,6 +10,7 @@ - #ifndef WDT_ASPEED_H - #define WDT_ASPEED_H - -+#include "hw/misc/aspeed_scu.h" - #include "hw/sysbus.h" - - #define TYPE_ASPEED_WDT "aspeed.wdt" -diff --git a/include/libdecnumber/decNumberLocal.h b/include/libdecnumber/decNumberLocal.h -index 12cf1d8b6f..4d53c077f2 100644 ---- a/include/libdecnumber/decNumberLocal.h -+++ b/include/libdecnumber/decNumberLocal.h -@@ -44,6 +44,7 @@ - #define DECNLAUTHOR "Mike Cowlishaw" /* Who to blame */ - - #include "libdecnumber/dconfig.h" -+ #include "libdecnumber/decContext.h" - - /* Conditional code flag -- set this to match hardware platform */ - /* 1=little-endian, 0=big-endian */ -diff --git a/include/migration/cpu.h b/include/migration/cpu.h -index a40bd3549f..da1618d620 100644 ---- a/include/migration/cpu.h -+++ b/include/migration/cpu.h -@@ -1,7 +1,10 @@ - /* Declarations for use for CPU state serialization. 
*/ -+ - #ifndef MIGRATION_CPU_H - #define MIGRATION_CPU_H - -+#include "exec/cpu-defs.h" -+ - #if TARGET_LONG_BITS == 64 - #define qemu_put_betl qemu_put_be64 - #define qemu_get_betl qemu_get_be64 -diff --git a/include/monitor/hmp-target.h b/include/monitor/hmp-target.h -index 454e8ed155..8b7820a3ad 100644 ---- a/include/monitor/hmp-target.h -+++ b/include/monitor/hmp-target.h -@@ -25,6 +25,8 @@ - #ifndef MONITOR_HMP_TARGET_H - #define MONITOR_HMP_TARGET_H - -+#include "cpu.h" -+ - #define MD_TLONG 0 - #define MD_I32 1 - -diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h -index ddd0d55d31..6b34484e15 100644 ---- a/include/qemu/atomic128.h -+++ b/include/qemu/atomic128.h -@@ -13,6 +13,8 @@ - #ifndef QEMU_ATOMIC128_H - #define QEMU_ATOMIC128_H - -+#include "qemu/int128.h" -+ - /* - * GCC is a house divided about supporting large atomic operations. - * -diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h -index 1b38291823..01da8d63f1 100644 ---- a/include/qemu/ratelimit.h -+++ b/include/qemu/ratelimit.h -@@ -14,6 +14,8 @@ - #ifndef QEMU_RATELIMIT_H - #define QEMU_RATELIMIT_H - -+#include "qemu/timer.h" -+ - typedef struct { - int64_t slice_start_time; - int64_t slice_end_time; -diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h -index 50af5dd7ab..d0a1a9597e 100644 ---- a/include/qemu/thread-win32.h -+++ b/include/qemu/thread-win32.h -@@ -47,6 +47,6 @@ struct QemuThread { - }; - - /* Only valid for joinable threads. 
*/ --HANDLE qemu_thread_get_handle(QemuThread *thread); -+HANDLE qemu_thread_get_handle(struct QemuThread *thread); - - #endif -diff --git a/include/sysemu/balloon.h b/include/sysemu/balloon.h -index c8f6145257..aea0c44985 100644 ---- a/include/sysemu/balloon.h -+++ b/include/sysemu/balloon.h -@@ -14,6 +14,7 @@ - #ifndef QEMU_BALLOON_H - #define QEMU_BALLOON_H - -+#include "exec/cpu-common.h" - #include "qapi/qapi-types-misc.h" - - typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target); -diff --git a/include/sysemu/cryptodev-vhost-user.h b/include/sysemu/cryptodev-vhost-user.h -index 6debf53fc5..0d3421e7e8 100644 ---- a/include/sysemu/cryptodev-vhost-user.h -+++ b/include/sysemu/cryptodev-vhost-user.h -@@ -20,9 +20,12 @@ - * License along with this library; if not, see . - * - */ -+ - #ifndef CRYPTODEV_VHOST_USER_H - #define CRYPTODEV_VHOST_USER_H - -+#include "sysemu/cryptodev-vhost.h" -+ - #define VHOST_USER_MAX_AUTH_KEY_LEN 512 - #define VHOST_USER_MAX_CIPHER_KEY_LEN 64 - -diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h -index d275b5a843..dd1722f2df 100644 ---- a/include/sysemu/hvf.h -+++ b/include/sysemu/hvf.h -@@ -13,6 +13,7 @@ - #ifndef HVF_H - #define HVF_H - -+#include "cpu.h" - #include "qemu/bitops.h" - #include "exec/memory.h" - #include "sysemu/accel.h" -diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h -index 5f6240d5cb..6181486401 100644 ---- a/include/sysemu/iothread.h -+++ b/include/sysemu/iothread.h -@@ -16,6 +16,7 @@ - - #include "block/aio.h" - #include "qemu/thread.h" -+#include "qom/object.h" - - #define TYPE_IOTHREAD "iothread" - -diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h -index 31df465fdc..787dbc7770 100644 ---- a/include/sysemu/kvm_int.h -+++ b/include/sysemu/kvm_int.h -@@ -9,6 +9,8 @@ - #ifndef QEMU_KVM_INT_H - #define QEMU_KVM_INT_H - -+#include "exec/cpu-common.h" -+#include "exec/memory.h" - #include "sysemu/sysemu.h" - #include "sysemu/accel.h" - #include "sysemu/kvm.h" -diff 
--git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h -index 58452457ce..1b440df486 100644 ---- a/include/sysemu/memory_mapping.h -+++ b/include/sysemu/memory_mapping.h -@@ -15,6 +15,8 @@ - #define MEMORY_MAPPING_H - - #include "qemu/queue.h" -+#include "exec/cpu-common.h" -+#include "exec/cpu-defs.h" - #include "exec/memory.h" - - typedef struct GuestPhysBlock { -diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h -index a03e2f1878..c8e7c2f6cf 100644 ---- a/include/sysemu/xen-mapcache.h -+++ b/include/sysemu/xen-mapcache.h -@@ -9,6 +9,8 @@ - #ifndef XEN_MAPCACHE_H - #define XEN_MAPCACHE_H - -+#include "exec/cpu-common.h" -+ - typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, - ram_addr_t size); - #ifdef CONFIG_XEN -diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h -index d714127799..58bd3a1ec4 100644 ---- a/include/ui/egl-helpers.h -+++ b/include/ui/egl-helpers.h -@@ -4,6 +4,9 @@ - #include - #include - #include -+#include "qapi/qapi-types-ui.h" -+#include "ui/console.h" -+#include "ui/shader.h" - - extern EGLDisplay *qemu_egl_display; - extern EGLConfig qemu_egl_config; -diff --git a/include/ui/input.h b/include/ui/input.h -index 8c8ccb999f..c86219a1c1 100644 ---- a/include/ui/input.h -+++ b/include/ui/input.h -@@ -2,6 +2,7 @@ - #define INPUT_H - - #include "qapi/qapi-types-ui.h" -+#include "qemu/notify.h" - - #define INPUT_EVENT_MASK_KEY (1< - #include - #include - #include -diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h -index aab251bc4b..e9fba96be9 100644 ---- a/target/hppa/cpu.h -+++ b/target/hppa/cpu.h -@@ -22,7 +22,7 @@ - - #include "cpu-qom.h" - #include "exec/cpu-defs.h" -- -+#include "exec/memory.h" - - /* PA-RISC 1.x processors have a strong memory model. */ - /* ??? 
While we do not yet implement PA-RISC 2.0, those processors have --- -2.27.0 - diff --git a/include-qom-object.h-New-OBJECT_DEFINE_SIMPLE_TYPE-_.patch b/include-qom-object.h-New-OBJECT_DEFINE_SIMPLE_TYPE-_.patch new file mode 100644 index 0000000000000000000000000000000000000000..5ca38f59fc280dd54314fca15aa084db5a026f62 --- /dev/null +++ b/include-qom-object.h-New-OBJECT_DEFINE_SIMPLE_TYPE-_.patch @@ -0,0 +1,249 @@ +From b1304358281cd973a8c7ef057e350e5e2028e005 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Tue, 20 Feb 2024 16:06:16 +0000 +Subject: [PATCH] include/qom/object.h: New OBJECT_DEFINE_SIMPLE_TYPE{, + _WITH_INTERFACES} macros +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://gitlab.com/qemu-project/qemu/-/commit/e54c24339f3e6533af0b0c4364c5c9c9f74e9273 + +We have an OBJECT_DEFINE_TYPE_EXTENDED macro, plus several variations +on it, which emits the boilerplate for the TypeInfo and ensures it is +registered with the type system. However, all the existing macros +insist that the type being defined has its own FooClass struct, so +they aren't useful for the common case of a simple leaf class which +doesn't have any new methods or any other need for its own class +struct (that is, for the kind of type that OBJECT_DECLARE_SIMPLE_TYPE +declares). + +Pull the actual implementation of OBJECT_DEFINE_TYPE_EXTENDED out +into a new DO_OBJECT_DEFINE_TYPE_EXTENDED which parameterizes the +value we use for the class_size field. This lets us add a new +OBJECT_DEFINE_SIMPLE_TYPE which does the same job as the various +existing OBJECT_DEFINE_*_TYPE_* family macros for this kind of simple +type, and the variant OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES for +when the type will implement some interfaces. + +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Peter Maydell +Reviewed-by: Michael S. 
Tsirkin +Reviewed-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20240220160622.114437-5-peter.maydell@linaro.org +Reviewed-by: Zhao Liu +Signed-off-by: houmingyong +--- + docs/devel/qom.rst | 34 +++++++++++++--- + include/qom/object.h | 96 ++++++++++++++++++++++++++++++++++++-------- + 2 files changed, 108 insertions(+), 22 deletions(-) + +diff --git a/docs/devel/qom.rst b/docs/devel/qom.rst +index 9918fac7f2..0889ca949c 100644 +--- a/docs/devel/qom.rst ++++ b/docs/devel/qom.rst +@@ -348,12 +348,14 @@ used. This does the same as OBJECT_DECLARE_SIMPLE_TYPE(), but without + the 'struct MyDeviceClass' definition. + + To implement the type, the OBJECT_DEFINE macro family is available. +-In the simple case the OBJECT_DEFINE_TYPE macro is suitable: ++For the simplest case of a leaf class which doesn't need any of its ++own virtual functions (i.e. which was declared with OBJECT_DECLARE_SIMPLE_TYPE) ++the OBJECT_DEFINE_SIMPLE_TYPE macro is suitable: + + .. code-block:: c + :caption: Defining a simple type + +- OBJECT_DEFINE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE) ++ OBJECT_DEFINE_SIMPLE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE) + + This is equivalent to the following: + +@@ -370,7 +372,6 @@ This is equivalent to the following: + .instance_size = sizeof(MyDevice), + .instance_init = my_device_init, + .instance_finalize = my_device_finalize, +- .class_size = sizeof(MyDeviceClass), + .class_init = my_device_class_init, + }; + +@@ -385,13 +386,36 @@ This is sufficient to get the type registered with the type + system, and the three standard methods now need to be implemented + along with any other logic required for the type. + ++If the class needs its own virtual methods, or has some other ++per-class state it needs to store in its own class struct, ++then you can use the OBJECT_DEFINE_TYPE macro. This does the ++same thing as OBJECT_DEFINE_SIMPLE_TYPE, but it also sets the ++class_size of the type to the size of the class struct. ++ ++.. 
code-block:: c ++ :caption: Defining a type which needs a class struct ++ ++ OBJECT_DEFINE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE) ++ + If the type needs to implement one or more interfaces, then the +-OBJECT_DEFINE_TYPE_WITH_INTERFACES() macro can be used instead. +-This accepts an array of interface type names. ++OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES() and ++OBJECT_DEFINE_TYPE_WITH_INTERFACES() macros can be used instead. ++These accept an array of interface type names. The difference between ++them is that the former is for simple leaf classes that don't need ++a class struct, and the latter is for when you will be defining ++a class struct. + + .. code-block:: c + :caption: Defining a simple type implementing interfaces + ++ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(MyDevice, my_device, ++ MY_DEVICE, DEVICE, ++ { TYPE_USER_CREATABLE }, ++ { NULL }) ++ ++.. code-block:: c ++ :caption: Defining a type implementing interfaces ++ + OBJECT_DEFINE_TYPE_WITH_INTERFACES(MyDevice, my_device, + MY_DEVICE, DEVICE, + { TYPE_USER_CREATABLE }, +diff --git a/include/qom/object.h b/include/qom/object.h +index afccd24ca7..f52ab216cd 100644 +--- a/include/qom/object.h ++++ b/include/qom/object.h +@@ -259,31 +259,23 @@ struct Object + + + /** +- * OBJECT_DEFINE_TYPE_EXTENDED: ++ * DO_OBJECT_DEFINE_TYPE_EXTENDED: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * @ABSTRACT: boolean flag to indicate whether the object can be instantiated ++ * @CLASS_SIZE: size of the type's class + * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces + * +- * This macro is typically used in a source file, and will: +- * +- * - declare prototypes for _finalize, _class_init and _init methods +- * - declare 
the TypeInfo struct instance +- * - provide the constructor to register the type +- * +- * After using this macro, implementations of the _finalize, _class_init, +- * and _init methods need to be written. Any of these can be zero-line +- * no-op impls if no special logic is required for a given type. +- * +- * This macro should rarely be used, instead one of the more specialized +- * macros is usually a better choice. ++ * This is the base macro used to implement all the OBJECT_DEFINE_* ++ * macros. It should never be used directly in a source file. + */ +-#define OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ +- MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ +- ABSTRACT, ...) \ ++#define DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ ++ MODULE_OBJ_NAME, \ ++ PARENT_MODULE_OBJ_NAME, \ ++ ABSTRACT, CLASS_SIZE, ...) \ + static void \ + module_obj_name##_finalize(Object *obj); \ + static void \ +@@ -298,7 +290,7 @@ struct Object + .instance_align = __alignof__(ModuleObjName), \ + .instance_init = module_obj_name##_init, \ + .instance_finalize = module_obj_name##_finalize, \ +- .class_size = sizeof(ModuleObjName##Class), \ ++ .class_size = CLASS_SIZE, \ + .class_init = module_obj_name##_class_init, \ + .abstract = ABSTRACT, \ + .interfaces = (InterfaceInfo[]) { __VA_ARGS__ } , \ +@@ -311,6 +303,37 @@ struct Object + } \ + type_init(module_obj_name##_register_types); + ++/** ++ * OBJECT_DEFINE_TYPE_EXTENDED: ++ * @ModuleObjName: the object name with initial caps ++ * @module_obj_name: the object name in lowercase with underscore separators ++ * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators ++ * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore ++ * separators ++ * @ABSTRACT: boolean flag to indicate whether the object can be instantiated ++ * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces ++ * ++ * This macro is typically used in a source file, and will: 
++ * ++ * - declare prototypes for _finalize, _class_init and _init methods ++ * - declare the TypeInfo struct instance ++ * - provide the constructor to register the type ++ * ++ * After using this macro, implementations of the _finalize, _class_init, ++ * and _init methods need to be written. Any of these can be zero-line ++ * no-op impls if no special logic is required for a given type. ++ * ++ * This macro should rarely be used, instead one of the more specialized ++ * macros is usually a better choice. ++ */ ++#define OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ ++ MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ ++ ABSTRACT, ...) \ ++ DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ ++ MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ ++ ABSTRACT, sizeof(ModuleObjName##Class), \ ++ __VA_ARGS__) ++ + /** + * OBJECT_DEFINE_TYPE: + * @ModuleObjName: the object name with initial caps +@@ -368,6 +391,45 @@ struct Object + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + true, { NULL }) + ++/** ++ * OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES: ++ * @ModuleObjName: the object name with initial caps ++ * @module_obj_name: the object name in lowercase with underscore separators ++ * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators ++ * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore ++ * separators ++ * ++ * This is a variant of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable for ++ * the case of a non-abstract type, with interfaces, and with no requirement ++ * for a class struct. ++ */ ++#define OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(ModuleObjName, \ ++ module_obj_name, \ ++ MODULE_OBJ_NAME, \ ++ PARENT_MODULE_OBJ_NAME, ...) 
\ ++ DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ ++ MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ ++ false, 0, __VA_ARGS__) ++ ++/** ++ * OBJECT_DEFINE_SIMPLE_TYPE: ++ * @ModuleObjName: the object name with initial caps ++ * @module_obj_name: the object name in lowercase with underscore separators ++ * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators ++ * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore ++ * separators ++ * ++ * This is a variant of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable for ++ * the common case of a non-abstract type, without any interfaces, and with ++ * no requirement for a class struct. If you declared your type with ++ * OBJECT_DECLARE_SIMPLE_TYPE then this is probably the right choice for ++ * defining it. ++ */ ++#define OBJECT_DEFINE_SIMPLE_TYPE(ModuleObjName, module_obj_name, \ ++ MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME) \ ++ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(ModuleObjName, module_obj_name, \ ++ MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, { NULL }) ++ + /** + * struct TypeInfo: + * @name: The name of the type. 
+-- +2.33.0 + diff --git a/include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch b/include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e47ca26def6601174a9c693596606e4dd05e99b --- /dev/null +++ b/include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch @@ -0,0 +1,36 @@ +From 56bfcb77a384419dbd09ca37075a3cf4ba2e9f19 Mon Sep 17 00:00:00 2001 +From: Elen Avan +Date: Fri, 22 Dec 2023 22:17:21 +0300 +Subject: [PATCH] include/ui/rect.h: fix qemu_rect_init() mis-assignment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Elen Avan +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051 +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050 +Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test" +Cc: qemu-stable@nongnu.org +Reviewed-by: Michael Tokarev +Reviewed-by: Marc-André Lureau +Signed-off-by: Michael Tokarev +--- + include/ui/rect.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/ui/rect.h b/include/ui/rect.h +index 94898f92d0..68f05d78a8 100644 +--- a/include/ui/rect.h ++++ b/include/ui/rect.h +@@ -19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect, + uint16_t width, uint16_t height) + { + rect->x = x; +- rect->y = x; ++ rect->y = y; + rect->width = width; + rect->height = height; + } +-- +2.27.0 + diff --git a/intc-arm_gic-Support-IRQ-injection-for-more-than-256.patch b/intc-arm_gic-Support-IRQ-injection-for-more-than-256.patch deleted file mode 100644 index d69dcc71825a8ed70e133ffb7d28af03a08485a1..0000000000000000000000000000000000000000 --- a/intc-arm_gic-Support-IRQ-injection-for-more-than-256.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 15849387df5c25e8ebaef19e2a16e8d428675f5d Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 3 Oct 2019 17:46:39 +0200 -Subject: [PATCH 2/3] intc/arm_gic: Support IRQ injection for more than 256 - vpus - -Host 
kernels that expose the KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 capability -allow injection of interrupts along with vcpu ids larger than 255. -Let's encode the vpcu id on 12 bits according to the upgraded KVM_IRQ_LINE -ABI when needed. - -Given that we have two callsites that need to assemble -the value for kvm_set_irq(), a new helper routine, kvm_arm_set_irq -is introduced. - -Without that patch qemu exits with "kvm_set_irq: Invalid argument" -message. - -Signed-off-by: Eric Auger -Reported-by: Zenghui Yu -Reviewed-by: Richard Henderson -Reviewed-by: Andrew Jones -Acked-by: Marc Zyngier -Message-id: 20191003154640.22451-3-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry-picked from commit f6530926e2310147a7844a3e663230d47b3d7333) -Signed-off-by: Zenghui Yu ---- - hw/intc/arm_gic_kvm.c | 7 ++----- - target/arm/cpu.c | 10 ++++------ - target/arm/kvm.c | 12 ++++++++++++ - target/arm/kvm_arm.h | 1 + - 4 files changed, 19 insertions(+), 11 deletions(-) - -diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c -index a611e8ee..7d600a61 100644 ---- a/hw/intc/arm_gic_kvm.c -+++ b/hw/intc/arm_gic_kvm.c -@@ -55,7 +55,7 @@ void kvm_arm_gic_set_irq(uint32_t num_irq, int irq, int level) - * has separate fields in the irq number for type, - * CPU number and interrupt number. - */ -- int kvm_irq, irqtype, cpu; -+ int irqtype, cpu; - - if (irq < (num_irq - GIC_INTERNAL)) { - /* External interrupt. 
The kernel numbers these like the GIC -@@ -72,10 +72,7 @@ void kvm_arm_gic_set_irq(uint32_t num_irq, int irq, int level) - cpu = irq / GIC_INTERNAL; - irq %= GIC_INTERNAL; - } -- kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) -- | (cpu << KVM_ARM_IRQ_VCPU_SHIFT) | irq; -- -- kvm_set_irq(kvm_state, kvm_irq, !!level); -+ kvm_arm_set_irq(cpu, irqtype, irq, !!level); - } - - static void kvm_arm_gicv2_set_irq(void *opaque, int irq, int level) -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 41557821..0b4c8e27 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -561,16 +561,16 @@ static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level) - ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); -- int kvm_irq = KVM_ARM_IRQ_TYPE_CPU << KVM_ARM_IRQ_TYPE_SHIFT; - uint32_t linestate_bit; -+ int irq_id; - - switch (irq) { - case ARM_CPU_IRQ: -- kvm_irq |= KVM_ARM_IRQ_CPU_IRQ; -+ irq_id = KVM_ARM_IRQ_CPU_IRQ; - linestate_bit = CPU_INTERRUPT_HARD; - break; - case ARM_CPU_FIQ: -- kvm_irq |= KVM_ARM_IRQ_CPU_FIQ; -+ irq_id = KVM_ARM_IRQ_CPU_FIQ; - linestate_bit = CPU_INTERRUPT_FIQ; - break; - default: -@@ -582,9 +582,7 @@ static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level) - } else { - env->irq_line_state &= ~linestate_bit; - } -- -- kvm_irq |= cs->cpu_index << KVM_ARM_IRQ_VCPU_SHIFT; -- kvm_set_irq(kvm_state, kvm_irq, level ? 
1 : 0); -+ kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level); - #endif - } - -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 79a79f01..f60185ad 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -728,6 +728,18 @@ int kvm_arm_vgic_probe(void) - } - } - -+int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level) -+{ -+ int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq; -+ int cpu_idx1 = cpu % 256; -+ int cpu_idx2 = cpu / 256; -+ -+ kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) | -+ (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT); -+ -+ return kvm_set_irq(kvm_state, kvm_irq, !!level); -+} -+ - int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, - uint64_t address, uint32_t data, PCIDevice *dev) - { -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 2a07333c..a9f3ccab 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -235,6 +235,7 @@ int kvm_arm_vgic_probe(void); - - void kvm_arm_pmu_set_irq(CPUState *cs, int irq); - void kvm_arm_pmu_init(CPUState *cs); -+int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); - - #else - --- -2.19.1 - diff --git a/intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch b/intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch deleted file mode 100644 index 30175fb5126a8a9b7138c206365b61c96bcddaf0..0000000000000000000000000000000000000000 --- a/intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch +++ /dev/null @@ -1,357 +0,0 @@ -From 0a75312c069d89be94bcaa688429d8f60a0c528b Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 13:15:35 +0800 -Subject: [PATCH] intc/gicv3: Add pre-sizing capability to GICv3 - -Currently GICv3 supports fixed smp_cpus CPUs, and all CPUs are -present always. Now we want to pre-sizing GICv3 to support max_cpus -CPUs and not all of them are present always, so some sizing codes -should be concerned. 
- -GIC irqs, GICR and GICC are pre-created for all possible CPUs at -start, but only smp_cpus CPUs are realize and irqs of smp_cpus CPUs -are connected. - -Other code changes are mainly for arm_gicv3, and we do little about -kvm_arm_gicv3 becasue KVM will deal with the sizing information properly. - -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/arm/virt.c | 17 +++++++++++---- - hw/intc/arm_gicv3.c | 43 +++++++++++++++++++++++++------------- - hw/intc/arm_gicv3_common.c | 23 ++++++++++++++++++-- - hw/intc/arm_gicv3_cpuif.c | 4 ++++ - hw/intc/arm_gicv3_kvm.c | 28 ++++++++++++++++++++++++- - include/hw/arm/virt.h | 3 ++- - 6 files changed, 96 insertions(+), 22 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 55d403bad6..dda22194b5 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -761,14 +761,19 @@ static void create_gic(VirtMachineState *vms) - SysBusDevice *gicbusdev; - const char *gictype; - int type = vms->gic_version, i; -+ /* The max number of CPUs suppored by GIC */ -+ unsigned int num_cpus = ms->smp.cpus; -+ /* The number of CPUs present before boot */ - unsigned int smp_cpus = ms->smp.cpus; - uint32_t nb_redist_regions = 0; - -+ assert(num_cpus >= smp_cpus); -+ - gictype = (type == 3) ? gicv3_class_name() : gic_class_name(); - - vms->gic = qdev_create(NULL, gictype); - qdev_prop_set_uint32(vms->gic, "revision", type); -- qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); -+ qdev_prop_set_uint32(vms->gic, "num-cpu", num_cpus); - /* Note that the num-irq property counts both internal and external - * interrupts; there are always 32 of the former (mandated by GIC spec). 
- */ -@@ -780,7 +785,7 @@ static void create_gic(VirtMachineState *vms) - if (type == 3) { - uint32_t redist0_capacity = - vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; -- uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); -+ uint32_t redist0_count = MIN(num_cpus, redist0_capacity); - - nb_redist_regions = virt_gicv3_redist_region_count(vms); - -@@ -793,7 +798,7 @@ static void create_gic(VirtMachineState *vms) - vms->memmap[VIRT_HIGH_GIC_REDIST2].size / GICV3_REDIST_SIZE; - - qdev_prop_set_uint32(vms->gic, "redist-region-count[1]", -- MIN(smp_cpus - redist0_count, redist1_capacity)); -+ MIN(num_cpus - redist0_count, redist1_capacity)); - } - } else { - if (!kvm_irqchip_in_kernel()) { -@@ -820,7 +825,11 @@ static void create_gic(VirtMachineState *vms) - - /* Wire the outputs from each CPU's generic timer and the GICv3 - * maintenance interrupt signal to the appropriate GIC PPI inputs, -- * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. -+ * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's -+ * inputs. -+ * -+ * The irqs of remaining CPUs (if we has) will be connected during -+ * hotplugging. - */ - for (i = 0; i < smp_cpus; i++) { - connect_gic_cpu_irqs(vms, i); -diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c -index cacef26546..a60185113f 100644 ---- a/hw/intc/arm_gicv3.c -+++ b/hw/intc/arm_gicv3.c -@@ -20,6 +20,7 @@ - #include "qemu/module.h" - #include "hw/sysbus.h" - #include "hw/intc/arm_gicv3.h" -+#include "qom/cpu.h" - #include "gicv3_internal.h" - - static bool irqbetter(GICv3CPUState *cs, int irq, uint8_t prio) -@@ -206,7 +207,9 @@ static void gicv3_update_noirqset(GICv3State *s, int start, int len) - assert(len > 0); - - for (i = 0; i < s->num_cpu; i++) { -- s->cpu[i].seenbetter = false; -+ if (qemu_get_cpu(i)) { -+ s->cpu[i].seenbetter = false; -+ } - } - - /* Find the highest priority pending interrupt in this range. 
*/ -@@ -248,16 +251,18 @@ static void gicv3_update_noirqset(GICv3State *s, int start, int len) - * now be the new best one). - */ - for (i = 0; i < s->num_cpu; i++) { -- GICv3CPUState *cs = &s->cpu[i]; -+ if (qemu_get_cpu(i)) { -+ GICv3CPUState *cs = &s->cpu[i]; - -- if (cs->seenbetter) { -- cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq); -- } -+ if (cs->seenbetter) { -+ cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq); -+ } - -- if (!cs->seenbetter && cs->hppi.prio != 0xff && -- cs->hppi.irq >= start && cs->hppi.irq < start + len) { -- gicv3_full_update_noirqset(s); -- break; -+ if (!cs->seenbetter && cs->hppi.prio != 0xff && -+ cs->hppi.irq >= start && cs->hppi.irq < start + len) { -+ gicv3_full_update_noirqset(s); -+ break; -+ } - } - } - } -@@ -268,7 +273,9 @@ void gicv3_update(GICv3State *s, int start, int len) - - gicv3_update_noirqset(s, start, len); - for (i = 0; i < s->num_cpu; i++) { -- gicv3_cpuif_update(&s->cpu[i]); -+ if (qemu_get_cpu(i)) { -+ gicv3_cpuif_update(&s->cpu[i]); -+ } - } - } - -@@ -280,7 +287,9 @@ void gicv3_full_update_noirqset(GICv3State *s) - int i; - - for (i = 0; i < s->num_cpu; i++) { -- s->cpu[i].hppi.prio = 0xff; -+ if (qemu_get_cpu(i)) { -+ s->cpu[i].hppi.prio = 0xff; -+ } - } - - /* Note that we can guarantee that these functions will not -@@ -291,7 +300,9 @@ void gicv3_full_update_noirqset(GICv3State *s) - gicv3_update_noirqset(s, GIC_INTERNAL, s->num_irq - GIC_INTERNAL); - - for (i = 0; i < s->num_cpu; i++) { -- gicv3_redist_update_noirqset(&s->cpu[i]); -+ if (qemu_get_cpu(i)) { -+ gicv3_redist_update_noirqset(&s->cpu[i]); -+ } - } - } - -@@ -304,7 +315,9 @@ void gicv3_full_update(GICv3State *s) - - gicv3_full_update_noirqset(s); - for (i = 0; i < s->num_cpu; i++) { -- gicv3_cpuif_update(&s->cpu[i]); -+ if (qemu_get_cpu(i)) { -+ gicv3_cpuif_update(&s->cpu[i]); -+ } - } - } - -@@ -401,7 +414,9 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) - } - - for (i = 0; i < s->num_cpu; i++) { -- 
gicv3_cpu_realize(s, i); -+ if (qemu_get_cpu(i)) { -+ gicv3_cpu_realize(s, i); -+ } - } - } - -diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c -index 8740a52c9f..913bf068be 100644 ---- a/hw/intc/arm_gicv3_common.c -+++ b/hw/intc/arm_gicv3_common.c -@@ -24,10 +24,12 @@ - #include "qemu/osdep.h" - #include "qapi/error.h" - #include "qemu/module.h" -+#include "qemu/error-report.h" - #include "qom/cpu.h" - #include "hw/intc/arm_gicv3_common.h" - #include "gicv3_internal.h" - #include "hw/arm/linux-boot-if.h" -+#include "hw/boards.h" - #include "sysemu/kvm.h" - - -@@ -363,10 +365,15 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) - for (i = 0; i < s->num_cpu; i++) { - CPUState *cpu = qemu_get_cpu(i); - -+ MachineState *ms = MACHINE(qdev_get_machine()); -+ MachineClass *mc = MACHINE_GET_CLASS(ms); -+ const CPUArchIdList *possible_cpus = NULL; - uint64_t cpu_affid; - int last; - -- arm_gicv3_common_cpu_realize(s, i); -+ if (cpu) { -+ arm_gicv3_common_cpu_realize(s, i); -+ } - - /* Pre-construct the GICR_TYPER: - * For our implementation: -@@ -380,7 +387,19 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) - * VLPIS == 0 (virtual LPIs not supported) - * PLPIS == 0 (physical LPIs not supported) - */ -- cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); -+ if (cpu) { -+ cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); -+ } else { -+ if (!mc->possible_cpu_arch_ids) { -+ error_report("MachineClass must implement possible_cpu_arch_ids " -+ "hook to support pre-sizing GICv3"); -+ exit(1); -+ } -+ -+ possible_cpus = mc->possible_cpu_arch_ids(ms); -+ cpu_affid = possible_cpus->cpus[i].arch_id; -+ } -+ - last = (i == s->num_cpu - 1); - - /* The CPU mp-affinity property is in MPIDR register format; squash -diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c -index 56aa5efede..a20aa693ea 100644 ---- a/hw/intc/arm_gicv3_cpuif.c -+++ b/hw/intc/arm_gicv3_cpuif.c 
-@@ -1648,6 +1648,10 @@ static void icc_generate_sgi(CPUARMState *env, GICv3CPUState *cs, - aff, targetlist); - - for (i = 0; i < s->num_cpu; i++) { -+ if (!qemu_get_cpu(i)) { -+ continue; -+ } -+ - GICv3CPUState *ocs = &s->cpu[i]; - - if (irm) { -diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c -index f8d7be5479..8eea7c9dd9 100644 ---- a/hw/intc/arm_gicv3_kvm.c -+++ b/hw/intc/arm_gicv3_kvm.c -@@ -341,6 +341,10 @@ static void kvm_arm_gicv3_put(GICv3State *s) - for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { - GICv3CPUState *c = &s->cpu[ncpu]; - -+ if (!qemu_get_cpu(ncpu)) { -+ continue; -+ } -+ - reg64 = c->gicr_propbaser; - regl = (uint32_t)reg64; - kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, true); -@@ -366,6 +370,10 @@ static void kvm_arm_gicv3_put(GICv3State *s) - for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { - GICv3CPUState *c = &s->cpu[ncpu]; - -+ if (!qemu_get_cpu(ncpu)) { -+ continue; -+ } -+ - reg = c->gicr_ctlr; - kvm_gicr_access(s, GICR_CTLR, ncpu, ®, true); - -@@ -462,6 +470,10 @@ static void kvm_arm_gicv3_put(GICv3State *s) - GICv3CPUState *c = &s->cpu[ncpu]; - int num_pri_bits; - -+ if (!qemu_get_cpu(ncpu)) { -+ continue; -+ } -+ - kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); - kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, - &c->icc_ctlr_el1[GICV3_NS], true); -@@ -525,6 +537,10 @@ static void kvm_arm_gicv3_get(GICv3State *s) - /* Redistributor state (one per CPU) */ - - for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { -+ if (!qemu_get_cpu(ncpu)) { -+ continue; -+ } -+ - GICv3CPUState *c = &s->cpu[ncpu]; - - kvm_gicr_access(s, GICR_CTLR, ncpu, ®, false); -@@ -560,6 +576,10 @@ static void kvm_arm_gicv3_get(GICv3State *s) - - if (redist_typer & GICR_TYPER_PLPIS) { - for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { -+ if (!qemu_get_cpu(ncpu)) { -+ continue; -+ } -+ - GICv3CPUState *c = &s->cpu[ncpu]; - - kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, false); -@@ -613,6 +633,10 @@ static void kvm_arm_gicv3_get(GICv3State *s) - */ - - for (ncpu = 0; 
ncpu < s->num_cpu; ncpu++) { -+ if (!qemu_get_cpu(ncpu)) { -+ continue; -+ } -+ - GICv3CPUState *c = &s->cpu[ncpu]; - int num_pri_bits; - -@@ -806,7 +830,9 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) - } - - for (i = 0; i < s->num_cpu; i++) { -- kvm_arm_gicv3_cpu_realize(s, i); -+ if (qemu_get_cpu(i)) { -+ kvm_arm_gicv3_cpu_realize(s, i); -+ } - } - - /* Try to create the device via the device control API */ -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 6880ebe07c..beef4c8002 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -168,8 +168,9 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) - vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; - - assert(vms->gic_version == 3); -+ GICv3State *s = ARM_GICV3_COMMON(vms->gic); - -- return vms->smp_cpus > redist0_capacity ? 2 : 1; -+ return s->num_cpu > redist0_capacity ? 2 : 1; - } - - #endif /* QEMU_ARM_VIRT_H */ --- -2.19.1 diff --git a/intc-gicv3-Fixes-for-vcpu-hotplug.patch b/intc-gicv3-Fixes-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..5241a57ddaccffa65e13ff8defd9ffbad9750bc1 --- /dev/null +++ b/intc-gicv3-Fixes-for-vcpu-hotplug.patch @@ -0,0 +1,70 @@ +From 343b61303152b06f9e1ba6d09a405faeaa3fcc98 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:12:58 +0800 +Subject: [PATCH] intc/gicv3: Fixes for vcpu hotplug + +1. Some types of machine don't support possible_cpus +callback. +2. The cpu_update_notifier is register only when machine +support vcpu hotplug, so do notifier_remove() unconditi- +onally is wrong. 
+ +Signed-off-by: Keqian Zhu +--- + cpu-common.c | 4 ++++ + hw/intc/arm_gicv3_common.c | 9 +++++++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/cpu-common.c b/cpu-common.c +index da52e45760..54e63b3f77 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -113,6 +113,10 @@ CPUState *qemu_get_possible_cpu(int index) + MachineState *ms = MACHINE(qdev_get_machine()); + const CPUArchIdList *possible_cpus = ms->possible_cpus; + ++ if (possible_cpus == NULL) { ++ return qemu_get_cpu(index); ++ } ++ + assert((index >= 0) && (index < possible_cpus->len)); + + return CPU(possible_cpus->cpus[index].cpu); +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index d051024a30..5667d9f40b 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -25,6 +25,7 @@ + #include "qapi/error.h" + #include "qemu/module.h" + #include "qemu/error-report.h" ++#include "hw/boards.h" + #include "hw/core/cpu.h" + #include "hw/intc/arm_gicv3_common.h" + #include "hw/qdev-properties.h" +@@ -446,7 +447,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu = g_new0(GICv3CPUState, s->num_cpu); + + for (i = 0; i < s->num_cpu; i++) { +- CPUState *cpu = qemu_get_possible_cpu(i); ++ CPUState *cpu = qemu_get_possible_cpu(i) ? 
: qemu_get_cpu(i); + uint64_t cpu_affid; + + if (qemu_enabled_cpu(cpu)) { +@@ -506,8 +507,12 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + static void arm_gicv3_finalize(Object *obj) + { + GICv3State *s = ARM_GICV3_COMMON(obj); ++ Object *ms = qdev_get_machine(); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + +- notifier_remove(&s->cpu_update_notifier); ++ if (mc->has_hotpluggable_cpus) { ++ notifier_remove(&s->cpu_update_notifier); ++ } + g_free(s->redist_region_count); + } + +-- +2.27.0 + diff --git a/intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch b/intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch deleted file mode 100644 index 5232d3f2aeda4d3a8a83a725173eaff72d2af8d5..0000000000000000000000000000000000000000 --- a/intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch +++ /dev/null @@ -1,50 +0,0 @@ -From a7391f391336024986a5997e3beae8882c983ed0 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 12:55:17 +0800 -Subject: [PATCH] intc/gicv3_common: Factor out arm_gicv3_common_cpu_realize - -The CPU object of hotplugged CPU will be defer-created (during -hotplug session), so we must factor out realization code to let -it can be applied to individual CPU. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/intc/arm_gicv3_common.c | 15 +++++++++++---- - 1 file changed, 11 insertions(+), 4 deletions(-) - -diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c -index 5edabb928f..798f295d7c 100644 ---- a/hw/intc/arm_gicv3_common.c -+++ b/hw/intc/arm_gicv3_common.c -@@ -303,6 +303,16 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - } - } - -+static void arm_gicv3_common_cpu_realize(GICv3State *s, int ncpu) -+{ -+ CPUState *cpu = qemu_get_cpu(ncpu); -+ -+ s->cpu[ncpu].cpu = cpu; -+ s->cpu[ncpu].gic = s; -+ /* Store GICv3CPUState in CPUARMState gicv3state pointer */ -+ gicv3_set_gicv3state(cpu, &s->cpu[ncpu]); -+} -+ - static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) - { - GICv3State *s = ARM_GICV3_COMMON(dev); -@@ -350,10 +360,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) - uint64_t cpu_affid; - int last; - -- s->cpu[i].cpu = cpu; -- s->cpu[i].gic = s; -- /* Store GICv3CPUState in CPUARMState gicv3state pointer */ -- gicv3_set_gicv3state(cpu, &s->cpu[i]); -+ arm_gicv3_common_cpu_realize(s, i); - - /* Pre-construct the GICR_TYPER: - * For our implementation: --- -2.19.1 diff --git a/intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch b/intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch deleted file mode 100644 index 6af9a8f4f55fa4ce936c9d5898cd5c232abcaa9a..0000000000000000000000000000000000000000 --- a/intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch +++ /dev/null @@ -1,45 +0,0 @@ -From f45964c7e0df4ef17457a9ea92bfd255064139e1 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Fri, 10 Apr 2020 12:49:12 +0800 -Subject: [PATCH] intc/kvm_gicv3: Factor out kvm_arm_gicv3_cpu_realize - -The CPU object of hotplugged CPU will be defer-created (during -hotplug session), so we must factor out realization code to let -it can be applied to individual CPU. 
- -Signed-off-by: Keqian Zhu -Signed-off-by: Salil Mehta ---- - hw/intc/arm_gicv3_kvm.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c -index b1e74147ba..b2936938cb 100644 ---- a/hw/intc/arm_gicv3_kvm.c -+++ b/hw/intc/arm_gicv3_kvm.c -@@ -761,6 +761,12 @@ static void vm_change_state_handler(void *opaque, int running, - } - } - -+static void kvm_arm_gicv3_cpu_realize(GICv3State *s, int ncpu) -+{ -+ ARMCPU *cpu = ARM_CPU(qemu_get_cpu(ncpu)); -+ -+ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); -+} - - static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) - { -@@ -791,9 +797,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) - } - - for (i = 0; i < s->num_cpu; i++) { -- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); -- -- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); -+ kvm_arm_gicv3_cpu_realize(s, i); - } - - /* Try to create the device via the device control API */ --- -2.19.1 diff --git a/intel_iommu-Check-compatibility-with-host-IOMMU-capa.patch b/intel_iommu-Check-compatibility-with-host-IOMMU-capa.patch new file mode 100644 index 0000000000000000000000000000000000000000..140639c98083bee64c248ad3272488ce180ab50a --- /dev/null +++ b/intel_iommu-Check-compatibility-with-host-IOMMU-capa.patch @@ -0,0 +1,70 @@ +From 4ef1b086272552378c09356b0e9fd2548a27a621 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:43 +0800 +Subject: [PATCH] intel_iommu: Check compatibility with host IOMMU capabilities + +If check fails, host device (either VFIO or VDPA device) is not +compatible with current vIOMMU config and should not be passed to +guest. + +Only aw_bits is checked for now, we don't care about other caps +before scalable modern mode is introduced. + +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + hw/i386/intel_iommu.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index bdc14f8438..60d86e0cb6 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -3838,6 +3838,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, + return vtd_dev_as; + } + ++static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, ++ Error **errp) ++{ ++ HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); ++ int ret; ++ ++ if (!hiodc->get_cap) { ++ error_setg(errp, ".get_cap() not implemented"); ++ return false; ++ } ++ ++ /* Common checks */ ++ ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp); ++ if (ret < 0) { ++ return false; ++ } ++ if (s->aw_bits > ret) { ++ error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret); ++ return false; ++ } ++ ++ return true; ++} ++ + static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) + { +@@ -3858,6 +3882,11 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + return false; + } + ++ if (!vtd_check_hiod(s, hiod, errp)) { ++ vtd_iommu_unlock(s); ++ return false; ++ } ++ + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; +-- +2.41.0.windows.1 + diff --git a/intel_iommu-Extract-out-vtd_cap_init-to-initialize-c.patch b/intel_iommu-Extract-out-vtd_cap_init-to-initialize-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..e1c4d3ec067976b7e3ca49ef60d3b1a1b2568463 --- /dev/null +++ b/intel_iommu-Extract-out-vtd_cap_init-to-initialize-c.patch @@ -0,0 +1,142 @@ +From a051e4349316d7065c9418de691787edae8e7f4e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:41 +0800 +Subject: [PATCH] intel_iommu: Extract out vtd_cap_init() to initialize + cap/ecap + +Extract cap/ecap initialization in vtd_cap_init() to make code 
+cleaner. + +No functional change intended. + +Reviewed-by: Eric Auger +Signed-off-by: Zhenzhong Duan +Reviewed-by: Michael S. Tsirkin +--- + hw/i386/intel_iommu.c | 93 ++++++++++++++++++++++++------------------- + 1 file changed, 51 insertions(+), 42 deletions(-) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 3da56e439e..6716407b7a 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -3935,30 +3935,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) + return; + } + +-/* Do the initialization. It will also be called when reset, so pay +- * attention when adding new initialization stuff. +- */ +-static void vtd_init(IntelIOMMUState *s) ++static void vtd_cap_init(IntelIOMMUState *s) + { + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + +- memset(s->csr, 0, DMAR_REG_SIZE); +- memset(s->wmask, 0, DMAR_REG_SIZE); +- memset(s->w1cmask, 0, DMAR_REG_SIZE); +- memset(s->womask, 0, DMAR_REG_SIZE); +- +- s->root = 0; +- s->root_scalable = false; +- s->dmar_enabled = false; +- s->intr_enabled = false; +- s->iq_head = 0; +- s->iq_tail = 0; +- s->iq = 0; +- s->iq_size = 0; +- s->qi_enabled = false; +- s->iq_last_desc_type = VTD_INV_DESC_NONE; +- s->iq_dw = false; +- s->next_frcd_reg = 0; + s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | + VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | + VTD_CAP_MGAW(s->aw_bits); +@@ -3975,27 +3955,6 @@ static void vtd_init(IntelIOMMUState *s) + } + s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + +- /* +- * Rsvd field masks for spte +- */ +- vtd_spte_rsvd[0] = ~0ULL; +- vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, +- x86_iommu->dt_supported); +- vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); +- vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); +- vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); +- +- vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, +- x86_iommu->dt_supported); +- vtd_spte_rsvd_large[3] = 
VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, +- x86_iommu->dt_supported); +- +- if (s->scalable_mode || s->snoop_control) { +- vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; +- vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; +- vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; +- } +- + if (x86_iommu_ir_supported(x86_iommu)) { + s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; + if (s->intr_eim == ON_OFF_AUTO_ON) { +@@ -4028,6 +3987,56 @@ static void vtd_init(IntelIOMMUState *s) + if (s->pasid) { + s->ecap |= VTD_ECAP_PASID; + } ++} ++ ++/* ++ * Do the initialization. It will also be called when reset, so pay ++ * attention when adding new initialization stuff. ++ */ ++static void vtd_init(IntelIOMMUState *s) ++{ ++ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); ++ ++ memset(s->csr, 0, DMAR_REG_SIZE); ++ memset(s->wmask, 0, DMAR_REG_SIZE); ++ memset(s->w1cmask, 0, DMAR_REG_SIZE); ++ memset(s->womask, 0, DMAR_REG_SIZE); ++ ++ s->root = 0; ++ s->root_scalable = false; ++ s->dmar_enabled = false; ++ s->intr_enabled = false; ++ s->iq_head = 0; ++ s->iq_tail = 0; ++ s->iq = 0; ++ s->iq_size = 0; ++ s->qi_enabled = false; ++ s->iq_last_desc_type = VTD_INV_DESC_NONE; ++ s->iq_dw = false; ++ s->next_frcd_reg = 0; ++ ++ vtd_cap_init(s); ++ ++ /* ++ * Rsvd field masks for spte ++ */ ++ vtd_spte_rsvd[0] = ~0ULL; ++ vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, ++ x86_iommu->dt_supported); ++ vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); ++ vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); ++ vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); ++ ++ vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, ++ x86_iommu->dt_supported); ++ vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, ++ x86_iommu->dt_supported); ++ ++ if (s->scalable_mode || s->snoop_control) { ++ vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; ++ vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; ++ vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; ++ } + + vtd_reset_caches(s); + +-- +2.41.0.windows.1 + diff --git 
a/intel_iommu-Implement-set-unset-_iommu_device-callba.patch b/intel_iommu-Implement-set-unset-_iommu_device-callba.patch new file mode 100644 index 0000000000000000000000000000000000000000..572540296b4a82c78f11f80f880d0ef2ff60e28b --- /dev/null +++ b/intel_iommu-Implement-set-unset-_iommu_device-callba.patch @@ -0,0 +1,160 @@ +From 5834bb1ccce592380a91a5cf127f90a031cd7cf2 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Wed, 5 Jun 2024 16:30:42 +0800 +Subject: [PATCH] intel_iommu: Implement [set|unset]_iommu_device() callbacks + +Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU. +In set call, we take a reference of HostIOMMUDevice and store it +in hash table indexed by PCI BDF. + +Note this BDF index is device's real BDF not the aliased one which +is different from the index of VTDAddressSpace. There can be multiple +assigned devices under same virtual iommu group and share same +VTDAddressSpace, but each has its own HostIOMMUDevice. + +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + hw/i386/intel_iommu.c | 81 +++++++++++++++++++++++++++++++++++ + include/hw/i386/intel_iommu.h | 2 + + 2 files changed, 83 insertions(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 6716407b7a..bdc14f8438 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -61,6 +61,12 @@ struct vtd_as_key { + uint32_t pasid; + }; + ++/* bus/devfn is PCI device's real BDF not the aliased one */ ++struct vtd_hiod_key { ++ PCIBus *bus; ++ uint8_t devfn; ++}; ++ + struct vtd_iotlb_key { + uint64_t gfn; + uint32_t pasid; +@@ -250,6 +256,25 @@ static guint vtd_as_hash(gconstpointer v) + return (guint)(value << 8 | key->devfn); + } + ++/* Same implementation as vtd_as_hash() */ ++static guint vtd_hiod_hash(gconstpointer v) ++{ ++ return vtd_as_hash(v); ++} ++ ++static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2) ++{ ++ const struct vtd_hiod_key *key1 = v1; ++ const struct vtd_hiod_key *key2 = v2; ++ ++ return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); ++} ++ ++static void vtd_hiod_destroy(gpointer v) ++{ ++ object_unref(v); ++} ++ + static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, + gpointer user_data) + { +@@ -3813,6 +3838,58 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, + return vtd_dev_as; + } + ++static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, ++ HostIOMMUDevice *hiod, Error **errp) ++{ ++ IntelIOMMUState *s = opaque; ++ struct vtd_as_key key = { ++ .bus = bus, ++ .devfn = devfn, ++ }; ++ struct vtd_as_key *new_key; ++ ++ assert(hiod); ++ ++ vtd_iommu_lock(s); ++ ++ if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { ++ error_setg(errp, "Host IOMMU device already exist"); ++ vtd_iommu_unlock(s); ++ return false; ++ } ++ ++ new_key = g_malloc(sizeof(*new_key)); ++ new_key->bus = bus; ++ new_key->devfn = devfn; ++ ++ object_ref(hiod); ++ g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod); ++ ++ vtd_iommu_unlock(s); ++ 
++ return true; ++} ++ ++static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) ++{ ++ IntelIOMMUState *s = opaque; ++ struct vtd_as_key key = { ++ .bus = bus, ++ .devfn = devfn, ++ }; ++ ++ vtd_iommu_lock(s); ++ ++ if (!g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { ++ vtd_iommu_unlock(s); ++ return; ++ } ++ ++ g_hash_table_remove(s->vtd_host_iommu_dev, &key); ++ ++ vtd_iommu_unlock(s); ++} ++ + /* Unmap the whole range in the notifier's scope. */ + static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) + { +@@ -4117,6 +4194,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) + + static PCIIOMMUOps vtd_iommu_ops = { + .get_address_space = vtd_host_dma_iommu, ++ .set_iommu_device = vtd_dev_set_iommu_device, ++ .unset_iommu_device = vtd_dev_unset_iommu_device, + }; + + static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) +@@ -4240,6 +4319,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) + g_free, g_free); + s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, + g_free, g_free); ++ s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, vtd_hiod_equal, ++ g_free, vtd_hiod_destroy); + vtd_init(s); + pci_setup_iommu(bus, &vtd_iommu_ops, dev); + /* Pseudo address space under root PCI bus. 
*/ +diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h +index 7fa0a695c8..1eb05c29fc 100644 +--- a/include/hw/i386/intel_iommu.h ++++ b/include/hw/i386/intel_iommu.h +@@ -292,6 +292,8 @@ struct IntelIOMMUState { + /* list of registered notifiers */ + QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; + ++ GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */ ++ + /* interrupt remapping */ + bool intr_enabled; /* Whether guest enabled IR */ + dma_addr_t intr_root; /* Interrupt remapping table pointer */ +-- +2.41.0.windows.1 + diff --git a/intel_iommu-Send-IQE-event-when-setting-reserved-bit.patch b/intel_iommu-Send-IQE-event-when-setting-reserved-bit.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1e8e093dbc0639de5dac64db89e881dc33cbbe1 --- /dev/null +++ b/intel_iommu-Send-IQE-event-when-setting-reserved-bit.patch @@ -0,0 +1,39 @@ +From 66eb68e54a521bc0dac015415a9eca25fe479543 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 4 Nov 2024 20:55:34 +0800 +Subject: [PATCH] intel_iommu: Send IQE event when setting reserved bit in + IQT_TAIL + +According to VTD spec, Figure 11-22, Invalidation Queue Tail Register, +"When Descriptor Width (DW) field in Invalidation Queue Address Register +(IQA_REG) is Set (256-bit descriptors), hardware treats bit-4 as reserved +and a value of 1 in the bit will result in invalidation queue error." + +Current code missed to send IQE event to guest, fix it. + +Fixes: c0c1d351849b ("intel_iommu: add 256 bits qi_desc support") +Suggested-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Message-Id: <20241104125536.1236118-2-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Zhongrui Tang +--- + hw/i386/intel_iommu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 5085a6fee3..3da56e439e 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -2813,6 +2813,7 @@ static void vtd_handle_iqt_write(IntelIOMMUState *s) + if (s->iq_dw && (val & VTD_IQT_QT_256_RSV_BIT)) { + error_report_once("%s: RSV bit is set: val=0x%"PRIx64, + __func__, val); ++ vtd_handle_inv_queue_error(s); + return; + } + s->iq_tail = VTD_IQT_QT(s->iq_dw, val); +-- +2.41.0.windows.1 + diff --git a/io-Don-t-use-flag-of-printf-format.patch b/io-Don-t-use-flag-of-printf-format.patch deleted file mode 100644 index 61f3b71dda336dbc2c833b323880f7dd3be09dec..0000000000000000000000000000000000000000 --- a/io-Don-t-use-flag-of-printf-format.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 0aa003cd0e117cb160da7d4b6e50630bf2fedfd6 Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Mon, 19 Oct 2020 20:12:02 +0800 -Subject: [PATCH] io: Don't use '#' flag of printf format -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: AlexChen -Signed-off-by: Daniel P. 
Berrangé -(cherry-picked from commit 77b7829e75) ---- - io/channel-websock.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/io/channel-websock.c b/io/channel-websock.c -index fc36d44eba..d48a929e49 100644 ---- a/io/channel-websock.c -+++ b/io/channel-websock.c -@@ -734,7 +734,7 @@ static int qio_channel_websock_decode_header(QIOChannelWebsock *ioc, - opcode != QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE && - opcode != QIO_CHANNEL_WEBSOCK_OPCODE_PING && - opcode != QIO_CHANNEL_WEBSOCK_OPCODE_PONG) { -- error_setg(errp, "unsupported opcode: %#04x; only binary, close, " -+ error_setg(errp, "unsupported opcode: 0x%04x; only binary, close, " - "ping, and pong websocket frames are supported", opcode); - qio_channel_websock_write_close( - ioc, QIO_CHANNEL_WEBSOCK_STATUS_INVALID_DATA , --- -2.27.0 - diff --git a/iommu-Introduce-generic-header.patch b/iommu-Introduce-generic-header.patch deleted file mode 100644 index 76e0c0c80ff83bfd8a5f0130ca73c0623e0efc35..0000000000000000000000000000000000000000 --- a/iommu-Introduce-generic-header.patch +++ /dev/null @@ -1,53 +0,0 @@ -From e8055075dbbc932afccc1f18f4acc093fe9e4dc3 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 9 Jul 2019 12:20:12 +0200 -Subject: [PATCH] iommu: Introduce generic header - -This header is meant to exposes data types used by -several IOMMU devices such as struct for SVA and -nested stage configuration. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - include/hw/iommu/iommu.h | 28 ++++++++++++++++++++++++++++ - 1 file changed, 28 insertions(+) - create mode 100644 include/hw/iommu/iommu.h - -diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h -new file mode 100644 -index 0000000000..12092bda7b ---- /dev/null -+++ b/include/hw/iommu/iommu.h -@@ -0,0 +1,28 @@ -+/* -+ * common header for iommu devices -+ * -+ * Copyright Red Hat, Inc. 2019 -+ * -+ * Authors: -+ * Eric Auger -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. 
See -+ * the COPYING file in the top-level directory. -+ */ -+ -+#ifndef QEMU_HW_IOMMU_IOMMU_H -+#define QEMU_HW_IOMMU_IOMMU_H -+#ifdef __linux__ -+#include -+#endif -+ -+typedef struct IOMMUConfig { -+ union { -+#ifdef __linux__ -+ struct iommu_pasid_table_config pasid_cfg; -+#endif -+ }; -+} IOMMUConfig; -+ -+ -+#endif /* QEMU_HW_IOMMU_IOMMU_H */ --- -2.27.0 - diff --git a/iommufd.h-Updated-to-openeuler-olk-6.6-kernel.patch b/iommufd.h-Updated-to-openeuler-olk-6.6-kernel.patch new file mode 100644 index 0000000000000000000000000000000000000000..fab0bfa658275ca79de924aba7326d90d19bce71 --- /dev/null +++ b/iommufd.h-Updated-to-openeuler-olk-6.6-kernel.patch @@ -0,0 +1,90 @@ +From 8414bc02f988ecca7dda5325227ff5ffbe45150c Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 15 Jan 2025 10:02:58 +0000 +Subject: [PATCH] iommufd.h: Updated to openeuler olk-6.6 kernel + +Signed-off-by: Shameer Kolothum +--- + linux-headers/linux/iommufd.h | 26 ++++++++++++-------------- + 1 file changed, 12 insertions(+), 14 deletions(-) + +diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h +index 41559c6064..3e57fee01c 100644 +--- a/linux-headers/linux/iommufd.h ++++ b/linux-headers/linux/iommufd.h +@@ -51,8 +51,8 @@ enum { + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, + IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, +- IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f, +- IOMMUFD_CMD_VDEVICE_ALLOC = 0x90, ++ IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, ++ IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, + }; + + /** +@@ -397,18 +397,20 @@ struct iommu_hwpt_vtd_s1 { + }; + + /** +- * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 Context Descriptor Table info ++ * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE + * (IOMMU_HWPT_DATA_ARM_SMMUV3) + * + * @ste: The first two double words of the user space Stream Table Entry for +- * a user stage-1 Context Descriptor Table. Must be little-endian. ++ * the translation. Must be little-endian. 
+ * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) + * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax + * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * + * -EIO will be returned if @ste is not legal or contains any non-allowed field. + * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass +- * nested domain will translate the same as the nesting parent. ++ * nested domain will translate the same as the nesting parent. The S1 will ++ * install a Context Descriptor Table pointing at userspace memory translated ++ * by the nesting parent. + */ + struct iommu_hwpt_arm_smmuv3 { + __aligned_le64 ste[2]; +@@ -920,8 +922,8 @@ enum iommu_viommu_type { + * that is unique to a specific VM. Operations global to the IOMMU are connected + * to the vIOMMU, such as: + * - Security namespace for guest owned ID, e.g. guest-controlled cache tags ++ * - Non-device-affiliated event reporting, e.g. invalidation queue errors + * - Access to a sharable nesting parent pagetable across physical IOMMUs +- * - Non-affiliated event reporting (e.g. an invalidation queue error) + * - Virtualization of various platforms IDs, e.g. RIDs and others + * - Delivery of paravirtualized invalidation + * - Direct assigned invalidation queues +@@ -941,12 +943,10 @@ struct iommu_viommu_alloc { + * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) + * @size: sizeof(struct iommu_vdevice_alloc) + * @viommu_id: vIOMMU ID to associate with the virtual device +- * @dev_id: The pyhsical device to allocate a virtual instance on the vIOMMU +- * @__reserved: Must be 0 ++ * @dev_id: The physical device to allocate a virtual instance on the vIOMMU ++ * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY + * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID +- * of AMD IOMMU, and vID of a nested Intel VT-d to a Context Table. 
+- * @out_vdevice_id: Output virtual instance ID for the allocated object +- * @__reserved2: Must be 0 ++ * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table + * + * Allocate a virtual device instance (for a physical device) against a vIOMMU. + * This instance holds the device's information (related to its vIOMMU) in a VM. +@@ -955,10 +955,8 @@ struct iommu_vdevice_alloc { + __u32 size; + __u32 viommu_id; + __u32 dev_id; +- __u32 __reserved; +- __aligned_u64 virt_id; + __u32 out_vdevice_id; +- __u32 __reserved2; ++ __aligned_u64 virt_id; + }; + #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) + #endif +-- +2.41.0.windows.1 + diff --git a/iotests-143-Create-socket-in-SOCK_DIR.patch b/iotests-143-Create-socket-in-SOCK_DIR.patch deleted file mode 100644 index 31d6a8421e46d181deb1e7c6792f78546a3d873e..0000000000000000000000000000000000000000 --- a/iotests-143-Create-socket-in-SOCK_DIR.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 2e8fecd9e963c740cfe73d0de4491541423e185f Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Thu, 17 Oct 2019 15:31:40 +0200 -Subject: [PATCH] iotests/143: Create socket in $SOCK_DIR - -Signed-off-by: Max Reitz -Reviewed-by: Eric Blake -Reviewed-by: Thomas Huth -Message-id: 20191017133155.5327-9-mreitz@redhat.com -Signed-off-by: Max Reitz ---- - tests/qemu-iotests/143 | 6 +++--- - tests/qemu-iotests/143.out | 2 +- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 -index 92249ac8da..f649b36195 100755 ---- a/tests/qemu-iotests/143 -+++ b/tests/qemu-iotests/143 -@@ -29,7 +29,7 @@ status=1 # failure is the default! 
- _cleanup() - { - _cleanup_qemu -- rm -f "$TEST_DIR/nbd" -+ rm -f "$SOCK_DIR/nbd" - } - trap "_cleanup; exit \$status" 0 1 2 3 15 - -@@ -51,12 +51,12 @@ _send_qemu_cmd $QEMU_HANDLE \ - _send_qemu_cmd $QEMU_HANDLE \ - "{ 'execute': 'nbd-server-start', - 'arguments': { 'addr': { 'type': 'unix', -- 'data': { 'path': '$TEST_DIR/nbd' }}}}" \ -+ 'data': { 'path': '$SOCK_DIR/nbd' }}}}" \ - 'return' - - # This should just result in a client error, not in the server crashing - $QEMU_IO_PROG -f raw -c quit \ -- "nbd+unix:///no_such_export?socket=$TEST_DIR/nbd" 2>&1 \ -+ "nbd+unix:///no_such_export?socket=$SOCK_DIR/nbd" 2>&1 \ - | _filter_qemu_io | _filter_nbd - - _send_qemu_cmd $QEMU_HANDLE \ -diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out -index ee71b5aa42..037d34a409 100644 ---- a/tests/qemu-iotests/143.out -+++ b/tests/qemu-iotests/143.out -@@ -1,7 +1,7 @@ - QA output created by 143 - {"return": {}} - {"return": {}} --qemu-io: can't open device nbd+unix:///no_such_export?socket=TEST_DIR/nbd: Requested export not available -+qemu-io: can't open device nbd+unix:///no_such_export?socket=SOCK_DIR/nbd: Requested export not available - server reported: export 'no_such_export' not present - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --- -2.27.0 - diff --git a/iotests-244-Don-t-store-data-file-with-protocol-in-i.patch b/iotests-244-Don-t-store-data-file-with-protocol-in-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..604f10a482f83dbfd3582d438a918971236a7b8f --- /dev/null +++ b/iotests-244-Don-t-store-data-file-with-protocol-in-i.patch @@ -0,0 +1,52 @@ +From 905b918d99f2b60834b55f24738728ce9972ea29 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:49:40 +0200 +Subject: [PATCH] iotests/244: Don't store data-file with protocol in image + (CVE-2024-4467) + +We want to disable filename parsing for data 
files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +--- + tests/qemu-iotests/244 | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 +index 3e61fa25bb..bb9cc6512f 100755 +--- a/tests/qemu-iotests/244 ++++ b/tests/qemu-iotests/244 +@@ -215,9 +215,22 @@ $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" + $QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" + + # blkdebug doesn't support copy offloading, so this tests the error path +-$QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG" +-$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" +-$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" ++test_img_with_blkdebug="json:{ ++ 'driver': 'qcow2', ++ 'file': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'blkdebug', ++ 'image': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG.data' ++ } ++ } ++}" ++$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$test_img_with_blkdebug" ++$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$test_img_with_blkdebug" + + echo + echo "=== Flushing should flush the data file ===" +-- +2.41.0.windows.1 + diff --git a/iotests-270-Don-t-store-data-file-with-json-prefix-i.patch b/iotests-270-Don-t-store-data-file-with-json-prefix-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..766ee8b1a058688f2a7a47056f94e33ddd993089 --- /dev/null +++ b/iotests-270-Don-t-store-data-file-with-json-prefix-i.patch @@ -0,0 +1,54 @@ +From db48de0be2e1f4b476ffcaa94a4bd2c4b222f077 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 
14:49:40 +0200 +Subject: [PATCH] iotests/270: Don't store data-file with json: prefix in image + (CVE-2024-4467) + +We want to disable filename parsing for data files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +--- + tests/qemu-iotests/270 | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/270 b/tests/qemu-iotests/270 +index 74352342db..c37b674aa2 100755 +--- a/tests/qemu-iotests/270 ++++ b/tests/qemu-iotests/270 +@@ -60,8 +60,16 @@ _make_test_img -o cluster_size=2M,data_file="$TEST_IMG.orig" \ + # "write" 2G of data without using any space. + # (qemu-img create does not like it, though, because null-co does not + # support image creation.) +-$QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ +- "$TEST_IMG" ++test_img_with_null_data="json:{ ++ 'driver': '$IMGFMT', ++ 'file': { ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'null-co', ++ 'size':'4294967296' ++ } ++}" + + # This gives us a range of: + # 2^31 - 512 + 768 - 1 = 2^31 + 255 > 2^31 +@@ -74,7 +82,7 @@ $QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ + # on L2 boundaries, we need large L2 tables; hence the cluster size of + # 2 MB. (Anything from 256 kB should work, though, because then one L2 + # table covers 8 GB.) 
+-$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$TEST_IMG" | _filter_qemu_io ++$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$test_img_with_null_data" | _filter_qemu_io + + _check_test_img + +-- +2.41.0.windows.1 + diff --git a/iotests-adapt-to-output-change-for-recently-introduc.patch b/iotests-adapt-to-output-change-for-recently-introduc.patch new file mode 100644 index 0000000000000000000000000000000000000000..85946c303baf315c19a0762442ba38951a2be831 --- /dev/null +++ b/iotests-adapt-to-output-change-for-recently-introduc.patch @@ -0,0 +1,62 @@ +From 7212ca27f0dc957f83fe29858430ee2927e0175c Mon Sep 17 00:00:00 2001 +From: root +Date: Mon, 25 Mar 2024 21:31:32 +0800 +Subject: [PATCH] =?UTF-8?q?iotests:=20adapt=20to=20output=20change=20for?= + =?UTF-8?q?=20recently=20introduced=20'detached=20hea=E2=80=A6?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 39a94d7c34ce9d222fa9c0c99a14e20a567456d7 + +…der' field + +Failure was noticed when running the tests for the qcow2 image format. + +Fixes: 0bd779e ("crypto: Introduce 'detached-header' field in QCryptoBlockInfoLUKS") +Signed-off-by: Fiona Ebner +Message-ID: <20240216101415.293769-1-f.ebner@proxmox.com> +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +Signed-off-by: Gao Jiazhen +--- + tests/qemu-iotests/198.out | 2 ++ + tests/qemu-iotests/206.out | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out +index 805494916f..62fb73fa3e 100644 +--- a/tests/qemu-iotests/198.out ++++ b/tests/qemu-iotests/198.out +@@ -39,6 +39,7 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +@@ -84,6 +85,7 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out +index 7e95694777..979f00f9bf 100644 +--- a/tests/qemu-iotests/206.out ++++ b/tests/qemu-iotests/206.out +@@ -114,6 +114,7 @@ Format specific information: + refcount bits: 16 + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha1 + cipher alg: aes-128 + uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +-- +2.27.0 + diff --git a/ip_reass-Fix-use-after-free.patch b/ip_reass-Fix-use-after-free.patch deleted file mode 100644 index b26e8afb629d7d768608fdc471a9cf754be36f7e..0000000000000000000000000000000000000000 --- a/ip_reass-Fix-use-after-free.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 63b07dfe20a0d4971b0929d27359f478ba2d816b Mon Sep 17 00:00:00 2001 -From: Samuel Thibault -Date: Fri, 22 May 2020 10:52:55 +0800 -Subject: [PATCH] ip_reass: Fix use after free - -Using ip_deq after m_free might read pointers from an allocation reuse. - -This would be difficult to exploit, but that is still related with -CVE-2019-14378 which generates fragmented IP packets that would trigger this -issue and at least produce a DoS. 
-Signed-off-by: Samuel Thibault's avatarSamuel Thibault - -diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index 8c75d91..c07d7d4 100644 ---- a/slirp/src/ip_input.c -+++ b/slirp/src/ip_input.c -@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - */ - while (q != (struct ipasfrag *)&fp->frag_link && - ip->ip_off + ip->ip_len > q->ipf_off) { -+ struct ipasfrag *prev; - i = (ip->ip_off + ip->ip_len) - q->ipf_off; - if (i < q->ipf_len) { - q->ipf_len -= i; -@@ -299,9 +300,10 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - m_adj(dtom(slirp, q), i); - break; - } -+ prev = q; - q = q->ipf_next; -- m_free(dtom(slirp, q->ipf_prev)); -- ip_deq(q->ipf_prev); -+ ip_deq(prev); -+ m_free(dtom(slirp, prev)); - } - - insert: --- -1.8.3.1 - diff --git a/iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch deleted file mode 100644 index e6abdf7a51d13c4a94ee722164df1fbc54ed48bb..0000000000000000000000000000000000000000 --- a/iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 693fd2acdf14dd86c0bf852610f1c2cca80a74dc Mon Sep 17 00:00:00 2001 -From: Felipe Franciosi -Date: Thu, 23 Jan 2020 12:44:59 +0000 -Subject: [PATCH] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) - -When querying an iSCSI server for the provisioning status of blocks (via -GET LBA STATUS), Qemu only validates that the response descriptor zero's -LBA matches the one requested. Given the SCSI spec allows servers to -respond with the status of blocks beyond the end of the LUN, Qemu may -have its heap corrupted by clearing/setting too many bits at the end of -its allocmap for the LUN. - -A malicious guest in control of the iSCSI server could carefully program -Qemu's heap (by selectively setting the bitmap) and then smash it. 
- -This limits the number of bits that iscsi_co_block_status() will try to -update in the allocmap so it can't overflow the bitmap. - -Fixes: CVE-2020-1711 -Cc: qemu-stable@nongnu.org -Signed-off-by: Felipe Franciosi -Signed-off-by: Peter Turschmid -Signed-off-by: Raphael Norwitz -Signed-off-by: Kevin Wolf - -diff --git a/block/iscsi.c b/block/iscsi.c -index 2aea7e3f13..cbd57294ab 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -701,7 +701,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - struct scsi_get_lba_status *lbas = NULL; - struct scsi_lba_status_descriptor *lbasd = NULL; - struct IscsiTask iTask; -- uint64_t lba; -+ uint64_t lba, max_bytes; - int ret; - - iscsi_co_init_iscsitask(iscsilun, &iTask); -@@ -721,6 +721,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - } - - lba = offset / iscsilun->block_size; -+ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; - - qemu_mutex_lock(&iscsilun->mutex); - retry: -@@ -764,7 +765,7 @@ retry: - goto out_unlock; - } - -- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; -+ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); - - if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || - lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { --- -2.21.1 (Apple Git-122.3) - diff --git a/json-Fix-a-memleak-in-parse_pair.patch b/json-Fix-a-memleak-in-parse_pair.patch deleted file mode 100644 index c39776e6160b48f0f5bd1834899a0d186b03eeb7..0000000000000000000000000000000000000000 --- a/json-Fix-a-memleak-in-parse_pair.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 503d231e06c159c1530a76b1740b3ec7e47619e5 Mon Sep 17 00:00:00 2001 -From: Alex Chen -Date: Fri, 13 Nov 2020 14:55:25 +0000 -Subject: [PATCH] json: Fix a memleak in parse_pair() - -In qobject_type(), NULL is returned when the 'QObject' returned from parse_value() is not of QString type, -and this 'QObject' memory will leaked. 
-So we need to first cache the 'QObject' returned from parse_value(), and finally -free 'QObject' memory at the end of the function. -Also, we add a testcast about invalid dict key. - -The memleak stack is as follows: -Direct leak of 32 byte(s) in 1 object(s) allocated from: - #0 0xfffe4b3c34fb in __interceptor_malloc (/lib64/libasan.so.4+0xd34fb) - #1 0xfffe4ae48aa3 in g_malloc (/lib64/libglib-2.0.so.0+0x58aa3) - #2 0xaaab3557d9f7 in qnum_from_int qemu/qobject/qnum.c:25 - #3 0xaaab35584d23 in parse_literal qemu/qobject/json-parser.c:511 - #4 0xaaab35584d23 in parse_value qemu/qobject/json-parser.c:554 - #5 0xaaab35583d77 in parse_pair qemu/qobject/json-parser.c:270 - #6 0xaaab355845db in parse_object qemu/qobject/json-parser.c:327 - #7 0xaaab355845db in parse_value qemu/qobject/json-parser.c:546 - #8 0xaaab35585b1b in json_parser_parse qemu/qobject/json-parser.c:580 - #9 0xaaab35583703 in json_message_process_token qemu/qobject/json-streamer.c:92 - #10 0xaaab355ddccf in json_lexer_feed_char qemu/qobject/json-lexer.c:313 - #11 0xaaab355de0eb in json_lexer_feed qemu/qobject/json-lexer.c:350 - #12 0xaaab354aff67 in tcp_chr_read qemu/chardev/char-socket.c:525 - #13 0xfffe4ae429db in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x529db) - #14 0xfffe4ae42d8f (/lib64/libglib-2.0.so.0+0x52d8f) - #15 0xfffe4ae430df in g_main_loop_run (/lib64/libglib-2.0.so.0+0x530df) - #16 0xaaab34d70bff in iothread_run qemu/iothread.c:82 - #17 0xaaab3559d71b in qemu_thread_start qemu/util/qemu-thread-posix.c:519 - -Fixes: 532fb5328473 ("qapi: Make more of qobject_to()") -Reported-by: Euler Robot -Signed-off-by: Alex Chen -Signed-off-by: Chen Qun -Signed-off-by: Markus Armbruster -Message-Id: <20201113145525.85151-1-alex.chen@huawei.com> -[Commit message tweaked] -(cherry-picked form commit 922d42bb) ---- - qobject/json-parser.c | 12 ++++++------ - tests/check-qjson.c | 9 +++++++++ - 2 files changed, 15 insertions(+), 6 deletions(-) - -diff --git a/qobject/json-parser.c 
b/qobject/json-parser.c -index 7d23e12e33..840909ea6a 100644 ---- a/qobject/json-parser.c -+++ b/qobject/json-parser.c -@@ -257,8 +257,9 @@ static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) - */ - static int parse_pair(JSONParserContext *ctxt, QDict *dict) - { -+ QObject *key_obj = NULL; -+ QString *key; - QObject *value; -- QString *key = NULL; - JSONToken *peek, *token; - - peek = parser_context_peek_token(ctxt); -@@ -267,7 +268,8 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict) - goto out; - } - -- key = qobject_to(QString, parse_value(ctxt)); -+ key_obj = parse_value(ctxt); -+ key = qobject_to(QString, key_obj); - if (!key) { - parse_error(ctxt, peek, "key is not a string in object"); - goto out; -@@ -297,13 +299,11 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict) - - qdict_put_obj(dict, qstring_get_str(key), value); - -- qobject_unref(key); -- -+ qobject_unref(key_obj); - return 0; - - out: -- qobject_unref(key); -- -+ qobject_unref(key_obj); - return -1; - } - -diff --git a/tests/check-qjson.c b/tests/check-qjson.c -index fa2afccb0a..5e3e08fe79 100644 ---- a/tests/check-qjson.c -+++ b/tests/check-qjson.c -@@ -1415,6 +1415,14 @@ static void invalid_dict_comma(void) - g_assert(obj == NULL); - } - -+static void invalid_dict_key(void) -+{ -+ Error *err = NULL; -+ QObject *obj = qobject_from_json("{32:'abc'}", &err); -+ error_free_or_abort(&err); -+ g_assert(obj == NULL); -+} -+ - static void unterminated_literal(void) - { - Error *err = NULL; -@@ -1500,6 +1508,7 @@ int main(int argc, char **argv) - g_test_add_func("/errors/unterminated/dict_comma", unterminated_dict_comma); - g_test_add_func("/errors/invalid_array_comma", invalid_array_comma); - g_test_add_func("/errors/invalid_dict_comma", invalid_dict_comma); -+ g_test_add_func("/errors/invalid_dict_key", invalid_dict_key); - g_test_add_func("/errors/unterminated/literal", unterminated_literal); - g_test_add_func("/errors/limits/nesting", limits_nesting); - 
g_test_add_func("/errors/multiple_values", multiple_values); --- -2.27.0 - diff --git a/kconfig-Activate-IOMMUFD-for-s390x-machines.patch b/kconfig-Activate-IOMMUFD-for-s390x-machines.patch new file mode 100644 index 0000000000000000000000000000000000000000..565563a12b35a42ce0eb186c06efc08a2930cefe --- /dev/null +++ b/kconfig-Activate-IOMMUFD-for-s390x-machines.patch @@ -0,0 +1,34 @@ +From 3dfc0dd0b59925d1b73ca1a0db6d307ae597f76e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Sat, 11 Jan 2025 10:52:56 +0800 +Subject: [PATCH] kconfig: Activate IOMMUFD for s390x machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/s390x/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig +index 4c068d7960..26ad104485 100644 +--- a/hw/s390x/Kconfig ++++ b/hw/s390x/Kconfig +@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO + imply VFIO_CCW + imply WDT_DIAG288 + imply PCIE_DEVICES ++ imply IOMMUFD + select PCI_EXPRESS + select S390_FLIC + select S390_FLIC_KVM if KVM +-- +2.41.0.windows.1 + diff --git a/kvm-Add-support-for-CSV2-reboot.patch b/kvm-Add-support-for-CSV2-reboot.patch new file mode 100644 index 0000000000000000000000000000000000000000..10b0a12555d475565df8e3ed7fafb350a0ef6589 --- /dev/null +++ b/kvm-Add-support-for-CSV2-reboot.patch @@ -0,0 +1,171 @@ +From 09934a231a513289caaae68e68912b735cb44b75 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Thu, 15 Apr 2021 08:32:24 -0400 +Subject: [PATCH] kvm: Add support for CSV2 reboot + +Linux will set vcpu.arch.guest_state_protected to true after execute +LAUNCH_UPDATE_VMSA successfully, and then KVM will prevent any changes +to VMCB State Save Area. 
+ +In order to support CSV2 guest reboot, calls cpus_control_pre_system_reset() +to set vcpu.arch.guest_state_protected to false, and calls +cpus_control_post_system_reset() to restore VMSA of guest's vcpu with +data generated by LAUNCH_UPDATE_VMSA. + +In addition, for memory encrypted guest, additional works may be +required during system reset, such as flushing the cache. The function +cpus_control_post_system_reset() hints linux to flush caches of guest +memory. + +Signed-off-by: hanliyang +--- + accel/kvm/kvm-accel-ops.c | 3 +++ + accel/kvm/kvm-all.c | 10 ++++++++++ + accel/kvm/kvm-cpus.h | 3 +++ + include/sysemu/accel-ops.h | 3 +++ + include/sysemu/cpus.h | 2 ++ + linux-headers/linux/kvm.h | 4 ++++ + system/cpus.c | 14 ++++++++++++++ + system/runstate.c | 5 +++++ + 8 files changed, 44 insertions(+) + +diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c +index 6195150a0b..54f19028b8 100644 +--- a/accel/kvm/kvm-accel-ops.c ++++ b/accel/kvm/kvm-accel-ops.c +@@ -112,6 +112,9 @@ static void kvm_accel_ops_class_init(ObjectClass *oc, void *data) + ops->remove_breakpoint = kvm_remove_breakpoint; + ops->remove_all_breakpoints = kvm_remove_all_breakpoints; + #endif ++ ++ ops->control_pre_system_reset = kvm_cpus_control_pre_system_reset; ++ ops->control_post_system_reset = kvm_cpus_control_post_system_reset; + } + + static const TypeInfo kvm_accel_ops_type = { +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index dc3605e648..8077630825 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2810,6 +2810,16 @@ void kvm_cpu_synchronize_pre_loadvm(CPUState *cpu) + run_on_cpu(cpu, do_kvm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); + } + ++void kvm_cpus_control_pre_system_reset(void) ++{ ++ kvm_vm_ioctl(kvm_state, KVM_CONTROL_VCPU_PRE_SYSTEM_RESET, NULL); ++} ++ ++void kvm_cpus_control_post_system_reset(void) ++{ ++ kvm_vm_ioctl(kvm_state, KVM_CONTROL_VCPU_POST_SYSTEM_RESET, NULL); ++} ++ + #ifdef KVM_HAVE_MCE_INJECTION + static __thread 
void *pending_sigbus_addr; + static __thread int pending_sigbus_code; +diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h +index ca40add32c..27b9d0d9db 100644 +--- a/accel/kvm/kvm-cpus.h ++++ b/accel/kvm/kvm-cpus.h +@@ -23,4 +23,7 @@ int kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); + int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); + void kvm_remove_all_breakpoints(CPUState *cpu); + ++void kvm_cpus_control_pre_system_reset(void); ++void kvm_cpus_control_post_system_reset(void); ++ + #endif /* KVM_CPUS_H */ +diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h +index ef91fc28bb..7a32e7f820 100644 +--- a/include/sysemu/accel-ops.h ++++ b/include/sysemu/accel-ops.h +@@ -53,6 +53,9 @@ struct AccelOpsClass { + int (*insert_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); + int (*remove_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); + void (*remove_all_breakpoints)(CPUState *cpu); ++ ++ void (*control_pre_system_reset)(void); ++ void (*control_post_system_reset)(void); + }; + + #endif /* ACCEL_OPS_H */ +diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h +index b4a566cfe7..f24d27daf5 100644 +--- a/include/sysemu/cpus.h ++++ b/include/sysemu/cpus.h +@@ -44,6 +44,8 @@ extern int icount_align_option; + void qemu_cpu_kick_self(void); + + bool cpus_are_resettable(void); ++void cpus_control_pre_system_reset(void); ++void cpus_control_post_system_reset(void); + + void cpu_synchronize_all_states(void); + void cpu_synchronize_all_post_reset(void); +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index e796105b76..eb30402c2d 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1626,6 +1626,10 @@ struct kvm_master_dev_info + #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) + #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) + ++/* ioctls for control vcpu setup during system reset */ ++#define 
KVM_CONTROL_VCPU_PRE_SYSTEM_RESET _IO(KVMIO, 0xe8) ++#define KVM_CONTROL_VCPU_POST_SYSTEM_RESET _IO(KVMIO, 0xe9) ++ + /* + * ioctls for vcpu fds + */ +diff --git a/system/cpus.c b/system/cpus.c +index f2289e9545..d9de09b9e8 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -193,6 +193,20 @@ void cpu_synchronize_pre_loadvm(CPUState *cpu) + } + } + ++void cpus_control_pre_system_reset(void) ++{ ++ if (cpus_accel->control_pre_system_reset) { ++ cpus_accel->control_pre_system_reset(); ++ } ++} ++ ++void cpus_control_post_system_reset(void) ++{ ++ if (cpus_accel->control_post_system_reset) { ++ cpus_accel->control_post_system_reset(); ++ } ++} ++ + bool cpus_are_resettable(void) + { + if (cpus_accel->cpus_are_resettable) { +diff --git a/system/runstate.c b/system/runstate.c +index 538c645326..7e41626bb1 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -487,6 +487,8 @@ void qemu_system_reset(ShutdownCause reason) + + mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL; + ++ cpus_control_pre_system_reset(); ++ + cpu_synchronize_all_states(); + + if (mc && mc->reset) { +@@ -503,6 +505,9 @@ void qemu_system_reset(ShutdownCause reason) + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } + cpu_synchronize_all_post_reset(); ++ ++ cpus_control_post_system_reset(); ++ + monitor_qapi_event_discard_io_error(); + } + +-- +2.41.0.windows.1 + diff --git a/kvm-Add-support-for-SEV-shared-regions-list-and-KVM_.patch b/kvm-Add-support-for-SEV-shared-regions-list-and-KVM_.patch new file mode 100644 index 0000000000000000000000000000000000000000..e3b9f45950cee097849d1122d1c536d37cd072a8 --- /dev/null +++ b/kvm-Add-support-for-SEV-shared-regions-list-and-KVM_.patch @@ -0,0 +1,315 @@ +From 02e6bfc88ce5e944ce36b8ccb7d2af103a969980 Mon Sep 17 00:00:00 2001 +From: Ashish Kalra +Date: Tue, 27 Jul 2021 15:05:49 +0000 +Subject: [PATCH] kvm: Add support for SEV shared regions list and + KVM_EXIT_HYPERCALL. 
+ +cherry-picked from https://github.com/AMDESE/qemu/commit/fcbbd9b19ac. + +KVM_HC_MAP_GPA_RANGE hypercall is used by the SEV guest to notify a +change in the page encryption status to the hypervisor. The hypercall +should be invoked only when the encryption attribute is changed from +encrypted -> decrypted and vice versa. By default all guest pages are +considered encrypted. + +The hypercall exits to userspace with KVM_EXIT_HYPERCALL exit code, +currently this is used only by SEV guests for guest page encryption +status tracking. Add support to handle this exit and invoke SEV +shared regions list handlers. + +Add support for SEV guest shared regions and implementation of the +SEV shared regions list. + +Signed-off-by: Ashish Kalra +[ Fix conflicts. ] +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 3 ++ + target/i386/kvm/kvm.c | 48 +++++++++++++++++ + target/i386/kvm/sev-stub.c | 11 ++++ + target/i386/sev.c | 106 +++++++++++++++++++++++++++++++++++++ + target/i386/sev.h | 3 ++ + 5 files changed, 171 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 8d12435e41..9489a20835 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -348,6 +348,7 @@ struct kvm_run { + } iocsr_io; + /* KVM_EXIT_HYPERCALL */ + struct { ++#define KVM_HC_MAP_GPA_RANGE 12 + __u64 nr; + __u64 args[6]; + __u64 ret; +@@ -1204,6 +1205,8 @@ struct kvm_ppc_resize_hpt { + + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 + ++#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) ++ + #ifdef KVM_CAP_IRQ_ROUTING + + struct kvm_irq_routing_irqchip { +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a0bc9ea7b1..82f6d3b048 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -148,6 +148,7 @@ static int has_xcrs; + static int has_sregs2; + static int has_exception_payload; + static int has_triple_fault_event; ++static int has_map_gpa_range; + + static bool has_msr_mcg_ext_ctl; + +@@ -2191,6 
+2192,17 @@ int kvm_arch_init_vcpu(CPUState *cs) + c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10); + } + ++ if (sev_enabled()) { ++ c = cpuid_find_entry(&cpuid_data.cpuid, ++ KVM_CPUID_FEATURES | kvm_base, 0); ++ if (c) { ++ c->eax |= (1 << KVM_FEATURE_MIGRATION_CONTROL); ++ if (has_map_gpa_range) { ++ c->eax |= (1 << KVM_FEATURE_HC_MAP_GPA_RANGE); ++ } ++ } ++ } ++ + cpuid_data.cpuid.nent = cpuid_i; + + cpuid_data.cpuid.padding = 0; +@@ -2584,6 +2596,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + #endif + } + ++ has_map_gpa_range = kvm_check_extension(s, KVM_CAP_EXIT_HYPERCALL); ++ if (has_map_gpa_range) { ++ ret = kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, ++ KVM_EXIT_HYPERCALL_VALID_MASK); ++ if (ret < 0) { ++ error_report("kvm: Failed to enable MAP_GPA_RANGE cap: %s", ++ strerror(-ret)); ++ return ret; ++ } ++ } ++ + ret = kvm_get_supported_msrs(s); + if (ret < 0) { + return ret; +@@ -4936,6 +4959,28 @@ static int kvm_handle_tpr_access(X86CPU *cpu) + return 1; + } + ++static int kvm_handle_exit_hypercall(X86CPU *cpu, struct kvm_run *run) ++{ ++ /* ++ * Currently this exit is only used by SEV guests for ++ * guest page encryption status tracking. 
++ */ ++ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) { ++ unsigned long enc = run->hypercall.args[2]; ++ unsigned long gpa = run->hypercall.args[0]; ++ unsigned long npages = run->hypercall.args[1]; ++ unsigned long gfn_start = gpa >> TARGET_PAGE_BITS; ++ unsigned long gfn_end = gfn_start + npages; ++ ++ if (enc) { ++ sev_remove_shared_regions_list(gfn_start, gfn_end); ++ } else { ++ sev_add_shared_regions_list(gfn_start, gfn_end); ++ } ++ } ++ return 0; ++} ++ + int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) + { + static const uint8_t int3 = 0xcc; +@@ -5359,6 +5404,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_xen_handle_exit(cpu, &run->xen); + break; + #endif ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_handle_exit_hypercall(cpu, run); ++ break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; +diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c +index 1be5341e8a..1282d242a7 100644 +--- a/target/i386/kvm/sev-stub.c ++++ b/target/i386/kvm/sev-stub.c +@@ -19,3 +19,14 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + /* If we get here, cgs must be some non-SEV thing */ + return 0; + } ++ ++int sev_remove_shared_regions_list(unsigned long gfn_start, ++ unsigned long gfn_end) ++{ ++ return 0; ++} ++ ++int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end) ++{ ++ return 0; ++} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index de1a4b271e..8525a7351f 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -44,6 +44,11 @@ + #define TYPE_SEV_GUEST "sev-guest" + OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + ++struct shared_region { ++ unsigned long gfn_start, gfn_end; ++ QTAILQ_ENTRY(shared_region) list; ++}; ++ + + /** + * SevGuestState: +@@ -87,6 +92,8 @@ struct SevGuestState { + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; ++ ++ QTAILQ_HEAD(, shared_region) 
shared_regions_list; + }; + + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ +@@ -1136,6 +1143,7 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); + + cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; ++ QTAILQ_INIT(&sev->shared_regions_list); + + cgs->ready = true; + +@@ -1671,6 +1679,104 @@ int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr) + return sev_receive_update_data(f, ptr); + } + ++int sev_remove_shared_regions_list(unsigned long start, unsigned long end) ++{ ++ SevGuestState *s = sev_guest; ++ struct shared_region *pos; ++ ++ QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { ++ unsigned long l, r; ++ unsigned long curr_gfn_end = pos->gfn_end; ++ ++ /* ++ * Find if any intersection exists ? ++ * left bound for intersecting segment ++ */ ++ l = MAX(start, pos->gfn_start); ++ /* right bound for intersecting segment */ ++ r = MIN(end, pos->gfn_end); ++ if (l <= r) { ++ if (pos->gfn_start == l && pos->gfn_end == r) { ++ QTAILQ_REMOVE(&s->shared_regions_list, pos, list); ++ } else if (l == pos->gfn_start) { ++ pos->gfn_start = r; ++ } else if (r == pos->gfn_end) { ++ pos->gfn_end = l; ++ } else { ++ /* Do a de-merge -- split linked list nodes */ ++ struct shared_region *shrd_region; ++ ++ pos->gfn_end = l; ++ shrd_region = g_malloc0(sizeof(*shrd_region)); ++ if (!shrd_region) { ++ return 0; ++ } ++ shrd_region->gfn_start = r; ++ shrd_region->gfn_end = curr_gfn_end; ++ QTAILQ_INSERT_AFTER(&s->shared_regions_list, pos, ++ shrd_region, list); ++ } ++ } ++ if (end <= curr_gfn_end) { ++ break; ++ } ++ } ++ return 0; ++} ++ ++int sev_add_shared_regions_list(unsigned long start, unsigned long end) ++{ ++ struct shared_region *shrd_region; ++ struct shared_region *pos; ++ SevGuestState *s = sev_guest; ++ ++ if (QTAILQ_EMPTY(&s->shared_regions_list)) { ++ shrd_region = g_malloc0(sizeof(*shrd_region)); ++ if (!shrd_region) { ++ return -1; ++ } ++ 
shrd_region->gfn_start = start; ++ shrd_region->gfn_end = end; ++ QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list); ++ return 0; ++ } ++ ++ /* ++ * shared regions list is a sorted list in ascending order ++ * of guest PA's and also merges consecutive range of guest PA's ++ */ ++ QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { ++ /* handle duplicate overlapping regions */ ++ if (start >= pos->gfn_start && end <= pos->gfn_end) { ++ return 0; ++ } ++ if (pos->gfn_end < start) { ++ continue; ++ } ++ /* merge consecutive guest PA(s) -- forward merge */ ++ if (pos->gfn_start <= start && pos->gfn_end >= start) { ++ pos->gfn_end = end; ++ return 0; ++ } ++ break; ++ } ++ /* ++ * Add a new node ++ */ ++ shrd_region = g_malloc0(sizeof(*shrd_region)); ++ if (!shrd_region) { ++ return -1; ++ } ++ shrd_region->gfn_start = start; ++ shrd_region->gfn_end = end; ++ if (pos) { ++ QTAILQ_INSERT_BEFORE(pos, shrd_region, list); ++ } else { ++ QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list); ++ } ++ return 1; ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +diff --git a/target/i386/sev.h b/target/i386/sev.h +index d94da2956b..acf69d4e6f 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -61,6 +61,9 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); ++int sev_remove_shared_regions_list(unsigned long gfn_start, ++ unsigned long gfn_end); ++int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end); + + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); + +-- +2.41.0.windows.1 + diff --git a/kvm-Add-support-for-userspace-MSR-filtering-and-hand.patch b/kvm-Add-support-for-userspace-MSR-filtering-and-hand.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f611f4a9445befdd24290b37cebf856531db88cc --- /dev/null +++ b/kvm-Add-support-for-userspace-MSR-filtering-and-hand.patch @@ -0,0 +1,123 @@ +From 7aced2a5fff91e0fcff97bb5eafddafece0cb983 Mon Sep 17 00:00:00 2001 +From: Ashish Kalra +Date: Tue, 27 Jul 2021 17:59:33 +0000 +Subject: [PATCH] kvm: Add support for userspace MSR filtering and handling of + MSR_KVM_MIGRATION_CONTROL. + +cherry-picked from https://github.com/AMDESE/qemu/commit/67935c3fd5f. + +Add support for userspace MSR filtering using KVM_X86_SET_MSR_FILTER +ioctl and handling of MSRs in userspace. Currently this is only used +for SEV guests which use MSR_KVM_MIGRATION_CONTROL to indicate if the +guest is enabled and ready for migration. + +KVM arch code calls into SEV guest specific code to delete the +SEV migrate blocker which has been setup at SEV_LAUNCH_FINISH. + +Signed-off-by: Ashish Kalra +[ Fix conflicts. ] +Signed-off-by: hanliyang +--- + target/i386/kvm/kvm.c | 35 +++++++++++++++++++++++++++++++++++ + target/i386/kvm/sev-stub.c | 4 ++++ + target/i386/sev.c | 6 ++++++ + target/i386/sev.h | 1 + + 4 files changed, 46 insertions(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 82f6d3b048..a5a755db01 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2488,6 +2488,32 @@ static bool kvm_rdmsr_core_thread_count(X86CPU *cpu, uint32_t msr, + return true; + } + ++/* ++ * Currently this exit is only used by SEV guests for ++ * MSR_KVM_MIGRATION_CONTROL to indicate if the guest ++ * is ready for migration. 
++ */ ++static uint64_t msr_kvm_migration_control; ++ ++static bool kvm_rdmsr_kvm_migration_control(X86CPU *cpu, uint32_t msr, ++ uint64_t *val) ++{ ++ *val = msr_kvm_migration_control; ++ ++ return true; ++} ++ ++static bool kvm_wrmsr_kvm_migration_control(X86CPU *cpu, uint32_t msr, ++ uint64_t val) ++{ ++ msr_kvm_migration_control = val; ++ ++ if (val == KVM_MIGRATION_READY) ++ sev_del_migrate_blocker(); ++ ++ return true; ++} ++ + static Notifier smram_machine_done; + static KVMMemoryListener smram_listener; + static AddressSpace smram_address_space; +@@ -2735,6 +2761,15 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + strerror(-ret)); + exit(1); + } ++ ++ r = kvm_filter_msr(s, MSR_KVM_MIGRATION_CONTROL, ++ kvm_rdmsr_kvm_migration_control, ++ kvm_wrmsr_kvm_migration_control); ++ if (!r) { ++ error_report("Could not install MSR_KVM_MIGRATION_CONTROL handler: %s", ++ strerror(-ret)); ++ exit(1); ++ } + } + + return 0; +diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c +index 1282d242a7..99899688e4 100644 +--- a/target/i386/kvm/sev-stub.c ++++ b/target/i386/kvm/sev-stub.c +@@ -30,3 +30,7 @@ int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end) + { + return 0; + } ++ ++void sev_del_migrate_blocker(void) ++{ ++} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 47f41aefe7..98b0d3937a 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -925,6 +925,12 @@ sev_launch_finish(SevGuestState *sev) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++void ++sev_del_migrate_blocker(void) ++{ ++ migrate_del_blocker(&sev_mig_blocker); ++} ++ + static int + sev_receive_finish(SevGuestState *s) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index b9c2afb799..84e3bdf2df 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -70,6 +70,7 @@ int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end); + int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t 
*bytes_sent); + int sev_load_incoming_shared_regions_list(QEMUFile *f); + bool sev_is_gfn_in_unshared_region(unsigned long gfn); ++void sev_del_migrate_blocker(void); + + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); + +-- +2.41.0.windows.1 + diff --git a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch deleted file mode 100644 index dfa8bf6a01201096881ec49e34ddf0ed18eec84f..0000000000000000000000000000000000000000 --- a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +++ /dev/null @@ -1,99 +0,0 @@ -From ccfc5c99103e2f633084c906197075392f625a80 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 21 Nov 2019 16:56:45 +0000 -Subject: [PATCH] kvm: Reallocate dirty_bmap when we change a slot - -kvm_set_phys_mem can be called to reallocate a slot by something the -guest does (e.g. writing to PAM and other chipset registers). -This can happen in the middle of a migration, and if we're unlucky -it can now happen between the split 'sync' and 'clear'; the clear -asserts if there's no bmap to clear. Recreate the bmap whenever -we change the slot, keeping the clear path happy. - -Typically this is triggered by the guest rebooting during a migrate. - -Corresponds to: -https://bugzilla.redhat.com/show_bug.cgi?id=1772774 -https://bugzilla.redhat.com/show_bug.cgi?id=1771032 - -Signed-off-by: Dr. 
David Alan Gilbert -Reviewed-by: Peter Xu -Signed-off-by: Kunkun Jiang ---- - accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- - 1 file changed, 29 insertions(+), 15 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 6828f6a1f9..5a6b89cc2a 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -536,6 +536,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, - - #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) - -+/* Allocate the dirty bitmap for a slot */ -+static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) -+{ -+ /* -+ * XXX bad kernel interface alert -+ * For dirty bitmap, kernel allocates array of size aligned to -+ * bits-per-long. But for case when the kernel is 64bits and -+ * the userspace is 32bits, userspace can't align to the same -+ * bits-per-long, since sizeof(long) is different between kernel -+ * and user space. This way, userspace will provide buffer which -+ * may be 4 bytes less than the kernel will use, resulting in -+ * userspace memory corruption (which is not detectable by valgrind -+ * too, in most cases). -+ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -+ * a hope that sizeof(long) won't become >8 any time soon. -+ */ -+ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -+ /*HOST_LONG_BITS*/ 64) / 8; -+ mem->dirty_bmap = g_malloc0(bitmap_size); -+} -+ - /** - * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space - * -@@ -568,23 +589,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - goto out; - } - -- /* XXX bad kernel interface alert -- * For dirty bitmap, kernel allocates array of size aligned to -- * bits-per-long. But for case when the kernel is 64bits and -- * the userspace is 32bits, userspace can't align to the same -- * bits-per-long, since sizeof(long) is different between kernel -- * and user space. 
This way, userspace will provide buffer which -- * may be 4 bytes less than the kernel will use, resulting in -- * userspace memory corruption (which is not detectable by valgrind -- * too, in most cases). -- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -- * a hope that sizeof(long) won't become >8 any time soon. -- */ - if (!mem->dirty_bmap) { -- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -- /*HOST_LONG_BITS*/ 64) / 8; - /* Allocate on the first log_sync, once and for all */ -- mem->dirty_bmap = g_malloc0(bitmap_size); -+ kvm_memslot_init_dirty_bitmap(mem); - } - - d.dirty_bitmap = mem->dirty_bmap; -@@ -1066,6 +1073,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - mem->ram = ram; - mem->flags = kvm_mem_flags(mr); - -+ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { -+ /* -+ * Reallocate the bmap; it means it doesn't disappear in -+ * middle of a migrate. -+ */ -+ kvm_memslot_init_dirty_bitmap(mem); -+ } - err = kvm_set_user_memory_region(kml, mem, true); - if (err) { - fprintf(stderr, "%s: error registering slot: %s\n", __func__, --- -2.27.0 - diff --git a/kvm-Translate-MSI-doorbell-address-only-if-it-is-val.patch b/kvm-Translate-MSI-doorbell-address-only-if-it-is-val.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c0f313c4326e328c29fa7477fa25db1482b0e85 --- /dev/null +++ b/kvm-Translate-MSI-doorbell-address-only-if-it-is-val.patch @@ -0,0 +1,54 @@ +From cdd5c088ff46ebf423c926fe4c0b12e345ae0db0 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Thu, 23 Feb 2023 12:12:48 +0000 +Subject: [PATCH] =?UTF-8?q?kvm:=20Translate=20MSI=20doorbell=20address?= + =?UTF-8?q?=C2=A0only=20if=20it=20is=20valid?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Guest might have already set the MSI doorbell address to invalid +and if we try to translate the address again, Guest reports, + +[ 26.784082] arm-smmu-v3 arm-smmu-v3.0.auto: event 0x10 
received: +[ 26.784088] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000001000000010 +[ 26.784090] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000 +[ 26.784092] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000 +[ 26.784094] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000 +[ 26.788082] arm-smmu-v3 arm-smmu-v3.0.auto: event 0x10 received: +[ 26.788085] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000001000000010 +[ 26.788087] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000 +.... + +eg: rmmod hisi_zip.ko. The sequence seems to be, + + - Write 0 to MSI Message Address register + - Disable MSI + +Hence check for address validity before we try to do the translation. + +Note: The fix is placed in generic code and hopefully is not a problem +for other architectures. + +Signed-off-by: Shameer Kolothum +--- + accel/kvm/kvm-all.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index a8e29f148e..6fa97d2cbf 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2074,7 +2074,8 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, + kroute.flags = KVM_MSI_VALID_DEVID; + kroute.u.msi.devid = pci_requester_id(dev); + } +- if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) { ++ if (msg.address && ++ kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) { + return -EINVAL; + } + +-- +2.41.0.windows.1 + diff --git a/kvm-Use-kvm_vm_check_extension-where-necessary.patch b/kvm-Use-kvm_vm_check_extension-where-necessary.patch new file mode 100644 index 0000000000000000000000000000000000000000..bcf22d2afe971e654093b3e75387002b2f1631f5 --- /dev/null +++ b/kvm-Use-kvm_vm_check_extension-where-necessary.patch @@ -0,0 +1,86 @@ +From 4242973f80d6779b2e4235bacc18d685bbfcfda8 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 4 Dec 2024 15:34:28 +0000 +Subject: [PATCH] kvm: Use kvm_vm_check_extension() where necessary + 
+Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/97b19c96743303418578785a230019b8b26b0131 + +The Arm KVM code can return different values from KVM_CHECK_EXTENSION +depending on the VM type. Use kvm_vm_check_extension() where necessary +to ensure we get the right response from KVM. + +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/kvm.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + accel/kvm/kvm-all.c | 6 +++--- + target/arm/kvm64.c | 8 ++++---- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 7d175d3262..2cdd615025 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2363,13 +2363,13 @@ static int kvm_recommended_vcpus(KVMState *s) + + static int kvm_max_vcpus(KVMState *s) + { +- int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); ++ int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS); + return (ret) ? ret : kvm_recommended_vcpus(s); + } + + static int kvm_max_vcpu_id(KVMState *s) + { +- int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID); ++ int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPU_ID); + return (ret) ? 
ret : kvm_max_vcpus(s); + } + +@@ -2625,7 +2625,7 @@ static int kvm_init(MachineState *ms) + + #ifdef KVM_CAP_SET_GUEST_DEBUG + kvm_has_guest_debug = +- (kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0); ++ (kvm_vm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0); + #endif + + kvm_sstep_flags = 0; +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index b099287ed0..651f603dd8 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -39,11 +39,11 @@ void kvm_arm_init_debug(KVMState *s) + have_guest_debug = kvm_check_extension(s, + KVM_CAP_SET_GUEST_DEBUG); + +- max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); ++ max_hw_wps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); + hw_watchpoints = g_array_sized_new(true, true, + sizeof(HWWatchpoint), max_hw_wps); + +- max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); ++ max_hw_bps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); + hw_breakpoints = g_array_sized_new(true, true, + sizeof(HWBreakpoint), max_hw_bps); + return; +@@ -513,12 +513,12 @@ bool kvm_arm_aarch32_supported(void) + + bool kvm_arm_sve_supported(void) + { +- return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); ++ return kvm_vm_check_extension(kvm_state, KVM_CAP_ARM_SVE); + } + + bool kvm_arm_steal_time_supported(void) + { +- return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); ++ return kvm_vm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); + } + + QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); +-- +2.33.0 + diff --git a/kvm-add-support-for-guest-physical-bits.patch b/kvm-add-support-for-guest-physical-bits.patch new file mode 100644 index 0000000000000000000000000000000000000000..ce70df945a731b337369df0e5c9f63ce67d0974b --- /dev/null +++ b/kvm-add-support-for-guest-physical-bits.patch @@ -0,0 +1,111 @@ +From a2383a2a0537750794223f21156241b1b1e78d2e Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:35 +0100 +Subject: [PATCH] kvm: add support for guest physical 
bits + +commit 0d08c423688edcca857f88dab20f1fc56de2b281 upstream. + +Query kvm for supported guest physical address bits, in cpuid +function 80000008, eax[23:16]. Usually this is identical to host +physical address bits. With NPT or EPT being used this might be +restricted to 48 (max 4-level paging address space size) even if +the host cpu supports more physical address bits. + +When set pass this to the guest, using cpuid too. Guest firmware +can use this to figure how big the usable guest physical address +space is, so PCI bar mapping are actually reachable. + +Intel-SIG: commit 0d08c423688e kvm: add support for guest physical bits + +Signed-off-by: Gerd Hoffmann +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <20240318155336.156197-2-kraxel@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm-cpu.c | 50 ++++++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 8 deletions(-) + +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index 9c791b7b05..f76972e47e 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -18,10 +18,32 @@ + #include "kvm_i386.h" + #include "hw/core/accel-cpu.h" + ++static void kvm_set_guest_phys_bits(CPUState *cs) ++{ ++ X86CPU *cpu = X86_CPU(cs); ++ uint32_t eax, guest_phys_bits; ++ ++ eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX); ++ guest_phys_bits = (eax >> 16) & 0xff; ++ if (!guest_phys_bits) { ++ return; ++ } ++ cpu->guest_phys_bits = guest_phys_bits; ++ if (cpu->guest_phys_bits > cpu->phys_bits) { ++ cpu->guest_phys_bits = cpu->phys_bits; ++ } ++ ++ if (cpu->host_phys_bits && cpu->host_phys_bits_limit && ++ cpu->guest_phys_bits > cpu->host_phys_bits_limit) { ++ cpu->guest_phys_bits = cpu->host_phys_bits_limit; ++ } ++} ++ + static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + { + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; ++ bool ret; + + /* + * The realize order is 
important, since x86_cpu_realize() checks if +@@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + * + * realize order: + * +- * x86_cpu_realize(): +- * -> x86_cpu_expand_features() +- * -> cpu_exec_realizefn(): +- * -> accel_cpu_common_realize() +- * kvm_cpu_realizefn() -> host_cpu_realizefn() +- * -> cpu_common_realizefn() +- * -> check/update ucode_rev, phys_bits, mwait ++ * x86_cpu_realizefn(): ++ * x86_cpu_expand_features() ++ * cpu_exec_realizefn(): ++ * accel_cpu_common_realize() ++ * kvm_cpu_realizefn() ++ * host_cpu_realizefn() ++ * kvm_set_guest_phys_bits() ++ * check/update ucode_rev, phys_bits, guest_phys_bits, mwait ++ * cpu_common_realizefn() (via xcc->parent_realize) + */ + if (cpu->max_features) { + if (enable_cpu_pm && kvm_has_waitpkg()) { +@@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + MSR_IA32_UCODE_REV); + } + } +- return host_cpu_realizefn(cs, errp); ++ ret = host_cpu_realizefn(cs, errp); ++ if (!ret) { ++ return ret; ++ } ++ ++ if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) && ++ cpu->guest_phys_bits == -1) { ++ kvm_set_guest_phys_bits(cs); ++ } ++ ++ return true; + } + + static bool lmce_supported(void) +-- +2.41.0.windows.1 + diff --git a/kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch b/kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7c540706516d78212ea21f2455b3a0cac832ef2 --- /dev/null +++ b/kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch @@ -0,0 +1,42 @@ +From 1228f5c7cfcb78b19f163551aae0612602ac2d7d Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 28 Apr 2024 13:01:48 +0800 +Subject: [PATCH] kvm/arm: Fix SVE related logic for vcpu hotplug feature + +1. Must finalize SVE setting before kvm_arch_init_vcpu(). +2. Must not finalize KVM SVE repeatedly for hotplugged vcpu. 
+ +Signed-off-by: Keqian Zhu +--- + target/arm/kvm.c | 1 + + target/arm/kvm64.c | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 12c1b4b328..1ceb72a1c1 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -704,6 +704,7 @@ void kvm_arm_create_host_vcpu(ARMCPU *cpu) + * later while setting device attributes of the GICR during GICv3 + * reset + */ ++ arm_cpu_finalize_features(cpu, &error_abort); + ret = kvm_arch_init_vcpu(cs); + if (ret < 0) { + error_report("Failed to initialize host vcpu %ld", vcpu_id); +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 00b257bb4b..615e8bbbdf 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -647,7 +647,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + return ret; + } + +- if (cpu_isar_feature(aa64_sve, cpu)) { ++ if (cpu_isar_feature(aa64_sve, cpu) && !DEVICE(cpu)->hotplugged) { + ret = kvm_arm_sve_set_vls(cs); + if (ret) { + return ret; +-- +2.27.0 + diff --git a/kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch b/kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch new file mode 100644 index 0000000000000000000000000000000000000000..d248b18c61ef7113cd2ea16cd650db007632b943 --- /dev/null +++ b/kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch @@ -0,0 +1,63 @@ +From baacc5ed528a5259286622482a01e3e848aed57e Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 29 Apr 2024 17:14:47 +0800 +Subject: [PATCH] kvm/arm: Fix compatibility of cold-plug CPU with SVE + +For arm virt machine, besides hotplugged vcpu, the kvm state of +coldplugged CPU is also pre-inited and thus SVE is finalized. + +Add a flag in ARMCPU state and skip finalizing SVE again. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 5 +++++ + target/arm/cpu.h | 3 +++ + target/arm/kvm64.c | 2 +- + 3 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 507b09d96c..dfe4d9e129 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3282,6 +3282,11 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (!dev->hotplugged) { + cs->cold_booted = true; + } ++#ifdef CONFIG_KVM ++ if (cs->cpu_index >= ms->smp.cpus) { ++ cpu->kvm_sve_finalized = true; ++ } ++#endif + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index c51a0e3467..a5ba7f2a26 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -971,6 +971,9 @@ struct ArchCPU { + + /* KVM steal time */ + OnOffAuto kvm_steal_time; ++ ++ /* KVM SVE has been finalized for this CPU */ ++ bool kvm_sve_finalized; + #endif /* CONFIG_KVM */ + + /* Uniprocessor system with MP extensions */ +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 615e8bbbdf..8f01d485b0 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -647,7 +647,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + return ret; + } + +- if (cpu_isar_feature(aa64_sve, cpu) && !DEVICE(cpu)->hotplugged) { ++ if (cpu_isar_feature(aa64_sve, cpu) && !cpu->kvm_sve_finalized) { + ret = kvm_arm_sve_set_vls(cs); + if (ret) { + return ret; +-- +2.27.0 + diff --git a/kvm-msi-Mark-whether-there-is-an-IRQ-route-table-upd.patch b/kvm-msi-Mark-whether-there-is-an-IRQ-route-table-upd.patch new file mode 100644 index 0000000000000000000000000000000000000000..f0470e6a1c140483d44e383923dfebf112163bfb --- /dev/null +++ b/kvm-msi-Mark-whether-there-is-an-IRQ-route-table-upd.patch @@ -0,0 +1,218 @@ +From 66749037256732f369c387c136e14f727a51951f Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 1 Apr 2025 17:09:38 +0800 +Subject: [PATCH] kvm/msi: Mark whether there is an IRQ route table update + through 
changes + +This patch prevents unnecessary updates to the IRQ route without modification + +Signed-off-by: libai +--- + accel/kvm/kvm-all.c | 11 ++++++----- + accel/stubs/kvm-stub.c | 2 +- + hw/intc/ioapic.c | 5 +++-- + hw/misc/ivshmem.c | 6 ++++-- + hw/vfio/pci.c | 5 +++-- + hw/virtio/virtio-pci.c | 5 +++-- + include/sysemu/kvm.h | 2 +- + target/i386/kvm/kvm.c | 6 ++++-- + 8 files changed, 25 insertions(+), 17 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 825ecb99a8..aa41b42efc 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1902,10 +1902,11 @@ static void kvm_add_routing_entry(KVMState *s, + set_gsi(s, entry->gsi); + } + +-static int kvm_update_routing_entry(KVMState *s, ++static int kvm_update_routing_entry(KVMRouteChange *c, + struct kvm_irq_routing_entry *new_entry) + { + struct kvm_irq_routing_entry *entry; ++ KVMState *s = c->s; + int n; + + for (n = 0; n < s->irq_routes->nr; n++) { +@@ -1919,7 +1920,7 @@ static int kvm_update_routing_entry(KVMState *s, + } + + *entry = *new_entry; +- ++ c->changes++; + return 0; + } + +@@ -2051,7 +2052,7 @@ int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) + return virq; + } + +-int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, ++int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, + PCIDevice *dev) + { + struct kvm_irq_routing_entry kroute = {}; +@@ -2081,7 +2082,7 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, + + trace_kvm_irqchip_update_msi_route(virq); + +- return kvm_update_routing_entry(s, &kroute); ++ return kvm_update_routing_entry(c, &kroute); + } + + static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event, +@@ -2223,7 +2224,7 @@ static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event, + abort(); + } + +-int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) ++int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, 
MSIMessage msg) + { + return -ENOSYS; + } +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index b071afee45..1fffdc0ea2 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -65,7 +65,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) + { + } + +-int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, ++int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, + PCIDevice *dev) + { + return -ENOSYS; +diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c +index 716ffc8bbb..0b43aec8fa 100644 +--- a/hw/intc/ioapic.c ++++ b/hw/intc/ioapic.c +@@ -195,6 +195,7 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) + int i; + + if (kvm_irqchip_is_split()) { ++ KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + MSIMessage msg; + struct ioapic_entry_info info; +@@ -202,10 +203,10 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) + if (!info.masked) { + msg.address = info.addr; + msg.data = info.data; +- kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); ++ kvm_irqchip_update_msi_route(&c, i, msg, NULL); + } + } +- kvm_irqchip_commit_routes(kvm_state); ++ kvm_irqchip_commit_route_changes(&c); + } + #endif + } +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index ad9a3c546e..f66491a7a7 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -278,6 +278,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, + IVShmemState *s = IVSHMEM_COMMON(dev); + EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; + MSIVector *v = &s->msi_vectors[vector]; ++ KVMRouteChange c; + int ret; + + IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); +@@ -287,11 +288,12 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, + } + assert(!v->unmasked); + +- ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); ++ c = kvm_irqchip_begin_route_changes(kvm_state); ++ ret = 
kvm_irqchip_update_msi_route(&c, v->virq, msg, dev); + if (ret < 0) { + return ret; + } +- kvm_irqchip_commit_routes(kvm_state); ++ kvm_irqchip_commit_route_changes(&c); + + ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); + if (ret < 0) { +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 293deb8737..ce958848b6 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -507,8 +507,9 @@ static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) + static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, + PCIDevice *pdev) + { +- kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev); +- kvm_irqchip_commit_routes(kvm_state); ++ KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); ++ kvm_irqchip_update_msi_route(&c, vector->virq, msg, pdev); ++ kvm_irqchip_commit_route_changes(&c); + } + + static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 7cd15f70e3..a677fa0736 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1044,12 +1044,13 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + if (proxy->vector_irqfd) { + irqfd = &proxy->vector_irqfd[vector]; + if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { +- ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg, ++ KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); ++ ret = kvm_irqchip_update_msi_route(&c, irqfd->virq, msg, + &proxy->pci_dev); + if (ret < 0) { + return ret; + } +- kvm_irqchip_commit_routes(kvm_state); ++ kvm_irqchip_commit_route_changes(&c); + } + } + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 176aa53cbe..16cccc881e 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -501,7 +501,7 @@ void kvm_init_cpu_signals(CPUState *cpu); + * @return: virq (>=0) when success, errno (<0) when failed. 
+ */ + int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev); +-int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, ++int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, + PCIDevice *dev); + void kvm_irqchip_commit_routes(KVMState *s); + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 2df3ff99c3..3a88e65635 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5700,9 +5700,11 @@ void kvm_update_msi_routes_all(void *private, bool global, + { + int cnt = 0, vector; + MSIRouteEntry *entry; ++ KVMRouteChange c; + MSIMessage msg; + PCIDevice *dev; + ++ c = kvm_irqchip_begin_route_changes(kvm_state); + /* TODO: explicit route update */ + QLIST_FOREACH(entry, &msi_route_list, list) { + cnt++; +@@ -5719,9 +5721,9 @@ void kvm_update_msi_routes_all(void *private, bool global, + */ + continue; + } +- kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev); ++ kvm_irqchip_update_msi_route(&c, entry->virq, msg, dev); + } +- kvm_irqchip_commit_routes(kvm_state); ++ kvm_irqchip_commit_route_changes(&c); + trace_kvm_x86_update_msi_routes(cnt); + } + +-- +2.41.0.windows.1 + diff --git a/kvm-split-too-big-memory-section-on-several-memslots.patch b/kvm-split-too-big-memory-section-on-several-memslots.patch deleted file mode 100644 index 9a94e21a773498e07764996501664313b9c98522..0000000000000000000000000000000000000000 --- a/kvm-split-too-big-memory-section-on-several-memslots.patch +++ /dev/null @@ -1,246 +0,0 @@ -From 33f5a810b0edc1ac67163f396bd345e04b5c11e8 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 24 Sep 2019 10:47:50 -0400 -Subject: [PATCH] kvm: split too big memory section on several memslots - -Max memslot size supported by kvm on s390 is 8Tb, -move logic of splitting RAM in chunks upto 8T to KVM code. - -This way it will hide KVM specific restrictions in KVM code -and won't affect board level design decisions. 
Which would allow -us to avoid misusing memory_region_allocate_system_memory() API -and eventually use a single hostmem backend for guest RAM. - -Signed-off-by: Igor Mammedov -Message-Id: <20190924144751.24149-4-imammedo@redhat.com> -Reviewed-by: Peter Xu -Acked-by: Paolo Bonzini -Signed-off-by: Christian Borntraeger -Signed-off-by: Kunkun Jiang ---- - accel/kvm/kvm-all.c | 124 +++++++++++++++++++++++++-------------- - include/sysemu/kvm_int.h | 1 + - 2 files changed, 81 insertions(+), 44 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 84edbe8bb1..6828f6a1f9 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed; - bool kvm_ioeventfd_any_length_allowed; - bool kvm_msi_use_devid; - static bool kvm_immediate_exit; -+static hwaddr kvm_max_slot_size = ~0; - - static const KVMCapabilityInfo kvm_required_capabilites[] = { - KVM_CAP_INFO(USER_MEMORY), -@@ -458,7 +459,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, - static int kvm_section_update_flags(KVMMemoryListener *kml, - MemoryRegionSection *section) - { -- hwaddr start_addr, size; -+ hwaddr start_addr, size, slot_size; - KVMSlot *mem; - int ret = 0; - -@@ -469,13 +470,18 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, - - kvm_slots_lock(kml); - -- mem = kvm_lookup_matching_slot(kml, start_addr, size); -- if (!mem) { -- /* We don't have a slot if we want to trap every access. */ -- goto out; -- } -+ while (size && !ret) { -+ slot_size = MIN(kvm_max_slot_size, size); -+ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); -+ if (!mem) { -+ /* We don't have a slot if we want to trap every access. 
*/ -+ goto out; -+ } - -- ret = kvm_slot_update_flags(kml, mem, section->mr); -+ ret = kvm_slot_update_flags(kml, mem, section->mr); -+ start_addr += slot_size; -+ size -= slot_size; -+ } - - out: - kvm_slots_unlock(kml); -@@ -548,11 +554,15 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - struct kvm_dirty_log d = {}; - KVMSlot *mem; - hwaddr start_addr, size; -+ hwaddr slot_size, slot_offset = 0; - int ret = 0; - - size = kvm_align_section(section, &start_addr); -- if (size) { -- mem = kvm_lookup_matching_slot(kml, start_addr, size); -+ while (size) { -+ MemoryRegionSection subsection = *section; -+ -+ slot_size = MIN(kvm_max_slot_size, size); -+ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); - if (!mem) { - /* We don't have a slot if we want to trap every access. */ - goto out; -@@ -570,11 +580,11 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - * So for now, let's align to 64 instead of HOST_LONG_BITS here, in - * a hope that sizeof(long) won't become >8 any time soon. 
- */ -- size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -- /*HOST_LONG_BITS*/ 64) / 8; - if (!mem->dirty_bmap) { -+ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -+ /*HOST_LONG_BITS*/ 64) / 8; - /* Allocate on the first log_sync, once and for all */ -- mem->dirty_bmap = g_malloc0(size); -+ mem->dirty_bmap = g_malloc0(bitmap_size); - } - - d.dirty_bitmap = mem->dirty_bmap; -@@ -585,7 +595,13 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - goto out; - } - -- kvm_get_dirty_pages_log_range(section, d.dirty_bitmap); -+ subsection.offset_within_region += slot_offset; -+ subsection.size = int128_make64(slot_size); -+ kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap); -+ -+ slot_offset += slot_size; -+ start_addr += slot_size; -+ size -= slot_size; - } - out: - return ret; -@@ -974,6 +990,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) - return NULL; - } - -+void kvm_set_max_memslot_size(hwaddr max_slot_size) -+{ -+ g_assert( -+ ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size -+ ); -+ kvm_max_slot_size = max_slot_size; -+} -+ - static void kvm_set_phys_mem(KVMMemoryListener *kml, - MemoryRegionSection *section, bool add) - { -@@ -981,7 +1005,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - int err; - MemoryRegion *mr = section->mr; - bool writeable = !mr->readonly && !mr->rom_device; -- hwaddr start_addr, size; -+ hwaddr start_addr, size, slot_size; - void *ram; - - if (!memory_region_is_ram(mr)) { -@@ -1006,41 +1030,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - kvm_slots_lock(kml); - - if (!add) { -- mem = kvm_lookup_matching_slot(kml, start_addr, size); -- if (!mem) { -- goto out; -- } -- if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { -- kvm_physical_sync_dirty_bitmap(kml, section); -- } -+ do { -+ slot_size = MIN(kvm_max_slot_size, size); -+ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); -+ if (!mem) { -+ goto out; -+ } -+ if 
(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { -+ kvm_physical_sync_dirty_bitmap(kml, section); -+ } - -- /* unregister the slot */ -- g_free(mem->dirty_bmap); -- mem->dirty_bmap = NULL; -- mem->memory_size = 0; -- mem->flags = 0; -- err = kvm_set_user_memory_region(kml, mem, false); -- if (err) { -- fprintf(stderr, "%s: error unregistering slot: %s\n", -- __func__, strerror(-err)); -- abort(); -- } -+ /* unregister the slot */ -+ g_free(mem->dirty_bmap); -+ mem->dirty_bmap = NULL; -+ mem->memory_size = 0; -+ mem->flags = 0; -+ err = kvm_set_user_memory_region(kml, mem, false); -+ if (err) { -+ fprintf(stderr, "%s: error unregistering slot: %s\n", -+ __func__, strerror(-err)); -+ abort(); -+ } -+ start_addr += slot_size; -+ size -= slot_size; -+ } while (size); - goto out; - } - - /* register the new slot */ -- mem = kvm_alloc_slot(kml); -- mem->memory_size = size; -- mem->start_addr = start_addr; -- mem->ram = ram; -- mem->flags = kvm_mem_flags(mr); -- -- err = kvm_set_user_memory_region(kml, mem, true); -- if (err) { -- fprintf(stderr, "%s: error registering slot: %s\n", __func__, -- strerror(-err)); -- abort(); -- } -+ do { -+ slot_size = MIN(kvm_max_slot_size, size); -+ mem = kvm_alloc_slot(kml); -+ mem->memory_size = slot_size; -+ mem->start_addr = start_addr; -+ mem->ram = ram; -+ mem->flags = kvm_mem_flags(mr); -+ -+ err = kvm_set_user_memory_region(kml, mem, true); -+ if (err) { -+ fprintf(stderr, "%s: error registering slot: %s\n", __func__, -+ strerror(-err)); -+ abort(); -+ } -+ start_addr += slot_size; -+ ram += slot_size; -+ size -= slot_size; -+ } while (size); - - out: - kvm_slots_unlock(kml); -@@ -2880,6 +2915,7 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, - - for (i = 0; i < kvm->nr_as; ++i) { - if (kvm->as[i].as == as && kvm->as[i].ml) { -+ size = MIN(kvm_max_slot_size, size); - return NULL != kvm_lookup_matching_slot(kvm->as[i].ml, - start_addr, size); - } -diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h 
-index 787dbc7770..f8e884f146 100644 ---- a/include/sysemu/kvm_int.h -+++ b/include/sysemu/kvm_int.h -@@ -43,4 +43,5 @@ typedef struct KVMMemoryListener { - void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - AddressSpace *as, int as_id); - -+void kvm_set_max_memslot_size(hwaddr max_slot_size); - #endif --- -2.27.0 - diff --git a/libvhost-user-Fix-some-memtable-remap-cases.patch b/libvhost-user-Fix-some-memtable-remap-cases.patch deleted file mode 100644 index 4f4d0c9f7d4d55064785426f3014ee6efbac1d63..0000000000000000000000000000000000000000 --- a/libvhost-user-Fix-some-memtable-remap-cases.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 8fa62daca5978e77ed690797a882c3d0aad8d0d4 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 12 Aug 2019 17:35:19 +0100 -Subject: [PATCH] libvhost-user: Fix some memtable remap cases - -If a new setmemtable command comes in once the vhost threads are -running, it will remap the guests address space and the threads -will now be looking in the wrong place. - -Fortunately we're running this command under lock, so we can -update the queue mappings so that threads will look in the new-right -place. - -Note: This doesn't fix things that the threads might be doing -without a lock (e.g. a readv/writev!) That's for another time. - -Signed-off-by: Dr. 
David Alan Gilbert ---- - contrib/libvhost-user/libvhost-user.c | 33 ++++++++++++++++++++------- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index fb75837032..164e6d1df8 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -559,6 +559,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) - return false; - } - -+static bool -+map_ring(VuDev *dev, VuVirtq *vq) -+{ -+ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); -+ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); -+ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); -+ -+ DPRINT("Setting virtq addresses:\n"); -+ DPRINT(" vring_desc at %p\n", vq->vring.desc); -+ DPRINT(" vring_used at %p\n", vq->vring.used); -+ DPRINT(" vring_avail at %p\n", vq->vring.avail); -+ -+ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); -+} -+ - static bool - vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) - { -@@ -762,6 +777,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) - close(vmsg->fds[i]); - } - -+ for (i = 0; i < dev->max_queues; i++) { -+ if (dev->vq[i].vring.desc) { -+ if (map_ring(dev, &dev->vq[i])) { -+ vu_panic(dev, "remaping queue %d during setmemtable", i); -+ } -+ } -+ } -+ - return false; - } - -@@ -848,18 +871,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) - DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); - DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); - -+ vq->vra = *vra; - vq->vring.flags = vra->flags; -- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); -- vq->vring.used = qva_to_va(dev, vra->used_user_addr); -- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); - vq->vring.log_guest_addr = vra->log_guest_addr; - -- DPRINT("Setting virtq addresses:\n"); -- DPRINT(" vring_desc at %p\n", 
vq->vring.desc); -- DPRINT(" vring_used at %p\n", vq->vring.used); -- DPRINT(" vring_avail at %p\n", vq->vring.avail); - -- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { -+ if (map_ring(dev, vq)) { - vu_panic(dev, "Invalid vring_addr message"); - return false; - } -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 1844b6f8d4..5cb7708559 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -327,6 +327,9 @@ typedef struct VuVirtq { - int err_fd; - unsigned int enable; - bool started; -+ -+ /* Guest addresses of our ring */ -+ struct vhost_vring_addr vra; - } VuVirtq; - - enum VuWatchCondtion { --- -2.27.0 - diff --git a/libvhost-user-fix-SLAVE_SEND_FD-handling.patch b/libvhost-user-fix-SLAVE_SEND_FD-handling.patch deleted file mode 100644 index 71cbf7baa7b59006c74a8eadb9b74b10079a9a9d..0000000000000000000000000000000000000000 --- a/libvhost-user-fix-SLAVE_SEND_FD-handling.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 28a9a3558a427493049723fff390add7026653eb Mon Sep 17 00:00:00 2001 -From: Johannes Berg -Date: Tue, 3 Sep 2019 23:04:22 +0300 -Subject: [PATCH] libvhost-user: fix SLAVE_SEND_FD handling - -It doesn't look like this could possibly work properly since -VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD is defined to 10, but the -dev->protocol_features has a bitmap. I suppose the peer this -was tested with also supported VHOST_USER_PROTOCOL_F_LOG_SHMFD, -in which case the test would always be false, but nevertheless -the code seems wrong. - -Use has_feature() to fix this. - -Fixes: d84599f56c82 ("libvhost-user: support host notifier") -Signed-off-by: Johannes Berg -Message-Id: <20190903200422.11693-1-johannes@sipsolutions.net> -Reviewed-by: Tiwei Bie -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 8726b70b449896f1211f869ec4f608904f027207) -Signed-off-by: Michael Roth ---- - contrib/libvhost-user/libvhost-user.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index 4b36e35a82..cb5f5770e4 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -1097,7 +1097,8 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, - - vmsg.fd_num = fd_num; - -- if ((dev->protocol_features & VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) == 0) { -+ if (!has_feature(dev->protocol_features, -+ VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { - return false; - } - --- -2.23.0 diff --git a/linux-headers-Add-KVM-Arm-RME-definitions-to-Linux-h.patch b/linux-headers-Add-KVM-Arm-RME-definitions-to-Linux-h.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d1d025fd120761372bee66da399812761d4fa0d --- /dev/null +++ b/linux-headers-Add-KVM-Arm-RME-definitions-to-Linux-h.patch @@ -0,0 +1,178 @@ +From d08cc1efcdf47b6cb3edece889cc36904ccf932d Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Fri, 13 May 2022 09:08:54 +0100 +Subject: [PATCH] linux-headers: Add KVM Arm RME definitions to Linux headers + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b1872e38b35f4e7b820880694ad876c41aabaa85 + +Copy the KVM definitions for Arm RME from the development branch. +Don't merge, they will be added from the periodic Linux header sync. 
+ +Signed-off-by: Jean-Philippe Brucker +Conflicts: + linux-headers/asm-arm64/kvm.h + linux-headers/linux/kvm.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + linux-headers/asm-arm64/kvm.h | 60 +++++++++++++++++++++++++++++++++++ + linux-headers/linux/kvm.h | 28 +++++++++++++--- + 2 files changed, 84 insertions(+), 4 deletions(-) + +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index 552fdcb18f..aed56ef371 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -111,6 +111,8 @@ struct kvm_regs { + #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ + #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ + #define KVM_ARM_VCPU_TEC 8 /* VCPU TEC state as part of cvm */ ++#define KVM_ARM_VCPU_HAS_EL2_E2H0 9 /* Limit NV support to E2H RES0 */ ++#define KVM_ARM_VCPU_REC 10 /* VCPU REC state as part of Realm */ + + struct kvm_vcpu_init { + __u32 target; +@@ -366,6 +368,7 @@ enum { + KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0, + }; + ++/* Vendor hyper call function numbers 0-63 */ + #define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) + + enum { +@@ -373,6 +376,14 @@ enum { + KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1, + }; + ++/* Vendor hyper call function numbers 64-127 */ ++#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3) ++ ++enum { ++ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0, ++ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1, ++}; ++ + /* Device Control API on vm fd */ + #define KVM_ARM_VM_SMCCC_CTRL 0 + #define KVM_ARM_VM_SMCCC_FILTER 0 +@@ -395,6 +406,7 @@ enum { + #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 + #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 + #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 ++#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 + #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 + #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +@@ -407,6 +419,54 @@ enum { + 
#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 + #define KVM_DEV_ARM_ITS_CTRL_RESET 4 + ++/* KVM_CAP_ARM_RME on VM fd */ ++#define KVM_CAP_ARM_RME_CONFIG_REALM 0 ++#define KVM_CAP_ARM_RME_CREATE_REALM 1 ++#define KVM_CAP_ARM_RME_INIT_RIPAS_REALM 2 ++#define KVM_CAP_ARM_RME_POPULATE_REALM 3 ++#define KVM_CAP_ARM_RME_ACTIVATE_REALM 4 ++ ++/* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ ++#define ARM_RME_CONFIG_RPV 0 ++#define ARM_RME_CONFIG_HASH_ALGO 1 ++ ++#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256 0 ++#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512 1 ++ ++#define ARM_RME_CONFIG_RPV_SIZE 64 ++ ++struct arm_rme_config { ++ __u32 cfg; ++ union { ++ /* cfg == ARM_RME_CONFIG_RPV */ ++ struct { ++ __u8 rpv[ARM_RME_CONFIG_RPV_SIZE]; ++ }; ++ ++ /* cfg == ARM_RME_CONFIG_HASH_ALGO */ ++ struct { ++ __u32 hash_algo; ++ }; ++ ++ /* Fix the size of the union */ ++ __u8 reserved[256]; ++ }; ++}; ++ ++#define KVM_ARM_RME_POPULATE_FLAGS_MEASURE (1 << 0) ++struct arm_rme_populate_realm { ++ __u64 base; ++ __u64 size; ++ __u32 flags; ++ __u32 reserved[3]; ++}; ++ ++struct arm_rme_init_ripas { ++ __u64 base; ++ __u64 size; ++ __u64 reserved[2]; ++}; ++ + /* Device Control API on vcpu fd */ + #define KVM_ARM_VCPU_PMU_V3_CTRL 0 + #define KVM_ARM_VCPU_PMU_V3_IRQ 0 +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index d3bf7fac00..beb41f7433 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -924,14 +924,25 @@ struct kvm_ppc_resize_hpt { + #define KVM_S390_SIE_PAGE_OFFSET 1 + + /* +- * On arm64, machine type can be used to request the physical +- * address size for the VM. Bits[7-0] are reserved for the guest +- * PA size shift (i.e, log2(PA_Size)). For backward compatibility, +- * value 0 implies the default IPA size, 40bits. ++ * On arm64, machine type can be used to request both the machine type and ++ * the physical address size for the VM. 
++ * ++ * Bits[11-8] are reserved for the ARM specific machine type. ++ * ++ * Bits[7-0] are reserved for the guest PA size shift (i.e, log2(PA_Size)). ++ * For backward compatibility, value 0 implies the default IPA size, 40bits. + */ ++#define KVM_VM_TYPE_ARM_SHIFT 8 ++#define KVM_VM_TYPE_ARM_MASK (0xfULL << KVM_VM_TYPE_ARM_SHIFT) ++#define KVM_VM_TYPE_ARM(_type) \ ++ (((_type) << KVM_VM_TYPE_ARM_SHIFT) & KVM_VM_TYPE_ARM_MASK) ++#define KVM_VM_TYPE_ARM_NORMAL KVM_VM_TYPE_ARM(0) ++#define KVM_VM_TYPE_ARM_REALM KVM_VM_TYPE_ARM(1) ++ + #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL + #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ + ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) ++ + /* + * ioctls for /dev/kvm fds: + */ +@@ -1206,6 +1217,8 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 + #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 ++#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 ++#define KVM_CAP_ARM_RME 240 + + #define KVM_CAP_ARM_TMM 300 + +@@ -2451,4 +2464,11 @@ struct kvm_s390_zpci_op { + #define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, uint64_t) + #define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001 + ++/* Available with KVM_CAP_ARM_RME, only for VMs with KVM_VM_TYPE_ARM_REALM */ ++struct kvm_arm_rmm_psci_complete { ++ __u64 target_mpidr; ++ __u32 psci_status; ++ __u32 padding[3]; ++}; ++ + #endif /* __LINUX_KVM_H */ +-- +2.33.0 + diff --git a/linux-headers-Synchronize-linux-headers-from-linux-v.patch b/linux-headers-Synchronize-linux-headers-from-linux-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..53c7260544c09873445f131d7dfd368d13217c7c --- /dev/null +++ b/linux-headers-Synchronize-linux-headers-from-linux-v.patch @@ -0,0 +1,57 @@ +From 280cba84e3eaed10f095f0c88dab27b7799558e5 Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:56 +0800 +Subject: [PATCH] linux-headers: Synchronize linux headers from linux + v6.7.0-rc8 + +Use the 
scripts/update-linux-headers.sh to synchronize linux +headers from linux v6.7.0-rc8. We mainly want to add the +loongarch linux headers and then add the loongarch kvm support +based on it. + +Signed-off-by: Tianrui Zhao +Acked-by: Song Gao +Message-Id: <20240105075804.1228596-2-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + include/standard-headers/linux/fuse.h | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 6b9793842c..fc0dcd10ae 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -209,7 +209,7 @@ + * - add FUSE_HAS_EXPIRE_ONLY + * + * 7.39 +- * - add FUSE_DIRECT_IO_RELAX ++ * - add FUSE_DIRECT_IO_ALLOW_MMAP + * - add FUSE_STATX and related structures + */ + +@@ -405,8 +405,7 @@ struct fuse_file_lock { + * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, + * symlink and mknod (single group that matches parent) + * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation +- * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now +- * allow shared mmap ++ * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. 
+ */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -445,7 +444,10 @@ struct fuse_file_lock { + #define FUSE_HAS_INODE_DAX (1ULL << 33) + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) + #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) +-#define FUSE_DIRECT_IO_RELAX (1ULL << 36) ++#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) ++ ++/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ ++#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP + + /** + * CUSE INIT request/reply flags +-- +2.27.0 + diff --git a/linux-headers-Update-against-Add-migration-support-f.patch b/linux-headers-Update-against-Add-migration-support-f.patch deleted file mode 100644 index 1bfef98c0c9b6771ccbe2fbd700a233e09cd9baf..0000000000000000000000000000000000000000 --- a/linux-headers-Update-against-Add-migration-support-f.patch +++ /dev/null @@ -1,517 +0,0 @@ -From 7ab9ce4016ec48e0af8010f742ee39fc84342d00 Mon Sep 17 00:00:00 2001 -From: Jinhao Gao -Date: Fri, 23 Jul 2021 14:55:12 +0800 -Subject: [PATCH] linux headers: Update against "Add migration support for VFIO - devices" - -Update linux-headers/linux/vfio.h against Linux 5.9-rc7 for the -VFIO migration support series. - -Signed-off-by: Jinhao Gao -Signed-off-by: Shenming Lu ---- - linux-headers/linux/vfio.h | 420 +++++++++++++++++++++++++++++++++++-- - 1 file changed, 405 insertions(+), 15 deletions(-) - -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index 24f505199f..a90672494d 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -295,15 +295,39 @@ struct vfio_region_info_cap_type { - __u32 subtype; /* type specific */ - }; - -+/* -+ * List of region types, global per bus driver. -+ * If you introduce a new type, please add it here. 
-+ */ -+ -+/* PCI region type containing a PCI vendor part */ - #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) - #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) -+#define VFIO_REGION_TYPE_GFX (1) -+#define VFIO_REGION_TYPE_CCW (2) -+#define VFIO_REGION_TYPE_MIGRATION (3) -+ -+/* sub-types for VFIO_REGION_TYPE_PCI_* */ - --/* 8086 Vendor sub-types */ -+/* 8086 vendor PCI sub-types */ - #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) - #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) - #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) - --#define VFIO_REGION_TYPE_GFX (1) -+/* 10de vendor PCI sub-types */ -+/* -+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. -+ */ -+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) -+ -+/* 1014 vendor PCI sub-types */ -+/* -+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU -+ * to do TLB invalidation on a GPU. -+ */ -+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) -+ -+/* sub-types for VFIO_REGION_TYPE_GFX */ - #define VFIO_REGION_SUBTYPE_GFX_EDID (1) - - /** -@@ -353,24 +377,237 @@ struct vfio_region_gfx_edid { - #define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 - }; - --#define VFIO_REGION_TYPE_CCW (2) --/* ccw sub-types */ -+/* sub-types for VFIO_REGION_TYPE_CCW */ - #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) -+#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) -+#define VFIO_REGION_SUBTYPE_CCW_CRW (3) - --/* -- * 10de vendor sub-type -- * -- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. -- */ --#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) -+/* sub-types for VFIO_REGION_TYPE_MIGRATION */ -+#define VFIO_REGION_SUBTYPE_MIGRATION (1) - - /* -- * 1014 vendor sub-type -+ * The structure vfio_device_migration_info is placed at the 0th offset of -+ * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related -+ * migration information. Field accesses from this structure are only supported -+ * at their native width and alignment. 
Otherwise, the result is undefined and -+ * vendor drivers should return an error. - * -- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU -- * to do TLB invalidation on a GPU. -+ * device_state: (read/write) -+ * - The user application writes to this field to inform the vendor driver -+ * about the device state to be transitioned to. -+ * - The vendor driver should take the necessary actions to change the -+ * device state. After successful transition to a given state, the -+ * vendor driver should return success on write(device_state, state) -+ * system call. If the device state transition fails, the vendor driver -+ * should return an appropriate -errno for the fault condition. -+ * - On the user application side, if the device state transition fails, -+ * that is, if write(device_state, state) returns an error, read -+ * device_state again to determine the current state of the device from -+ * the vendor driver. -+ * - The vendor driver should return previous state of the device unless -+ * the vendor driver has encountered an internal error, in which case -+ * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. -+ * - The user application must use the device reset ioctl to recover the -+ * device from VFIO_DEVICE_STATE_ERROR state. If the device is -+ * indicated to be in a valid device state by reading device_state, the -+ * user application may attempt to transition the device to any valid -+ * state reachable from the current state or terminate itself. -+ * -+ * device_state consists of 3 bits: -+ * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, -+ * it indicates the _STOP state. When the device state is changed to -+ * _STOP, driver should stop the device before write() returns. 
-+ * - If bit 1 is set, it indicates the _SAVING state, which means that the -+ * driver should start gathering device state information that will be -+ * provided to the VFIO user application to save the device's state. -+ * - If bit 2 is set, it indicates the _RESUMING state, which means that -+ * the driver should prepare to resume the device. Data provided through -+ * the migration region should be used to resume the device. -+ * Bits 3 - 31 are reserved for future use. To preserve them, the user -+ * application should perform a read-modify-write operation on this -+ * field when modifying the specified bits. -+ * -+ * +------- _RESUMING -+ * |+------ _SAVING -+ * ||+----- _RUNNING -+ * ||| -+ * 000b => Device Stopped, not saving or resuming -+ * 001b => Device running, which is the default state -+ * 010b => Stop the device & save the device state, stop-and-copy state -+ * 011b => Device running and save the device state, pre-copy state -+ * 100b => Device stopped and the device state is resuming -+ * 101b => Invalid state -+ * 110b => Error state -+ * 111b => Invalid state -+ * -+ * State transitions: -+ * -+ * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP -+ * (100b) (001b) (011b) (010b) (000b) -+ * 0. Running or default state -+ * | -+ * -+ * 1. Normal Shutdown (optional) -+ * |------------------------------------->| -+ * -+ * 2. Save the state or suspend -+ * |------------------------->|---------->| -+ * -+ * 3. Save the state during live migration -+ * |----------->|------------>|---------->| -+ * -+ * 4. Resuming -+ * |<---------| -+ * -+ * 5. Resumed -+ * |--------->| -+ * -+ * 0. Default state of VFIO device is _RUNNNG when the user application starts. -+ * 1. During normal shutdown of the user application, the user application may -+ * optionally change the VFIO device state from _RUNNING to _STOP. This -+ * transition is optional. The vendor driver must support this transition but -+ * must not require it. -+ * 2. 
When the user application saves state or suspends the application, the -+ * device state transitions from _RUNNING to stop-and-copy and then to _STOP. -+ * On state transition from _RUNNING to stop-and-copy, driver must stop the -+ * device, save the device state and send it to the application through the -+ * migration region. The sequence to be followed for such transition is given -+ * below. -+ * 3. In live migration of user application, the state transitions from _RUNNING -+ * to pre-copy, to stop-and-copy, and to _STOP. -+ * On state transition from _RUNNING to pre-copy, the driver should start -+ * gathering the device state while the application is still running and send -+ * the device state data to application through the migration region. -+ * On state transition from pre-copy to stop-and-copy, the driver must stop -+ * the device, save the device state and send it to the user application -+ * through the migration region. -+ * Vendor drivers must support the pre-copy state even for implementations -+ * where no data is provided to the user before the stop-and-copy state. The -+ * user must not be required to consume all migration data before the device -+ * transitions to a new state, including the stop-and-copy state. -+ * The sequence to be followed for above two transitions is given below. -+ * 4. To start the resuming phase, the device state should be transitioned from -+ * the _RUNNING to the _RESUMING state. -+ * In the _RESUMING state, the driver should use the device state data -+ * received through the migration region to resume the device. -+ * 5. After providing saved device data to the driver, the application should -+ * change the state from _RESUMING to _RUNNING. -+ * -+ * reserved: -+ * Reads on this field return zero and writes are ignored. -+ * -+ * pending_bytes: (read only) -+ * The number of pending bytes still to be migrated from the vendor driver. 
-+ * -+ * data_offset: (read only) -+ * The user application should read data_offset field from the migration -+ * region. The user application should read the device data from this -+ * offset within the migration region during the _SAVING state or write -+ * the device data during the _RESUMING state. See below for details of -+ * sequence to be followed. -+ * -+ * data_size: (read/write) -+ * The user application should read data_size to get the size in bytes of -+ * the data copied in the migration region during the _SAVING state and -+ * write the size in bytes of the data copied in the migration region -+ * during the _RESUMING state. -+ * -+ * The format of the migration region is as follows: -+ * ------------------------------------------------------------------ -+ * |vfio_device_migration_info| data section | -+ * | | /////////////////////////////// | -+ * ------------------------------------------------------------------ -+ * ^ ^ -+ * offset 0-trapped part data_offset -+ * -+ * The structure vfio_device_migration_info is always followed by the data -+ * section in the region, so data_offset will always be nonzero. The offset -+ * from where the data is copied is decided by the kernel driver. The data -+ * section can be trapped, mmapped, or partitioned, depending on how the kernel -+ * driver defines the data section. The data section partition can be defined -+ * as mapped by the sparse mmap capability. If mmapped, data_offset must be -+ * page aligned, whereas initial section which contains the -+ * vfio_device_migration_info structure, might not end at the offset, which is -+ * page aligned. The user is not required to access through mmap regardless -+ * of the capabilities of the region mmap. -+ * The vendor driver should determine whether and how to partition the data -+ * section. The vendor driver should return data_offset accordingly. -+ * -+ * The sequence to be followed while in pre-copy state and stop-and-copy state -+ * is as follows: -+ * a. 
Read pending_bytes, indicating the start of a new iteration to get device -+ * data. Repeated read on pending_bytes at this stage should have no side -+ * effects. -+ * If pending_bytes == 0, the user application should not iterate to get data -+ * for that device. -+ * If pending_bytes > 0, perform the following steps. -+ * b. Read data_offset, indicating that the vendor driver should make data -+ * available through the data section. The vendor driver should return this -+ * read operation only after data is available from (region + data_offset) -+ * to (region + data_offset + data_size). -+ * c. Read data_size, which is the amount of data in bytes available through -+ * the migration region. -+ * Read on data_offset and data_size should return the offset and size of -+ * the current buffer if the user application reads data_offset and -+ * data_size more than once here. -+ * d. Read data_size bytes of data from (region + data_offset) from the -+ * migration region. -+ * e. Process the data. -+ * f. Read pending_bytes, which indicates that the data from the previous -+ * iteration has been read. If pending_bytes > 0, go to step b. -+ * -+ * The user application can transition from the _SAVING|_RUNNING -+ * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the -+ * number of pending bytes. The user application should iterate in _SAVING -+ * (stop-and-copy) until pending_bytes is 0. -+ * -+ * The sequence to be followed while _RESUMING device state is as follows: -+ * While data for this device is available, repeat the following steps: -+ * a. Read data_offset from where the user application should write data. -+ * b. Write migration data starting at the migration region + data_offset for -+ * the length determined by data_size from the migration source. -+ * c. Write data_size, which indicates to the vendor driver that data is -+ * written in the migration region. Vendor driver must return this write -+ * operations on consuming data. 
Vendor driver should apply the -+ * user-provided migration region data to the device resume state. -+ * -+ * If an error occurs during the above sequences, the vendor driver can return -+ * an error code for next read() or write() operation, which will terminate the -+ * loop. The user application should then take the next necessary action, for -+ * example, failing migration or terminating the user application. -+ * -+ * For the user application, data is opaque. The user application should write -+ * data in the same order as the data is received and the data should be of -+ * same transaction size at the source. - */ --#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) -+ -+struct vfio_device_migration_info { -+ __u32 device_state; /* VFIO device state */ -+#define VFIO_DEVICE_STATE_STOP (0) -+#define VFIO_DEVICE_STATE_RUNNING (1 << 0) -+#define VFIO_DEVICE_STATE_SAVING (1 << 1) -+#define VFIO_DEVICE_STATE_RESUMING (1 << 2) -+#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ -+ VFIO_DEVICE_STATE_SAVING | \ -+ VFIO_DEVICE_STATE_RESUMING) -+ -+#define VFIO_DEVICE_STATE_VALID(state) \ -+ (state & VFIO_DEVICE_STATE_RESUMING ? 
\ -+ (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) -+ -+#define VFIO_DEVICE_STATE_IS_ERROR(state) \ -+ ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ -+ VFIO_DEVICE_STATE_RESUMING)) -+ -+#define VFIO_DEVICE_STATE_SET_ERROR(state) \ -+ ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ -+ VFIO_DEVICE_STATE_RESUMING) -+ -+ __u32 reserved; -+ __u64 pending_bytes; -+ __u64 data_offset; -+ __u64 data_size; -+}; - - /* - * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped -@@ -570,6 +807,7 @@ enum { - - enum { - VFIO_CCW_IO_IRQ_INDEX, -+ VFIO_CCW_CRW_IRQ_INDEX, - VFIO_CCW_NUM_IRQS - }; - -@@ -700,6 +938,43 @@ struct vfio_device_ioeventfd { - - #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) - -+/** -+ * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, -+ * struct vfio_device_feature) -+ * -+ * Get, set, or probe feature data of the device. The feature is selected -+ * using the FEATURE_MASK portion of the flags field. Support for a feature -+ * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe -+ * may optionally include the GET and/or SET bits to determine read vs write -+ * access of the feature respectively. Probing a feature will return success -+ * if the feature is supported and all of the optionally indicated GET/SET -+ * methods are supported. The format of the data portion of the structure is -+ * specific to the given feature. The data portion is not required for -+ * probing. GET and SET are mutually exclusive, except for use with PROBE. -+ * -+ * Return 0 on success, -errno on failure. 
-+ */ -+struct vfio_device_feature { -+ __u32 argsz; -+ __u32 flags; -+#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ -+#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ -+#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ -+#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ -+ __u8 data[]; -+}; -+ -+#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) -+ -+/* -+ * Provide support for setting a PCI VF Token, which is used as a shared -+ * secret between PF and VF drivers. This feature may only be set on a -+ * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing -+ * open VFs. Data provided when setting this feature is a 16-byte array -+ * (__u8 b[16]), representing a UUID. -+ */ -+#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) -+ - /* -------- API for Type1 VFIO IOMMU -------- */ - - /** -@@ -714,7 +989,54 @@ struct vfio_iommu_type1_info { - __u32 argsz; - __u32 flags; - #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ -- __u64 iova_pgsizes; /* Bitmap of supported page sizes */ -+#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ -+ __u64 iova_pgsizes; /* Bitmap of supported page sizes */ -+ __u32 cap_offset; /* Offset within info struct of first cap */ -+}; -+ -+/* -+ * The IOVA capability allows to report the valid IOVA range(s) -+ * excluding any non-relaxable reserved regions exposed by -+ * devices attached to the container. Any DMA map attempt -+ * outside the valid iova range will return error. -+ * -+ * The structures below define version 1 of this capability. 
-+ */ -+#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 -+ -+struct vfio_iova_range { -+ __u64 start; -+ __u64 end; -+}; -+ -+struct vfio_iommu_type1_info_cap_iova_range { -+ struct vfio_info_cap_header header; -+ __u32 nr_iovas; -+ __u32 reserved; -+ struct vfio_iova_range iova_ranges[]; -+}; -+ -+/* -+ * The migration capability allows to report supported features for migration. -+ * -+ * The structures below define version 1 of this capability. -+ * -+ * The existence of this capability indicates that IOMMU kernel driver supports -+ * dirty page logging. -+ * -+ * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty -+ * page logging. -+ * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap -+ * size in bytes that can be used by user applications when getting the dirty -+ * bitmap. -+ */ -+#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 -+ -+struct vfio_iommu_type1_info_cap_migration { -+ struct vfio_info_cap_header header; -+ __u32 flags; -+ __u64 pgsize_bitmap; -+ __u64 max_dirty_bitmap_size; /* in bytes */ - }; - - #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) -@@ -737,6 +1059,12 @@ struct vfio_iommu_type1_dma_map { - - #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) - -+struct vfio_bitmap { -+ __u64 pgsize; /* page size for bitmap in bytes */ -+ __u64 size; /* in bytes */ -+ __u64 *data; /* one bit per page */ -+}; -+ - /** - * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, - * struct vfio_dma_unmap) -@@ -746,12 +1074,23 @@ struct vfio_iommu_type1_dma_map { - * field. No guarantee is made to the user that arbitrary unmaps of iova - * or size different from those used in the original mapping call will - * succeed. -+ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap -+ * before unmapping IO virtual addresses. When this flag is set, the user must -+ * provide a struct vfio_bitmap in data[]. 
User must provide zero-allocated -+ * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. -+ * A bit in the bitmap represents one page, of user provided page size in -+ * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set -+ * indicates that the page at that offset from iova is dirty. A Bitmap of the -+ * pages in the range of unmapped size is returned in the user-provided -+ * vfio_bitmap.data. - */ - struct vfio_iommu_type1_dma_unmap { - __u32 argsz; - __u32 flags; -+#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) - __u64 iova; /* IO virtual address */ - __u64 size; /* Size of mapping (bytes) */ -+ __u8 data[]; - }; - - #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) -@@ -763,6 +1102,57 @@ struct vfio_iommu_type1_dma_unmap { - #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) - #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) - -+/** -+ * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, -+ * struct vfio_iommu_type1_dirty_bitmap) -+ * IOCTL is used for dirty pages logging. -+ * Caller should set flag depending on which operation to perform, details as -+ * below: -+ * -+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs -+ * the IOMMU driver to log pages that are dirtied or potentially dirtied by -+ * the device; designed to be used when a migration is in progress. Dirty pages -+ * are logged until logging is disabled by user application by calling the IOCTL -+ * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. -+ * -+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs -+ * the IOMMU driver to stop logging dirtied pages. -+ * -+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set -+ * returns the dirty pages bitmap for IOMMU container for a given IOVA range. -+ * The user must specify the IOVA range and the pgsize through the structure -+ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. 
This interface -+ * supports getting a bitmap of the smallest supported pgsize only and can be -+ * modified in future to get a bitmap of any specified supported pgsize. The -+ * user must provide a zeroed memory area for the bitmap memory and specify its -+ * size in bitmap.size. One bit is used to represent one page consecutively -+ * starting from iova offset. The user should provide page size in bitmap.pgsize -+ * field. A bit set in the bitmap indicates that the page at that offset from -+ * iova is dirty. The caller must set argsz to a value including the size of -+ * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the -+ * actual bitmap. If dirty pages logging is not enabled, an error will be -+ * returned. -+ * -+ * Only one of the flags _START, _STOP and _GET may be specified at a time. -+ * -+ */ -+struct vfio_iommu_type1_dirty_bitmap { -+ __u32 argsz; -+ __u32 flags; -+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) -+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) -+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) -+ __u8 data[]; -+}; -+ -+struct vfio_iommu_type1_dirty_bitmap_get { -+ __u64 iova; /* IO virtual address */ -+ __u64 size; /* Size of iova range */ -+ struct vfio_bitmap bitmap; -+}; -+ -+#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) -+ - /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ - - /* --- -2.27.0 - diff --git a/linux-headers-Update-to-Linux-v6.7-rc5.patch b/linux-headers-Update-to-Linux-v6.7-rc5.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a740ab691ca0dd4f91840ac3f817d9751e9a126 --- /dev/null +++ b/linux-headers-Update-to-Linux-v6.7-rc5.patch @@ -0,0 +1,1109 @@ +From 9904eb7d4559baca2da713346cd505a80af7e776 Mon Sep 17 00:00:00 2001 +From: Daniel Henrique Barboza +Date: Mon, 18 Dec 2023 17:43:18 -0300 +Subject: [PATCH] linux-headers: Update to Linux v6.7-rc5 + +We'll add a new RISC-V linux-header file, but first let's update 
all +headers. + +Headers for 'asm-loongarch' were added in this update. + +Signed-off-by: Daniel Henrique Barboza +Acked-by: Alistair Francis +Message-ID: <20231218204321.75757-2-dbarboza@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + include/standard-headers/drm/drm_fourcc.h | 2 + + include/standard-headers/linux/pci_regs.h | 24 ++- + include/standard-headers/linux/vhost_types.h | 7 + + .../standard-headers/linux/virtio_config.h | 5 + + include/standard-headers/linux/virtio_pci.h | 11 ++ + linux-headers/asm-arm64/kvm.h | 32 ++++ + linux-headers/asm-generic/unistd.h | 14 +- + linux-headers/asm-loongarch/bitsperlong.h | 1 + + linux-headers/asm-loongarch/kvm.h | 108 +++++++++++ + linux-headers/asm-loongarch/mman.h | 1 + + linux-headers/asm-loongarch/unistd.h | 5 + + linux-headers/asm-mips/unistd_n32.h | 4 + + linux-headers/asm-mips/unistd_n64.h | 4 + + linux-headers/asm-mips/unistd_o32.h | 4 + + linux-headers/asm-powerpc/unistd_32.h | 4 + + linux-headers/asm-powerpc/unistd_64.h | 4 + + linux-headers/asm-riscv/kvm.h | 12 ++ + linux-headers/asm-s390/unistd_32.h | 4 + + linux-headers/asm-s390/unistd_64.h | 4 + + linux-headers/asm-x86/unistd_32.h | 4 + + linux-headers/asm-x86/unistd_64.h | 3 + + linux-headers/asm-x86/unistd_x32.h | 3 + + linux-headers/linux/iommufd.h | 180 +++++++++++++++++- + linux-headers/linux/kvm.h | 11 ++ + linux-headers/linux/psp-sev.h | 1 + + linux-headers/linux/stddef.h | 9 +- + linux-headers/linux/userfaultfd.h | 9 +- + linux-headers/linux/vfio.h | 47 +++-- + linux-headers/linux/vhost.h | 8 + + 29 files changed, 498 insertions(+), 27 deletions(-) + create mode 100644 linux-headers/asm-loongarch/bitsperlong.h + create mode 100644 linux-headers/asm-loongarch/kvm.h + create mode 100644 linux-headers/asm-loongarch/mman.h + create mode 100644 linux-headers/asm-loongarch/unistd.h + +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 72279f4d25..3afb70160f 100644 +--- 
a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -322,6 +322,8 @@ extern "C" { + * index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian + */ + #define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') /* 2x1 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV30 fourcc_code('N', 'V', '3', '0') /* non-subsampled Cr:Cb plane */ + + /* + * 2 plane YCbCr MSB aligned +diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h +index e5f558d964..a39193213f 100644 +--- a/include/standard-headers/linux/pci_regs.h ++++ b/include/standard-headers/linux/pci_regs.h +@@ -80,6 +80,7 @@ + #define PCI_HEADER_TYPE_NORMAL 0 + #define PCI_HEADER_TYPE_BRIDGE 1 + #define PCI_HEADER_TYPE_CARDBUS 2 ++#define PCI_HEADER_TYPE_MFD 0x80 /* Multi-Function Device (possible) */ + + #define PCI_BIST 0x0f /* 8 bits */ + #define PCI_BIST_CODE_MASK 0x0f /* Return result */ +@@ -637,6 +638,7 @@ + #define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ + #define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ + #define PCI_EXP_RTSTA 0x20 /* Root Status */ ++#define PCI_EXP_RTSTA_PME_RQ_ID 0x0000ffff /* PME Requester ID */ + #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ + #define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ + /* +@@ -930,12 +932,13 @@ + + /* Process Address Space ID */ + #define PCI_PASID_CAP 0x04 /* PASID feature register */ +-#define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ +-#define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ ++#define PCI_PASID_CAP_EXEC 0x0002 /* Exec permissions Supported */ ++#define PCI_PASID_CAP_PRIV 0x0004 /* Privilege Mode Supported */ ++#define PCI_PASID_CAP_WIDTH 0x1f00 + #define PCI_PASID_CTRL 0x06 /* PASID control register */ +-#define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ +-#define PCI_PASID_CTRL_EXEC 0x02 /* Exec 
permissions Enable */ +-#define PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ ++#define PCI_PASID_CTRL_ENABLE 0x0001 /* Enable bit */ ++#define PCI_PASID_CTRL_EXEC 0x0002 /* Exec permissions Enable */ ++#define PCI_PASID_CTRL_PRIV 0x0004 /* Privilege Mode Enable */ + #define PCI_EXT_CAP_PASID_SIZEOF 8 + + /* Single Root I/O Virtualization */ +@@ -975,6 +978,8 @@ + #define PCI_LTR_VALUE_MASK 0x000003ff + #define PCI_LTR_SCALE_MASK 0x00001c00 + #define PCI_LTR_SCALE_SHIFT 10 ++#define PCI_LTR_NOSNOOP_VALUE 0x03ff0000 /* Max No-Snoop Latency Value */ ++#define PCI_LTR_NOSNOOP_SCALE 0x1c000000 /* Scale for Max Value */ + #define PCI_EXT_CAP_LTR_SIZEOF 8 + + /* Access Control Service */ +@@ -1042,9 +1047,16 @@ + #define PCI_EXP_DPC_STATUS 0x08 /* DPC Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR 0x0000 /* Uncorrectable error */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE 0x0002 /* Rcvd ERR_NONFATAL */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE 0x0004 /* Rcvd ERR_FATAL */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT 0x0006 /* Reason in Trig Reason Extension field */ + #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ + #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO 0x0000 /* RP PIO error */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER 0x0020 /* DPC SW Trigger bit */ ++#define PCI_EXP_DPC_RP_PIO_FEP 0x1f00 /* RP PIO First Err Ptr */ + + #define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ + +@@ -1088,6 +1100,8 @@ + #define PCI_L1SS_CTL1_LTR_L12_TH_VALUE 0x03ff0000 /* LTR_L1.2_THRESHOLD_Value */ + #define PCI_L1SS_CTL1_LTR_L12_TH_SCALE 0xe0000000 /* LTR_L1.2_THRESHOLD_Scale */ + #define PCI_L1SS_CTL2 0x0c /* Control 2 Register */ ++#define 
PCI_L1SS_CTL2_T_PWR_ON_SCALE 0x00000003 /* T_POWER_ON Scale */ ++#define PCI_L1SS_CTL2_T_PWR_ON_VALUE 0x000000f8 /* T_POWER_ON Value */ + + /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ + #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index 5ad07e134a..fd54044936 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -185,5 +185,12 @@ struct vhost_vdpa_iova_range { + * DRIVER_OK + */ + #define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 ++/* Device may expose the virtqueue's descriptor area, driver area and ++ * device area to a different group for ASID binding than where its ++ * buffers may reside. Requires VHOST_BACKEND_F_IOTLB_ASID. ++ */ ++#define VHOST_BACKEND_F_DESC_ASID 0x7 ++/* IOTLB don't flush memory mapping across device reset */ ++#define VHOST_BACKEND_F_IOTLB_PERSIST 0x8 + + #endif +diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h +index 8a7d0dc8b0..bfd1ca643e 100644 +--- a/include/standard-headers/linux/virtio_config.h ++++ b/include/standard-headers/linux/virtio_config.h +@@ -103,6 +103,11 @@ + */ + #define VIRTIO_F_NOTIFICATION_DATA 38 + ++/* This feature indicates that the driver uses the data provided by the device ++ * as a virtqueue identifier in available buffer notifications. ++ */ ++#define VIRTIO_F_NOTIF_CONFIG_DATA 39 ++ + /* + * This feature indicates that the driver can reset a queue individually. 
+ */ +diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h +index be912cfc95..b7fdfd0668 100644 +--- a/include/standard-headers/linux/virtio_pci.h ++++ b/include/standard-headers/linux/virtio_pci.h +@@ -166,6 +166,17 @@ struct virtio_pci_common_cfg { + uint32_t queue_used_hi; /* read-write */ + }; + ++/* ++ * Warning: do not use sizeof on this: use offsetofend for ++ * specific fields you need. ++ */ ++struct virtio_pci_modern_common_cfg { ++ struct virtio_pci_common_cfg cfg; ++ ++ uint16_t queue_notify_data; /* read-write */ ++ uint16_t queue_reset; /* read-write */ ++}; ++ + /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ + struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index 38e5957526..c59ea55cd8 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -491,6 +491,38 @@ struct kvm_smccc_filter { + #define KVM_HYPERCALL_EXIT_SMC (1U << 0) + #define KVM_HYPERCALL_EXIT_16BIT (1U << 1) + ++/* ++ * Get feature ID registers userspace writable mask. ++ * ++ * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model ++ * Feature Register 2"): ++ * ++ * "The Feature ID space is defined as the System register space in ++ * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, ++ * op2=={0-7}." ++ * ++ * This covers all currently known R/O registers that indicate ++ * anything useful feature wise, including the ID registers. ++ * ++ * If we ever need to introduce a new range, it will be described as ++ * such in the range field. 
++ */ ++#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \ ++ ({ \ ++ __u64 __op1 = (op1) & 3; \ ++ __op1 -= (__op1 == 3); \ ++ (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \ ++ }) ++ ++#define KVM_ARM_FEATURE_ID_RANGE 0 ++#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) ++ ++struct reg_mask_range { ++ __u64 addr; /* Pointer to mask array */ ++ __u32 range; /* Requested range */ ++ __u32 reserved[13]; ++}; ++ + #endif + + #endif /* __ARM_KVM_H__ */ +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index abe087c53b..756b013fb8 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -71,7 +71,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr) + #define __NR_getcwd 17 + __SYSCALL(__NR_getcwd, sys_getcwd) + #define __NR_lookup_dcookie 18 +-__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) ++__SYSCALL(__NR_lookup_dcookie, sys_ni_syscall) + #define __NR_eventfd2 19 + __SYSCALL(__NR_eventfd2, sys_eventfd2) + #define __NR_epoll_create1 20 +@@ -816,15 +816,21 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) + __SYSCALL(__NR_futex_waitv, sys_futex_waitv) + #define __NR_set_mempolicy_home_node 450 + __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +- + #define __NR_cachestat 451 + __SYSCALL(__NR_cachestat, sys_cachestat) +- + #define __NR_fchmodat2 452 + __SYSCALL(__NR_fchmodat2, sys_fchmodat2) ++#define __NR_map_shadow_stack 453 ++__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) ++#define __NR_futex_wake 454 ++__SYSCALL(__NR_futex_wake, sys_futex_wake) ++#define __NR_futex_wait 455 ++__SYSCALL(__NR_futex_wait, sys_futex_wait) ++#define __NR_futex_requeue 456 ++__SYSCALL(__NR_futex_requeue, sys_futex_requeue) + + #undef __NR_syscalls +-#define __NR_syscalls 453 ++#define __NR_syscalls 457 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-loongarch/bitsperlong.h 
b/linux-headers/asm-loongarch/bitsperlong.h +new file mode 100644 +index 0000000000..6dc0bb0c13 +--- /dev/null ++++ b/linux-headers/asm-loongarch/bitsperlong.h +@@ -0,0 +1 @@ ++#include +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +new file mode 100644 +index 0000000000..c6ad2ee610 +--- /dev/null ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -0,0 +1,108 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited ++ */ ++ ++#ifndef __UAPI_ASM_LOONGARCH_KVM_H ++#define __UAPI_ASM_LOONGARCH_KVM_H ++ ++#include ++ ++/* ++ * KVM LoongArch specific structures and definitions. ++ * ++ * Some parts derived from the x86 version of this file. ++ */ ++ ++#define __KVM_HAVE_READONLY_MEM ++ ++#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 ++#define KVM_DIRTY_LOG_PAGE_OFFSET 64 ++ ++/* ++ * for KVM_GET_REGS and KVM_SET_REGS ++ */ ++struct kvm_regs { ++ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ ++ __u64 gpr[32]; ++ __u64 pc; ++}; ++ ++/* ++ * for KVM_GET_FPU and KVM_SET_FPU ++ */ ++struct kvm_fpu { ++ __u32 fcsr; ++ __u64 fcc; /* 8x8 */ ++ struct kvm_fpureg { ++ __u64 val64[4]; ++ } fpr[32]; ++}; ++ ++/* ++ * For LoongArch, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various ++ * registers. The id field is broken down as follows: ++ * ++ * bits[63..52] - As per linux/kvm.h ++ * bits[51..32] - Must be zero. ++ * bits[31..16] - Register set. ++ * ++ * Register set = 0: GP registers from kvm_regs (see definitions below). ++ * ++ * Register set = 1: CSR registers. ++ * ++ * Register set = 2: KVM specific registers (see definitions below). ++ * ++ * Register set = 3: FPU / SIMD registers (see definitions below). ++ * ++ * Other sets registers may be added in the future. Each set would ++ * have its own identifier in bits[31..16]. 
++ */ ++ ++#define KVM_REG_LOONGARCH_GPR (KVM_REG_LOONGARCH | 0x00000ULL) ++#define KVM_REG_LOONGARCH_CSR (KVM_REG_LOONGARCH | 0x10000ULL) ++#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) ++#define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL) ++#define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL) ++#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) ++#define KVM_CSR_IDX_MASK 0x7fff ++#define KVM_CPUCFG_IDX_MASK 0x7fff ++ ++/* ++ * KVM_REG_LOONGARCH_KVM - KVM specific control registers. ++ */ ++ ++#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) ++#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++ ++#define LOONGARCH_REG_SHIFT 3 ++#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) ++#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) ++#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) ++ ++struct kvm_debug_exit_arch { ++}; ++ ++/* for KVM_SET_GUEST_DEBUG */ ++struct kvm_guest_debug_arch { ++}; ++ ++/* definition of registers in kvm_run */ ++struct kvm_sync_regs { ++}; ++ ++/* dummy definition */ ++struct kvm_sregs { ++}; ++ ++struct kvm_iocsr_entry { ++ __u32 addr; ++ __u32 pad; ++ __u64 data; ++}; ++ ++#define KVM_NR_IRQCHIPS 1 ++#define KVM_IRQCHIP_NUM_PINS 64 ++#define KVM_MAX_CORES 256 ++ ++#endif /* __UAPI_ASM_LOONGARCH_KVM_H */ +diff --git a/linux-headers/asm-loongarch/mman.h b/linux-headers/asm-loongarch/mman.h +new file mode 100644 +index 0000000000..8eebf89f5a +--- /dev/null ++++ b/linux-headers/asm-loongarch/mman.h +@@ -0,0 +1 @@ ++#include +diff --git a/linux-headers/asm-loongarch/unistd.h b/linux-headers/asm-loongarch/unistd.h +new file mode 100644 +index 0000000000..fcb668984f +--- /dev/null ++++ b/linux-headers/asm-loongarch/unistd.h +@@ -0,0 +1,5 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#define __ARCH_WANT_SYS_CLONE 
++#define __ARCH_WANT_SYS_CLONE3 ++ ++#include +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 46d8500654..994b6f008f 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -381,5 +381,9 @@ + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) + #define __NR_fchmodat2 (__NR_Linux + 452) ++#define __NR_map_shadow_stack (__NR_Linux + 453) ++#define __NR_futex_wake (__NR_Linux + 454) ++#define __NR_futex_wait (__NR_Linux + 455) ++#define __NR_futex_requeue (__NR_Linux + 456) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index c2f7ac673b..41dcf5877a 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -357,5 +357,9 @@ + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) + #define __NR_fchmodat2 (__NR_Linux + 452) ++#define __NR_map_shadow_stack (__NR_Linux + 453) ++#define __NR_futex_wake (__NR_Linux + 454) ++#define __NR_futex_wait (__NR_Linux + 455) ++#define __NR_futex_requeue (__NR_Linux + 456) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index 757c68f2ad..ae9d334d96 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -427,5 +427,9 @@ + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) + #define __NR_fchmodat2 (__NR_Linux + 452) ++#define __NR_map_shadow_stack (__NR_Linux + 453) ++#define __NR_futex_wake (__NR_Linux + 454) ++#define __NR_futex_wait (__NR_Linux + 455) ++#define __NR_futex_requeue (__NR_Linux + 456) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 8ef94bbac1..b9b23d66d7 100644 +--- 
a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -434,6 +434,10 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 0e7ee43e88..cbb4b3e8f7 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -406,6 +406,10 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 992c5e4071..60d3b21dea 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -80,6 +80,7 @@ struct kvm_riscv_csr { + unsigned long sip; + unsigned long satp; + unsigned long scounteren; ++ unsigned long senvcfg; + }; + + /* AIA CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +@@ -93,6 +94,11 @@ struct kvm_riscv_aia_csr { + unsigned long iprio2h; + }; + ++/* Smstateen CSR for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ ++struct kvm_riscv_smstateen_csr { ++ unsigned long sstateen0; ++}; ++ + /* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ + struct kvm_riscv_timer { + __u64 frequency; +@@ -131,6 +137,8 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZICSR, + KVM_RISCV_ISA_EXT_ZIFENCEI, + KVM_RISCV_ISA_EXT_ZIHPM, ++ KVM_RISCV_ISA_EXT_SMSTATEEN, ++ KVM_RISCV_ISA_EXT_ZICOND, + KVM_RISCV_ISA_EXT_MAX, + }; + +@@ -148,6 +156,7 @@ enum KVM_RISCV_SBI_EXT_ID { + KVM_RISCV_SBI_EXT_PMU, + KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_RISCV_SBI_EXT_VENDOR, ++ KVM_RISCV_SBI_EXT_DBCN, + 
KVM_RISCV_SBI_EXT_MAX, + }; + +@@ -178,10 +187,13 @@ enum KVM_RISCV_SBI_EXT_ID { + #define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT) + #define KVM_REG_RISCV_CSR_GENERAL (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) + #define KVM_REG_RISCV_CSR_AIA (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_CSR_SMSTATEEN (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT) + #define KVM_REG_RISCV_CSR_REG(name) \ + (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long)) + #define KVM_REG_RISCV_CSR_AIA_REG(name) \ + (offsetof(struct kvm_riscv_aia_csr, name) / sizeof(unsigned long)) ++#define KVM_REG_RISCV_CSR_SMSTATEEN_REG(name) \ ++ (offsetof(struct kvm_riscv_smstateen_csr, name) / sizeof(unsigned long)) + + /* Timer registers are mapped as type 4 */ + #define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT) +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index 716fa368ca..c093e6d5f9 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -425,5 +425,9 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index b2a11b1d13..114c0569a4 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -373,5 +373,9 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index d749ad1c24..329649c377 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ 
b/linux-headers/asm-x86/unistd_32.h +@@ -443,6 +443,10 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index cea67282eb..4583606ce6 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -366,6 +366,9 @@ + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 + #define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index 5b2e79bf4c..146d74d8e4 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -318,6 +318,9 @@ + #define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) + #define __NR_cachestat (__X32_SYSCALL_BIT + 451) + #define __NR_fchmodat2 (__X32_SYSCALL_BIT + 452) ++#define __NR_futex_wake (__X32_SYSCALL_BIT + 454) ++#define __NR_futex_wait (__X32_SYSCALL_BIT + 455) ++#define __NR_futex_requeue (__X32_SYSCALL_BIT + 456) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h +index 218bf7ac98..806d98d09c 100644 +--- a/linux-headers/linux/iommufd.h ++++ b/linux-headers/linux/iommufd.h +@@ -47,6 +47,8 @@ enum { + IOMMUFD_CMD_VFIO_IOAS, + IOMMUFD_CMD_HWPT_ALLOC, + IOMMUFD_CMD_GET_HW_INFO, ++ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, ++ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, + }; + + /** +@@ -347,20 +349,86 @@ struct iommu_vfio_ioas { + }; + #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) + ++/** ++ * enum 
iommufd_hwpt_alloc_flags - Flags for HWPT allocation ++ * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as ++ * the parent HWPT in a nesting configuration. ++ * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is ++ * enforced on device attachment ++ */ ++enum iommufd_hwpt_alloc_flags { ++ IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, ++ IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, ++}; ++ ++/** ++ * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table ++ * entry attributes ++ * @IOMMU_VTD_S1_SRE: Supervisor request ++ * @IOMMU_VTD_S1_EAFE: Extended access enable ++ * @IOMMU_VTD_S1_WPE: Write protect enable ++ */ ++enum iommu_hwpt_vtd_s1_flags { ++ IOMMU_VTD_S1_SRE = 1 << 0, ++ IOMMU_VTD_S1_EAFE = 1 << 1, ++ IOMMU_VTD_S1_WPE = 1 << 2, ++}; ++ ++/** ++ * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table ++ * info (IOMMU_HWPT_DATA_VTD_S1) ++ * @flags: Combination of enum iommu_hwpt_vtd_s1_flags ++ * @pgtbl_addr: The base address of the stage-1 page table. 
++ * @addr_width: The address width of the stage-1 page table ++ * @__reserved: Must be 0 ++ */ ++struct iommu_hwpt_vtd_s1 { ++ __aligned_u64 flags; ++ __aligned_u64 pgtbl_addr; ++ __u32 addr_width; ++ __u32 __reserved; ++}; ++ ++/** ++ * enum iommu_hwpt_data_type - IOMMU HWPT Data Type ++ * @IOMMU_HWPT_DATA_NONE: no data ++ * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table ++ */ ++enum iommu_hwpt_data_type { ++ IOMMU_HWPT_DATA_NONE, ++ IOMMU_HWPT_DATA_VTD_S1, ++}; ++ + /** + * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) + * @size: sizeof(struct iommu_hwpt_alloc) +- * @flags: Must be 0 ++ * @flags: Combination of enum iommufd_hwpt_alloc_flags + * @dev_id: The device to allocate this HWPT for +- * @pt_id: The IOAS to connect this HWPT to ++ * @pt_id: The IOAS or HWPT to connect this HWPT to + * @out_hwpt_id: The ID of the new HWPT + * @__reserved: Must be 0 ++ * @data_type: One of enum iommu_hwpt_data_type ++ * @data_len: Length of the type specific data ++ * @data_uptr: User pointer to the type specific data + * + * Explicitly allocate a hardware page table object. This is the same object + * type that is returned by iommufd_device_attach() and represents the + * underlying iommu driver's iommu_domain kernel object. + * +- * A HWPT will be created with the IOVA mappings from the given IOAS. ++ * A kernel-managed HWPT will be created with the mappings from the given ++ * IOAS via the @pt_id. The @data_type for this allocation must be set to ++ * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a ++ * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. ++ * ++ * A user-managed nested HWPT will be created from a given parent HWPT via ++ * @pt_id, in which the parent HWPT must be allocated previously via the ++ * same ioctl from a given IOAS (@pt_id). In this case, the @data_type ++ * must be set to a pre-defined type corresponding to an I/O page table ++ * type supported by the underlying IOMMU hardware. 
++ * ++ * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and ++ * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr ++ * must be given. + */ + struct iommu_hwpt_alloc { + __u32 size; +@@ -369,13 +437,26 @@ struct iommu_hwpt_alloc { + __u32 pt_id; + __u32 out_hwpt_id; + __u32 __reserved; ++ __u32 data_type; ++ __u32 data_len; ++ __aligned_u64 data_uptr; + }; + #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) + ++/** ++ * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info ++ * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings ++ * on a nested_parent domain. ++ * https://www.intel.com/content/www/us/en/content-details/772415/content-details.html ++ */ ++enum iommu_hw_info_vtd_flags { ++ IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0, ++}; ++ + /** + * struct iommu_hw_info_vtd - Intel VT-d hardware information + * +- * @flags: Must be 0 ++ * @flags: Combination of enum iommu_hw_info_vtd_flags + * @__reserved: Must be 0 + * + * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec +@@ -404,6 +485,20 @@ enum iommu_hw_info_type { + IOMMU_HW_INFO_TYPE_INTEL_VTD, + }; + ++/** ++ * enum iommufd_hw_capabilities ++ * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking ++ * If available, it means the following APIs ++ * are supported: ++ * ++ * IOMMU_HWPT_GET_DIRTY_BITMAP ++ * IOMMU_HWPT_SET_DIRTY_TRACKING ++ * ++ */ ++enum iommufd_hw_capabilities { ++ IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, ++}; ++ + /** + * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) + * @size: sizeof(struct iommu_hw_info) +@@ -415,6 +510,8 @@ enum iommu_hw_info_type { + * the iommu type specific hardware information data + * @out_data_type: Output the iommu hardware info type as defined in the enum + * iommu_hw_info_type. ++ * @out_capabilities: Output the generic iommu capability info type as defined ++ * in the enum iommu_hw_capabilities. 
+ * @__reserved: Must be 0 + * + * Query an iommu type specific hardware information data from an iommu behind +@@ -439,6 +536,81 @@ struct iommu_hw_info { + __aligned_u64 data_uptr; + __u32 out_data_type; + __u32 __reserved; ++ __aligned_u64 out_capabilities; + }; + #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) ++ ++/* ++ * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty ++ * tracking ++ * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking ++ */ ++enum iommufd_hwpt_set_dirty_tracking_flags { ++ IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1, ++}; ++ ++/** ++ * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING) ++ * @size: sizeof(struct iommu_hwpt_set_dirty_tracking) ++ * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags ++ * @hwpt_id: HW pagetable ID that represents the IOMMU domain ++ * @__reserved: Must be 0 ++ * ++ * Toggle dirty tracking on an HW pagetable. ++ */ ++struct iommu_hwpt_set_dirty_tracking { ++ __u32 size; ++ __u32 flags; ++ __u32 hwpt_id; ++ __u32 __reserved; ++}; ++#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ ++ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) ++ ++/** ++ * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits ++ * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing ++ * any dirty bits metadata. This flag ++ * can be passed in the expectation ++ * where the next operation is an unmap ++ * of the same IOVA range. 
++ * ++ */ ++enum iommufd_hwpt_get_dirty_bitmap_flags { ++ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1, ++}; ++ ++/** ++ * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) ++ * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) ++ * @hwpt_id: HW pagetable ID that represents the IOMMU domain ++ * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags ++ * @__reserved: Must be 0 ++ * @iova: base IOVA of the bitmap first bit ++ * @length: IOVA range size ++ * @page_size: page size granularity of each bit in the bitmap ++ * @data: bitmap where to set the dirty bits. The bitmap bits each ++ * represent a page_size which you deviate from an arbitrary iova. ++ * ++ * Checking a given IOVA is dirty: ++ * ++ * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) ++ * ++ * Walk the IOMMU pagetables for a given IOVA range to return a bitmap ++ * with the dirty IOVAs. In doing so it will also by default clear any ++ * dirty bit metadata set in the IOPTE. ++ */ ++struct iommu_hwpt_get_dirty_bitmap { ++ __u32 size; ++ __u32 hwpt_id; ++ __u32 flags; ++ __u32 __reserved; ++ __aligned_u64 iova; ++ __aligned_u64 length; ++ __aligned_u64 page_size; ++ __aligned_u64 data; ++}; ++#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ ++ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) ++ + #endif +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 0d74ee999a..549fea3a97 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -264,6 +264,7 @@ struct kvm_xen_exit { + #define KVM_EXIT_RISCV_SBI 35 + #define KVM_EXIT_RISCV_CSR 36 + #define KVM_EXIT_NOTIFY 37 ++#define KVM_EXIT_LOONGARCH_IOCSR 38 + + /* For KVM_EXIT_INTERNAL_ERROR */ + /* Emulate instruction failed. 
*/ +@@ -336,6 +337,13 @@ struct kvm_run { + __u32 len; + __u8 is_write; + } mmio; ++ /* KVM_EXIT_LOONGARCH_IOCSR */ ++ struct { ++ __u64 phys_addr; ++ __u8 data[8]; ++ __u32 len; ++ __u8 is_write; ++ } iocsr_io; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 nr; +@@ -1188,6 +1196,7 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_COUNTER_OFFSET 227 + #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 ++#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1358,6 +1367,7 @@ struct kvm_dirty_tlb { + #define KVM_REG_ARM64 0x6000000000000000ULL + #define KVM_REG_MIPS 0x7000000000000000ULL + #define KVM_REG_RISCV 0x8000000000000000ULL ++#define KVM_REG_LOONGARCH 0x9000000000000000ULL + + #define KVM_REG_SIZE_SHIFT 52 + #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +@@ -1558,6 +1568,7 @@ struct kvm_s390_ucas_mapping { + #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) + /* Available with KVM_CAP_COUNTER_OFFSET */ + #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) ++#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range) + + /* ioctl for vm fd */ + #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) +diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h +index 12ccb70099..bcb21339ee 100644 +--- a/linux-headers/linux/psp-sev.h ++++ b/linux-headers/linux/psp-sev.h +@@ -68,6 +68,7 @@ typedef enum { + SEV_RET_INVALID_PARAM, + SEV_RET_RESOURCE_LIMIT, + SEV_RET_SECURE_DATA_INVALID, ++ SEV_RET_INVALID_KEY = 0x27, + SEV_RET_MAX, + } sev_ret_code; + +diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h +index 9bb07083ac..bf9749dd14 100644 +--- a/linux-headers/linux/stddef.h ++++ b/linux-headers/linux/stddef.h +@@ -27,8 +27,13 @@ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ +- } ++ } ATTRS + ++#ifdef __cplusplus ++/* 
sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ ++#define __DECLARE_FLEX_ARRAY(T, member) \ ++ T member[0] ++#else + /** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * +@@ -49,3 +54,5 @@ + #ifndef __counted_by + #define __counted_by(m) + #endif ++ ++#endif /* _LINUX_STDDEF_H */ +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index 59978fbaae..953c75feda 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -40,7 +40,8 @@ + UFFD_FEATURE_EXACT_ADDRESS | \ + UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ + UFFD_FEATURE_WP_UNPOPULATED | \ +- UFFD_FEATURE_POISON) ++ UFFD_FEATURE_POISON | \ ++ UFFD_FEATURE_WP_ASYNC) + #define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ +@@ -216,6 +217,11 @@ struct uffdio_api { + * (i.e. empty ptes). This will be the default behavior for shmem + * & hugetlbfs, so this flag only affects anonymous memory behavior + * when userfault write-protection mode is registered. ++ * ++ * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection ++ * asynchronous mode is supported in which the write fault is ++ * automatically resolved and write-protection is un-set. ++ * It implies UFFD_FEATURE_WP_UNPOPULATED. 
+ */ + #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) + #define UFFD_FEATURE_EVENT_FORK (1<<1) +@@ -232,6 +238,7 @@ struct uffdio_api { + #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) + #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) + #define UFFD_FEATURE_POISON (1<<14) ++#define UFFD_FEATURE_WP_ASYNC (1<<15) + __u64 features; + + __u64 ioctls; +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index acf72b4999..8e175ece31 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -277,8 +277,8 @@ struct vfio_region_info { + #define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ + __u32 index; /* Region index */ + __u32 cap_offset; /* Offset within info struct of first cap */ +- __u64 size; /* Region size (bytes) */ +- __u64 offset; /* Region offset from start of device fd */ ++ __aligned_u64 size; /* Region size (bytes) */ ++ __aligned_u64 offset; /* Region offset from start of device fd */ + }; + #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) + +@@ -294,8 +294,8 @@ struct vfio_region_info { + #define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 + + struct vfio_region_sparse_mmap_area { +- __u64 offset; /* Offset of mmap'able area within region */ +- __u64 size; /* Size of mmap'able area */ ++ __aligned_u64 offset; /* Offset of mmap'able area within region */ ++ __aligned_u64 size; /* Size of mmap'able area */ + }; + + struct vfio_region_info_cap_sparse_mmap { +@@ -450,9 +450,9 @@ struct vfio_device_migration_info { + VFIO_DEVICE_STATE_V1_RESUMING) + + __u32 reserved; +- __u64 pending_bytes; +- __u64 data_offset; +- __u64 data_size; ++ __aligned_u64 pending_bytes; ++ __aligned_u64 data_offset; ++ __aligned_u64 data_size; + }; + + /* +@@ -476,7 +476,7 @@ struct vfio_device_migration_info { + + struct vfio_region_info_cap_nvlink2_ssatgt { + struct vfio_info_cap_header header; +- __u64 tgt; ++ __aligned_u64 tgt; + }; + + /* +@@ -816,7 +816,7 @@ struct vfio_device_gfx_plane_info { + __u32 drm_plane_type; /* 
type of plane: DRM_PLANE_TYPE_* */ + /* out */ + __u32 drm_format; /* drm format of plane */ +- __u64 drm_format_mod; /* tiled mode */ ++ __aligned_u64 drm_format_mod; /* tiled mode */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ +@@ -829,6 +829,7 @@ struct vfio_device_gfx_plane_info { + __u32 region_index; /* region index */ + __u32 dmabuf_id; /* dma-buf id */ + }; ++ __u32 reserved; + }; + + #define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) +@@ -863,9 +864,10 @@ struct vfio_device_ioeventfd { + #define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */ + #define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */ + #define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf) +- __u64 offset; /* device fd offset of write */ +- __u64 data; /* data to be written */ ++ __aligned_u64 offset; /* device fd offset of write */ ++ __aligned_u64 data; /* data to be written */ + __s32 fd; /* -1 for de-assignment */ ++ __u32 reserved; + }; + + #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) +@@ -1434,6 +1436,27 @@ struct vfio_device_feature_mig_data_size { + + #define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9 + ++/** ++ * Upon VFIO_DEVICE_FEATURE_SET, set or clear the BUS mastering for the device ++ * based on the operation specified in op flag. ++ * ++ * The functionality is incorporated for devices that needs bus master control, ++ * but the in-band device interface lacks the support. Consequently, it is not ++ * applicable to PCI devices, as bus master control for PCI devices is managed ++ * in-band through the configuration space. At present, this feature is supported ++ * only for CDX devices. ++ * When the device's BUS MASTER setting is configured as CLEAR, it will result in ++ * blocking all incoming DMA requests from the device. On the other hand, configuring ++ * the device's BUS MASTER setting as SET (enable) will grant the device the ++ * capability to perform DMA to the host memory. 
++ */ ++struct vfio_device_feature_bus_master { ++ __u32 op; ++#define VFIO_DEVICE_FEATURE_CLEAR_MASTER 0 /* Clear Bus Master */ ++#define VFIO_DEVICE_FEATURE_SET_MASTER 1 /* Set Bus Master */ ++}; ++#define VFIO_DEVICE_FEATURE_BUS_MASTER 10 ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +@@ -1449,7 +1472,7 @@ struct vfio_iommu_type1_info { + __u32 flags; + #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ + #define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ +- __u64 iova_pgsizes; /* Bitmap of supported page sizes */ ++ __aligned_u64 iova_pgsizes; /* Bitmap of supported page sizes */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __u32 pad; + }; +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index f5c48b61ab..649560c685 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -219,4 +219,12 @@ + */ + #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) + ++/* Get the group for the descriptor table including driver & device areas ++ * of a virtqueue: read index, write group in num. ++ * The virtqueue index is stored in the index field of vhost_vring_state. ++ * The group ID of the descriptor table for this specific virtqueue ++ * is returned via num field of vhost_vring_state. 
++ */ ++#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ ++ struct vhost_vring_state) + #endif +-- +2.27.0 + diff --git a/linux-headers-loongarch-Add-kvm_para.h-and-unistd_64.patch b/linux-headers-loongarch-Add-kvm_para.h-and-unistd_64.patch new file mode 100644 index 0000000000000000000000000000000000000000..88d04a7dbffcf482e3530f22a301c4ee833434da --- /dev/null +++ b/linux-headers-loongarch-Add-kvm_para.h-and-unistd_64.patch @@ -0,0 +1,40 @@ +From 734b877ee97c73c7cbeeb02c560b9b4e6a8c0dda Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Thu, 17 Oct 2024 10:07:07 +0800 +Subject: [PATCH 60/78] linux-headers: loongarch: Add kvm_para.h and + unistd_64.h + +KVM LBT supports on LoongArch depends on the linux-header file +kvm_para.h, also unistd_64.h is required by unistd.h on LoongArch +since 6.11, otherwise there will be compiling error such as: + +linux-headers/asm/unistd.h:3:10: fatal error: asm/unistd_64.h: No such file or directory + #include + +Signed-off-by: Bibo Mao +Acked-by: Song Gao +Message-Id: <20241017020708.1728620-2-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + scripts/update-linux-headers.sh | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 34295c0fe5..88c76b8f69 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -156,6 +156,10 @@ for arch in $ARCHLIST; do + cp_portable "$tmpdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" + fi ++ if [ $arch = loongarch ]; then ++ cp "$hdrdir/include/asm/kvm_para.h" "$output/linux-headers/asm-loongarch/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-loongarch/" ++ fi + done + + rm -rf "$output/linux-headers/linux" +-- +2.39.1 + diff --git a/linux-headers-update-against-5.10-and-manual-clear-v.patch b/linux-headers-update-against-5.10-and-manual-clear-v.patch index 
0315fc2c1a30be23b4643c30d783e5259ef11931..911475272126f45878b57fb991ca7551761dc565 100644 --- a/linux-headers-update-against-5.10-and-manual-clear-v.patch +++ b/linux-headers-update-against-5.10-and-manual-clear-v.patch @@ -1,4 +1,4 @@ -From 79efeccd41d761b68946df68e5431eff399ccbd5 Mon Sep 17 00:00:00 2001 +From 2ccd1ec0d18070727ad9b9647da6b6937f16de2a Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 8 May 2021 17:31:03 +0800 Subject: [PATCH] linux-headers: update against 5.10 and manual clear vfio @@ -12,16 +12,16 @@ the kernel, update the header to add them. Signed-off-by: Zenghui Yu Signed-off-by: Kunkun Jiang --- - linux-headers/linux/vfio.h | 37 ++++++++++++++++++++++++++++++++++++- - 1 file changed, 36 insertions(+), 1 deletion(-) + linux-headers/linux/vfio.h | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index a90672494d..120387ba58 100644 +index 8e175ece31..956154e509 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h -@@ -46,6 +46,16 @@ +@@ -56,6 +56,16 @@ */ - #define VFIO_NOIOMMU_IOMMU 8 + #define VFIO_UPDATE_VADDR 10 +/* + * The vfio_iommu driver may support user clears dirty log manually, which means @@ -36,15 +36,7 @@ index a90672494d..120387ba58 100644 /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between -@@ -1074,6 +1084,7 @@ struct vfio_bitmap { - * field. No guarantee is made to the user that arbitrary unmaps of iova - * or size different from those used in the original mapping call will - * succeed. -+ * - * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap - * before unmapping IO virtual addresses. When this flag is set, the user must - * provide a struct vfio_bitmap in data[]. 
User must provide zero-allocated -@@ -1133,8 +1144,30 @@ struct vfio_iommu_type1_dma_unmap { +@@ -1651,8 +1661,30 @@ struct vfio_iommu_type1_dma_unmap { * actual bitmap. If dirty pages logging is not enabled, an error will be * returned. * @@ -76,7 +68,7 @@ index a90672494d..120387ba58 100644 */ struct vfio_iommu_type1_dirty_bitmap { __u32 argsz; -@@ -1142,6 +1175,8 @@ struct vfio_iommu_type1_dirty_bitmap { +@@ -1660,6 +1692,8 @@ struct vfio_iommu_type1_dirty_bitmap { #define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) #define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) #define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) diff --git a/linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch b/linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch deleted file mode 100644 index 731d06a74024c81bcc1ececeb79da2b873c2546f..0000000000000000000000000000000000000000 --- a/linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 27a9f40b308efd8ddcb81e286441865b5a0cb541 Mon Sep 17 00:00:00 2001 -From: Zenghui Yu -Date: Tue, 14 Apr 2020 21:52:42 +0800 -Subject: [PATCH] linux headers: update against "KVM/ARM: Fix >256 vcpus" - -This is part of upstream commit f363d039e883 ("linux headers: update -against v5.4-rc1"), authored by Eric Auger . 
- -Signed-off-by: Zenghui Yu ---- - linux-headers/asm-arm/kvm.h | 4 +++- - linux-headers/asm-arm64/kvm.h | 4 +++- - linux-headers/linux/kvm.h | 1 + - 3 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h -index e1f8b745..137a2730 100644 ---- a/linux-headers/asm-arm/kvm.h -+++ b/linux-headers/asm-arm/kvm.h -@@ -254,8 +254,10 @@ struct kvm_vcpu_events { - #define KVM_DEV_ARM_ITS_CTRL_RESET 4 - - /* KVM_IRQ_LINE irq field index values */ -+#define KVM_ARM_IRQ_VCPU2_SHIFT 28 -+#define KVM_ARM_IRQ_VCPU2_MASK 0xf - #define KVM_ARM_IRQ_TYPE_SHIFT 24 --#define KVM_ARM_IRQ_TYPE_MASK 0xff -+#define KVM_ARM_IRQ_TYPE_MASK 0xf - #define KVM_ARM_IRQ_VCPU_SHIFT 16 - #define KVM_ARM_IRQ_VCPU_MASK 0xff - #define KVM_ARM_IRQ_NUM_SHIFT 0 -diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h -index 2431ec35..cdfd5f33 100644 ---- a/linux-headers/asm-arm64/kvm.h -+++ b/linux-headers/asm-arm64/kvm.h -@@ -308,8 +308,10 @@ struct kvm_vcpu_events { - #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 - - /* KVM_IRQ_LINE irq field index values */ -+#define KVM_ARM_IRQ_VCPU2_SHIFT 28 -+#define KVM_ARM_IRQ_VCPU2_MASK 0xf - #define KVM_ARM_IRQ_TYPE_SHIFT 24 --#define KVM_ARM_IRQ_TYPE_MASK 0xff -+#define KVM_ARM_IRQ_TYPE_MASK 0xf - #define KVM_ARM_IRQ_VCPU_SHIFT 16 - #define KVM_ARM_IRQ_VCPU_MASK 0xff - #define KVM_ARM_IRQ_NUM_SHIFT 0 -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index c8423e76..744e888e 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_ARM_VM_IPA_SIZE 165 - #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ - #define KVM_CAP_HYPERV_CPUID 167 -+#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 - #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 - #define KVM_CAP_PPC_IRQ_XIVE 169 - #define KVM_CAP_ARM_SVE 170 --- -2.23.0 diff --git a/linux-headers-update-kernel-headers-to-include-CSV3-.patch 
b/linux-headers-update-kernel-headers-to-include-CSV3-.patch new file mode 100644 index 0000000000000000000000000000000000000000..24af3210ec0e9c202b00518ff8b43998f75630f1 --- /dev/null +++ b/linux-headers-update-kernel-headers-to-include-CSV3-.patch @@ -0,0 +1,79 @@ +From 454079664e1492eeb9b90d1d05598e84dc436f11 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Fri, 17 Jun 2022 09:25:19 +0800 +Subject: [PATCH] linux-headers: update kernel headers to include CSV3 + migration cmds + +Four new migration commands are added to support CSV3 migration. + +KVM_CSV3_SEND_ENCRYPT_DATA/KVM_CSV3_RECEIVE_ENCRYPT_DATA cmds are +used to migrate guest's pages. + +KVM_CSV3_SEND_ENCRYPT_CONTEXT/KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT cmds +are used to migration guest's runtime context. + +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 8487d0889b..8543db844e 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2115,6 +2115,12 @@ enum csv3_cmd_id { + KVM_CSV3_INIT = KVM_CSV3_NR_MIN, + KVM_CSV3_LAUNCH_ENCRYPT_DATA, + KVM_CSV3_LAUNCH_ENCRYPT_VMCB, ++ KVM_CSV3_SEND_ENCRYPT_DATA, ++ KVM_CSV3_SEND_ENCRYPT_CONTEXT, ++ KVM_CSV3_RECEIVE_ENCRYPT_DATA, ++ KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT, ++ ++ KVM_CSV3_NR_MAX, + }; + + struct kvm_csv3_launch_encrypt_data { +@@ -2127,6 +2133,38 @@ struct kvm_csv3_init_data { + __u64 nodemask; + }; + ++struct kvm_csv3_send_encrypt_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 guest_addr_data; ++ __u32 guest_addr_len; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++}; ++ ++struct kvm_csv3_send_encrypt_context { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++}; ++ ++struct kvm_csv3_receive_encrypt_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 guest_addr_data; ++ __u32 guest_addr_len; ++ __u64 trans_uaddr; ++ 
__u32 trans_len; ++}; ++ ++struct kvm_csv3_receive_encrypt_context { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +-- +2.41.0.windows.1 + diff --git a/linux-user-Clean-up-unused-header.patch b/linux-user-Clean-up-unused-header.patch new file mode 100644 index 0000000000000000000000000000000000000000..4546921231c45711f0a6f90a4a64476e377363ab --- /dev/null +++ b/linux-user-Clean-up-unused-header.patch @@ -0,0 +1,35 @@ +From 34af051406f75bdef6f2ef598cde51e756ea8489 Mon Sep 17 00:00:00 2001 +From: Susanooo +Date: Fri, 25 Oct 2024 09:26:25 +0800 +Subject: [PATCH] linux-user: Clean up unused header +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Clean up unused (already commented-out) header from syscall.c. + +Signed-off-by: Gustavo Romero +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +Signed-off-by: zhangchujun +--- + linux-user/syscall.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index e384e14248..513996e6fa 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -53,7 +53,6 @@ + #include + #include + #include +-//#include + #include + #include + #include +-- +2.41.0.windows.1 + diff --git a/linux-user-Honor-elf-alignment-when-placing-images.patch b/linux-user-Honor-elf-alignment-when-placing-images.patch new file mode 100644 index 0000000000000000000000000000000000000000..f7b8a49fd261bd8cba9411a3537a33dcfa709a45 --- /dev/null +++ b/linux-user-Honor-elf-alignment-when-placing-images.patch @@ -0,0 +1,105 @@ +From ad5b05def5521a9cbbdd750c915fccaba391f53b Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 12 Nov 2024 11:32:01 -0800 +Subject: [PATCH] linux-user: Honor elf alignment when 
placing images + +Most binaries don't actually depend on more than page alignment, +but any binary can request it. Not honoring this was a bug. + +This became obvious when gdb reported + + Failed to read a valid object file image from memory + +when examining some vdso which are marked as needing more +than page alignment. + +Signed-off-by: Richard Henderson +Signed-off-by: Zhongrui Tang +--- + linux-user/elfload.c | 35 ++++++++++++++++++++++++++++------- + 1 file changed, 28 insertions(+), 7 deletions(-) + +diff --git a/linux-user/elfload.c b/linux-user/elfload.c +index cf9e74468b..2a82468079 100644 +--- a/linux-user/elfload.c ++++ b/linux-user/elfload.c +@@ -3263,7 +3263,8 @@ static void load_elf_image(const char *image_name, const ImageSource *src, + char **pinterp_name) + { + g_autofree struct elf_phdr *phdr = NULL; +- abi_ulong load_addr, load_bias, loaddr, hiaddr, error; ++ abi_ulong load_addr, load_bias, loaddr, hiaddr, error, align; ++ size_t reserve_size, align_size; + int i, prot_exec; + Error *err = NULL; + +@@ -3347,6 +3348,9 @@ static void load_elf_image(const char *image_name, const ImageSource *src, + + load_addr = loaddr; + ++ align = pow2ceil(info->alignment); ++ info->alignment = align; ++ + if (pinterp_name != NULL) { + if (ehdr->e_type == ET_EXEC) { + /* +@@ -3355,8 +3359,6 @@ static void load_elf_image(const char *image_name, const ImageSource *src, + */ + probe_guest_base(image_name, loaddr, hiaddr); + } else { +- abi_ulong align; +- + /* + * The binary is dynamic, but we still need to + * select guest_base. In this case we pass a size. +@@ -3374,10 +3376,7 @@ static void load_elf_image(const char *image_name, const ImageSource *src, + * Since we do not have complete control over the guest + * address space, we prefer the kernel to choose some address + * rather than force the use of LOAD_ADDR via MAP_FIXED. +- * But without MAP_FIXED we cannot guarantee alignment, +- * only suggest it. 
+ */ +- align = pow2ceil(info->alignment); + if (align) { + load_addr &= -align; + } +@@ -3401,13 +3400,35 @@ static void load_elf_image(const char *image_name, const ImageSource *src, + * In both cases, we will overwrite pages in this range with mappings + * from the executable. + */ +- load_addr = target_mmap(load_addr, (size_t)hiaddr - loaddr + 1, PROT_NONE, ++ reserve_size = (size_t)hiaddr - loaddr + 1; ++ align_size = reserve_size; ++ ++ if (ehdr->e_type != ET_EXEC && align > qemu_real_host_page_size()) { ++ align_size += align - 1; ++ } ++ ++ load_addr = target_mmap(load_addr, align_size, PROT_NONE, + MAP_PRIVATE | MAP_ANON | MAP_NORESERVE | + (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0), + -1, 0); + if (load_addr == -1) { + goto exit_mmap; + } ++ ++ if (align_size != reserve_size) { ++ abi_ulong align_addr = ROUND_UP(load_addr, align); ++ abi_ulong align_end = align_addr + reserve_size; ++ abi_ulong load_end = load_addr + align_size; ++ ++ if (align_addr != load_addr) { ++ target_munmap(load_addr, align_addr - load_addr); ++ } ++ if (align_end != load_end) { ++ target_munmap(align_end, load_end - align_end); ++ } ++ load_addr = align_addr; ++ } ++ + load_bias = load_addr - loaddr; + + if (elf_is_fdpic(ehdr)) { +-- +2.41.0.windows.1 + diff --git a/linux-user-Print-tid-not-pid-with-strace.patch b/linux-user-Print-tid-not-pid-with-strace.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c60527a26dec867a8659bb0dd69e6b09272579b --- /dev/null +++ b/linux-user-Print-tid-not-pid-with-strace.patch @@ -0,0 +1,36 @@ +From 2f37362de1d971cc90c35405705bfa22a33f6cd8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= +Date: Wed, 20 Nov 2024 14:20:24 -0600 +Subject: [PATCH] linux-user: Print tid not pid with strace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This aligns with strace, and is very useful when tracing multi-threaded +programs. 
The result is the same in single-threaded programs. + +Signed-off-by: J. Neuschäfer +Message-Id: 20241024-strace-v1-1-56c4161431cd@gmx.net +[rth: Use TaskState.ts_tid via get_task_state()] +Signed-off-by: Richard Henderson +Signed-off-by: Zhongrui Tang +--- + linux-user/strace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/linux-user/strace.c b/linux-user/strace.c +index cf26e55264..ac9177ebe4 100644 +--- a/linux-user/strace.c ++++ b/linux-user/strace.c +@@ -4176,7 +4176,7 @@ print_syscall(CPUArchState *cpu_env, int num, + if (!f) { + return; + } +- fprintf(f, "%d ", getpid()); ++ fprintf(f, "%d ", get_task_state(env_cpu(cpu_env))->ts_tid); + + for (i = 0; i < nsyscalls; i++) { + if (scnames[i].nr == num) { +-- +2.41.0.windows.1 + diff --git a/linux-user-Tolerate-CONFIG_LSM_MMAP_MIN_ADDR.patch b/linux-user-Tolerate-CONFIG_LSM_MMAP_MIN_ADDR.patch new file mode 100644 index 0000000000000000000000000000000000000000..74d8b6898205e356518b3c0b5d5711c45ebd3b9a --- /dev/null +++ b/linux-user-Tolerate-CONFIG_LSM_MMAP_MIN_ADDR.patch @@ -0,0 +1,52 @@ +From 6d4db685ae8b4cbffab80c61c01ef56c57b67eb4 Mon Sep 17 00:00:00 2001 +From: guping +Date: Mon, 18 Nov 2024 03:09:59 +0000 +Subject: [PATCH] linux-user: Tolerate CONFIG_LSM_MMAP_MIN_ADDR cherry-pick + from fb7f3572b111ffb6c2dd2c7f6c5b4dc57dd8a3f5 + +Running qemu-i386 on a system running with SELinux in enforcing mode +(more precisely: s390x trixie container on Fedora 40) fails with: + + qemu-i386: tests/tcg/i386-linux-user/sigreturn-sigmask: Unable to find a guest_base to satisfy all guest address mapping requirements + 00000000-ffffffff + +The reason is that main() determines mmap_min_addr from +/proc/sys/vm/mmap_min_addr, but SELinux additionally defines +CONFIG_LSM_MMAP_MIN_ADDR, which is normally larger: 32K or 64K, but, +in general, can be anything. There is no portable way to query its +value: /boot/config, /proc/config and /proc/config.gz are distro- and +environment-specific. 
+ +Once the identity map fails, the magnitude of guest_base does not +matter, so fix by starting the search from 1M or 1G. + +Cc: qemu-stable@nongnu.org +Resolves: #2598 + + +Suggested-by: default avatarRichard Henderson +Signed-off-by: default avatarIlya Leoshkevich +Message-ID: <20241023002558.34589-1-iii@linux.ibm.com> +Signed-off-by: default avatarRichard Henderson + +Signed-off-by: guping +--- + linux-user/elfload.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/linux-user/elfload.c b/linux-user/elfload.c +index cf9e74468b..0df64c6442 100644 +--- a/linux-user/elfload.c ++++ b/linux-user/elfload.c +@@ -2980,7 +2980,7 @@ static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base, + static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root, + uintptr_t align, uintptr_t brk) + { +- uintptr_t last = mmap_min_addr; ++ uintptr_t last = sizeof(uintptr_t) == 4 ? MiB : GiB; + uintptr_t base, skip; + + while (true) { +-- +2.41.0.windows.1 + diff --git a/linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch b/linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch deleted file mode 100644 index 2d0c6abf3d233a0694cec23a2097011c39d4fd1f..0000000000000000000000000000000000000000 --- a/linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 7b4aded3f772ef43e2b600594f755eadd5da5958 Mon Sep 17 00:00:00 2001 -From: Jonathan Marler -Date: Sat, 2 May 2020 10:12:25 -0600 -Subject: [PATCH 3/5] linux-user/mmap.c: fix integer underflow in target_mremap - -Fixes: https://bugs.launchpad.net/bugs/1876373 - -This code path in mmap occurs when a page size is decreased with mremap. When a section of pages is shrunk, qemu calls mmap_reserve on the pages that were released. However, it has the diff operation reversed, subtracting the larger old_size from the smaller new_size. Instead, it should be subtracting the smaller new_size from the larger old_size. 
You can also see in the previous line of the change that this mmap_reserve call only occurs when old_size > new_size. - -Bug: https://bugs.launchpad.net/qemu/+bug/1876373 -Signed-off-by: Jonathan Marler -Reviewded-by: Laurent Vivier -Message-Id: <20200502161225.14346-1-johnnymarler@gmail.com> -Signed-off-by: Laurent Vivier ---- - linux-user/mmap.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/linux-user/mmap.c b/linux-user/mmap.c -index 46a6e3a7..2a9ca0c3 100644 ---- a/linux-user/mmap.c -+++ b/linux-user/mmap.c -@@ -740,7 +740,7 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, - if (prot == 0) { - host_addr = mremap(g2h(old_addr), old_size, new_size, flags); - if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) { -- mmap_reserve(old_addr + old_size, new_size - old_size); -+ mmap_reserve(old_addr + old_size, old_size - new_size); - } - } else { - errno = ENOMEM; --- -2.23.0 - diff --git a/lm32-do-not-leak-memory-on-object_new-object_unref.patch b/lm32-do-not-leak-memory-on-object_new-object_unref.patch deleted file mode 100644 index 7ccc53684bb3d3224757209a4c1710883214fcc8..0000000000000000000000000000000000000000 --- a/lm32-do-not-leak-memory-on-object_new-object_unref.patch +++ /dev/null @@ -1,77 +0,0 @@ -From d50be5295c49be1b6024f5902948b52e683b4c23 Mon Sep 17 00:00:00 2001 -From: lizhengui -Date: Wed, 9 Sep 2020 14:18:35 +0800 -Subject: [PATCH] lm32: do not leak memory on object_new/object_unref -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Bottom halves and ptimers are malloced, but nothing in these -files is freeing memory allocated by instance_init. Since -these are sysctl devices that are never unrealized, just moving -the allocations to realize is enough to avoid the leak in -practice (and also to avoid upsetting asan when running -device-introspect-test). 
- -Signed-off-by: Paolo Bonzini -Reviewed-by: Philippe Mathieu-Daudé ---- - hw/timer/lm32_timer.c | 6 +++--- - hw/timer/milkymist-sysctl.c | 10 +++++----- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/hw/timer/lm32_timer.c b/hw/timer/lm32_timer.c -index 6ce876c6..13f15825 100644 ---- a/hw/timer/lm32_timer.c -+++ b/hw/timer/lm32_timer.c -@@ -184,9 +184,6 @@ static void lm32_timer_init(Object *obj) - - sysbus_init_irq(dev, &s->irq); - -- s->bh = qemu_bh_new(timer_hit, s); -- s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); -- - memory_region_init_io(&s->iomem, obj, &timer_ops, s, - "timer", R_MAX * 4); - sysbus_init_mmio(dev, &s->iomem); -@@ -196,6 +193,9 @@ static void lm32_timer_realize(DeviceState *dev, Error **errp) - { - LM32TimerState *s = LM32_TIMER(dev); - -+ s->bh = qemu_bh_new(timer_hit, s); -+ s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); -+ - ptimer_set_freq(s->ptimer, s->freq_hz); - } - -diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c -index a9d25087..2f1ecc6d 100644 ---- a/hw/timer/milkymist-sysctl.c -+++ b/hw/timer/milkymist-sysctl.c -@@ -280,11 +280,6 @@ static void milkymist_sysctl_init(Object *obj) - sysbus_init_irq(dev, &s->timer0_irq); - sysbus_init_irq(dev, &s->timer1_irq); - -- s->bh0 = qemu_bh_new(timer0_hit, s); -- s->bh1 = qemu_bh_new(timer1_hit, s); -- s->ptimer0 = ptimer_init(s->bh0, PTIMER_POLICY_DEFAULT); -- s->ptimer1 = ptimer_init(s->bh1, PTIMER_POLICY_DEFAULT); -- - memory_region_init_io(&s->regs_region, obj, &sysctl_mmio_ops, s, - "milkymist-sysctl", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -@@ -294,6 +289,11 @@ static void milkymist_sysctl_realize(DeviceState *dev, Error **errp) - { - MilkymistSysctlState *s = MILKYMIST_SYSCTL(dev); - -+ s->bh0 = qemu_bh_new(timer0_hit, s); -+ s->bh1 = qemu_bh_new(timer1_hit, s); -+ s->ptimer0 = ptimer_init(s->bh0, PTIMER_POLICY_DEFAULT); -+ s->ptimer1 = ptimer_init(s->bh1, PTIMER_POLICY_DEFAULT); -+ - ptimer_set_freq(s->ptimer0, 
s->freq_hz); - ptimer_set_freq(s->ptimer1, s->freq_hz); - } --- -2.19.1 - diff --git a/load_elf-fix-iterator-s-type-for-elf-file-processing.patch b/load_elf-fix-iterator-s-type-for-elf-file-processing.patch new file mode 100644 index 0000000000000000000000000000000000000000..a78b0f1756e1de911b32e35beda4fb450ece46e2 --- /dev/null +++ b/load_elf-fix-iterator-s-type-for-elf-file-processing.patch @@ -0,0 +1,43 @@ +From 2651409cf43002dc497483ae3ae227d4c602ca45 Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 17:02:38 +0800 +Subject: [PATCH] load_elf: fix iterator's type for elf file processing + +cherry picked from commit 410c2a4d75f52f6a2fe978eda5a9b6f854afe5ea + +j is used while loading an ELF file to byteswap segments' +data. If data is larger than 2GB an overflow may happen. +So j should be elf_word. + +This commit fixes a minor bug: it's unlikely anybody is trying to +load ELF files with 2GB+ segments for wrong-endianness targets, +but if they did, it wouldn't work correctly. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 7ef295e ("loader: Add data swap option to load-elf") +Signed-off-by: Anastasia Belova +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +Signed-off-by: Gao Jiazhen +--- + include/hw/elf_ops.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h +index 0a5c258fe6..9c35d1b9da 100644 +--- a/include/hw/elf_ops.h ++++ b/include/hw/elf_ops.h +@@ -500,7 +500,7 @@ static ssize_t glue(load_elf, SZ)(const char *name, int fd, + } + + if (data_swab) { +- int j; ++ elf_word j; + for (j = 0; j < file_size; j += (1 << data_swab)) { + uint8_t *dp = data + j; + switch (data_swab) { +-- +2.41.0.windows.1 + diff --git a/log-Add-log-at-boot-cpu-init-for-aarch64.patch b/log-Add-log-at-boot-cpu-init-for-aarch64.patch new file mode 100644 index 0000000000000000000000000000000000000000..73ddaae920abb4e64793dfd332ba4ba34c3a9b81 --- /dev/null +++ b/log-Add-log-at-boot-cpu-init-for-aarch64.patch @@ -0,0 +1,68 @@ +From 16c4b8946903985e3dfd470d0e04b79d473505bc Mon Sep 17 00:00:00 2001 +From: "wanghaibin.wang" +Date: Sun, 17 Mar 2024 15:53:57 +0800 +Subject: [PATCH] log: Add log at boot & cpu init for aarch64 + +Add log at boot & cpu init for aarch64 + +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + hw/arm/boot.c | 4 ++++ + hw/arm/virt.c | 3 +++ + 2 files changed, 7 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 84ea6a807a..d1671e1d42 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -11,6 +11,7 @@ + #include "qemu/datadir.h" + #include "qemu/error-report.h" + #include "qapi/error.h" ++#include "qemu/log.h" + #include + #include "hw/arm/boot.h" + #include "hw/arm/linux-boot-if.h" +@@ -1226,6 +1227,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) + * doesn't support secure. 
+ */ + assert(!(info->secure_board_setup && kvm_enabled())); ++ ++ qemu_log("load the kernel\n"); ++ + info->kernel_filename = ms->kernel_filename; + info->kernel_cmdline = ms->kernel_cmdline; + info->initrd_filename = ms->initrd_filename; +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c19cacec8b..f4c3d47f30 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -32,6 +32,7 @@ + #include "qemu/datadir.h" + #include "qemu/units.h" + #include "qemu/option.h" ++#include "qemu/log.h" + #include "monitor/qdev.h" + #include "hw/sysbus.h" + #include "hw/arm/boot.h" +@@ -1020,6 +1021,7 @@ static void virt_powerdown_req(Notifier *n, void *opaque) + { + VirtMachineState *s = container_of(n, VirtMachineState, powerdown_notifier); + ++ qemu_log("send powerdown to vm.\n"); + if (s->acpi_dev) { + acpi_send_event(s->acpi_dev, ACPI_POWER_DOWN_STATUS); + } else { +@@ -2240,6 +2242,7 @@ static void machvirt_init(MachineState *machine) + } + + create_fdt(vms); ++ qemu_log("cpu init start\n"); + + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { +-- +2.27.0 + diff --git a/log-Add-some-logs-on-VM-runtime-path.patch b/log-Add-some-logs-on-VM-runtime-path.patch index 80eb8c39b4bcc4884c5a8fbfa43f28b808efb912..b72b9bd9a8ec1fa44eab197f4a5864c0b93e5e40 100644 --- a/log-Add-some-logs-on-VM-runtime-path.patch +++ b/log-Add-some-logs-on-VM-runtime-path.patch @@ -1,25 +1,26 @@ -From 0c83403e6e3ab21a01941be4ec57b02388eeb9c4 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Fri, 22 May 2020 18:56:09 +0800 +From 9d683f1ea8961d89cececf1fdc3345663744067f Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 8 Feb 2022 15:48:01 +0800 Subject: [PATCH] log: Add some logs on VM runtime path Add logs on VM runtime path, to make it easier to do trouble shooting. 
Signed-off-by: Ying Fang +Signed-off-by: Yan Wang +Signed-off-by: Adttil +--- + hw/virtio/virtio-pci.c | 2 ++ + hw/virtio/virtio.c | 14 ++++++++++++-- + monitor/monitor.c | 9 +++++++++ + qapi/qmp-dispatch.c | 15 +++++++++++++++ + system/qdev-monitor.c | 4 +++- + 5 files changed, 41 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index f6d2223..b4b0ed2 100644 +index e433879542..134a8eaef6 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c -@@ -32,6 +32,7 @@ - #include "qemu/range.h" - #include "hw/virtio/virtio-bus.h" - #include "qapi/visitor.h" -+#include "qemu/log.h" - - #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) - -@@ -1659,7 +1660,9 @@ static void virtio_pci_device_unplugged(DeviceState *d) +@@ -2082,7 +2082,9 @@ static void virtio_pci_device_unplugged(DeviceState *d) VirtIOPCIProxy *proxy = VIRTIO_PCI(d); bool modern = virtio_pci_modern(proxy); bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; @@ -30,10 +31,10 @@ index f6d2223..b4b0ed2 100644 if (modern) { diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 7c3822c..79c2dcf 100644 +index 3a160f86ed..a9aa0c4f66 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c -@@ -1172,7 +1172,14 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) +@@ -2048,7 +2048,14 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) k->set_status(vdev, val); } vdev->status = val; @@ -49,7 +50,7 @@ index 7c3822c..79c2dcf 100644 return 0; } -@@ -1614,8 +1621,11 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, +@@ -2326,8 +2333,11 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, break; } @@ -63,62 +64,60 @@ index 7c3822c..79c2dcf 100644 vdev->vq[i].vring.num = queue_size; vdev->vq[i].vring.num_default = queue_size; diff --git a/monitor/monitor.c b/monitor/monitor.c -index 3ef2817..6f726e8 100644 +index 01ede1babd..e540c1334a 100644 --- a/monitor/monitor.c +++ 
b/monitor/monitor.c -@@ -28,6 +28,7 @@ +@@ -29,6 +29,7 @@ #include "qapi/qapi-emit-events.h" + #include "qapi/qapi-visit-control.h" #include "qapi/qmp/qdict.h" - #include "qapi/qmp/qstring.h" +#include "qapi/qmp/qjson.h" #include "qemu/error-report.h" #include "qemu/option.h" #include "sysemu/qtest.h" -@@ -254,6 +255,7 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) +@@ -338,6 +339,7 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) { Monitor *mon; MonitorQMP *qmp_mon; -+ QString *json; ++ GString *json; trace_monitor_protocol_event_emit(event, qdict); QTAILQ_FOREACH(mon, &mon_list, entry) { -@@ -264,6 +266,13 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) +@@ -348,6 +350,13 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) qmp_mon = container_of(mon, MonitorQMP, common); if (qmp_mon->commands != &qmp_cap_negotiation_commands) { qmp_send_response(qmp_mon, qdict); + json = qobject_to_json(QOBJECT(qdict)); + if (json) { -+ if (!strstr(json->string, "RTC_CHANGE")) { -+ qemu_log("%s\n", qstring_get_str(json)); ++ if (!strstr(json->str, "RTC_CHANGE")) { ++ qemu_log("%s\n", json->str); + } -+ qobject_unref(json); ++ g_string_free(json, true); + } } } } diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c -index e2c366e..6dfdad5 100644 +index 555528b6bb..7a215cbfd7 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c -@@ -17,7 +17,9 @@ - #include "qapi/qmp/qdict.h" - #include "qapi/qmp/qjson.h" +@@ -24,6 +24,7 @@ #include "qapi/qmp/qbool.h" -+#include "qapi/qmp/qstring.h" - #include "sysemu/sysemu.h" + #include "qemu/coroutine.h" + #include "qemu/main-loop.h" +#include "qemu/log.h" - static QDict *qmp_dispatch_check_obj(const QObject *request, bool allow_oob, - Error **errp) -@@ -83,6 +85,7 @@ static QObject *do_qmp_dispatch(QmpCommandList *cmds, QObject *request, - const char *command; - QDict *args, *dict; - QmpCommand *cmd; -+ QString *json; + Visitor 
*qobject_input_visitor_new_qmp(QObject *obj) + { +@@ -146,6 +147,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + QObject *id; QObject *ret = NULL; + QDict *rsp = NULL; ++ GString *json; - dict = qmp_dispatch_check_obj(request, allow_oob, errp); -@@ -128,6 +131,19 @@ static QObject *do_qmp_dispatch(QmpCommandList *cmds, QObject *request, + dict = qobject_to(QDict, request); + if (!dict) { +@@ -203,6 +205,19 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ qobject_ref(args); } @@ -130,27 +129,27 @@ index e2c366e..6dfdad5 100644 + && (strcmp(command, "query-balloon") != 0) + && (strcmp(command, "set_password") != 0)) { + qemu_log("qmp_cmd_name: %s, arguments: %s\n", -+ command, qstring_get_str(json)); ++ command, json->str); + } -+ qobject_unref(json); ++ g_string_free(json, true); + } + - cmd->fn(args, &ret, &local_err); - if (local_err) { - error_propagate(errp, local_err); -diff --git a/qdev-monitor.c b/qdev-monitor.c -index 58222c2..c6c1d3f 100644 ---- a/qdev-monitor.c -+++ b/qdev-monitor.c -@@ -34,6 +34,7 @@ + assert(!(oob && qemu_in_coroutine())); + assert(monitor_cur() == NULL); + if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index a13db763e5..c885175b66 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -36,6 +36,7 @@ + #include "qemu/option.h" #include "qemu/qemu-print.h" + #include "qemu/option_int.h" ++#include "qemu/log.h" #include "sysemu/block-backend.h" #include "migration/misc.h" -+#include "qemu/log.h" - - /* - * Aliases were a bad idea from the start. 
Let's keep them -@@ -586,6 +587,7 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + #include "migration/migration.h" +@@ -643,6 +644,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, if (path != NULL) { bus = qbus_find(path, errp); if (!bus) { @@ -158,24 +157,15 @@ index 58222c2..c6c1d3f 100644 return NULL; } if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { -@@ -627,6 +629,8 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) - - /* set properties */ - if (qemu_opt_foreach(opts, set_property, dev, &err)) { -+ error_setg(errp, "the bus %s -driver %s set property failed", -+ bus ? bus->name : "None", driver); +@@ -715,7 +717,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (*errp) { goto err_del_dev; } - -@@ -636,6 +640,8 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) - dev->opts = NULL; +- ++ qemu_log("add qdev %s:%s success\n", driver, dev->id ? dev->id : "none"); + if (!qdev_realize(dev, bus, errp)) { goto err_del_dev; } -+ qemu_log("add qdev %s:%s success\n", driver, -+ qemu_opts_id(opts) ? qemu_opts_id(opts) : "none"); - return dev; - - err_del_dev: -- -1.8.3.1 +2.27.0 diff --git a/loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch b/loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch new file mode 100644 index 0000000000000000000000000000000000000000..09be6fe85fc6ebdabf86c767d224315bd996dc6d --- /dev/null +++ b/loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch @@ -0,0 +1,287 @@ +From 4a5a9bef6eff5837dcccd216172957d8470b6245 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 19 Feb 2024 18:34:14 +0800 +Subject: [PATCH] loongarch: Change the UEFI loading mode to loongarch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The UEFI loading mode in loongarch is very different +from that in other architectures:loongarch's UEFI code +is in rom, while other architectures' UEFI code is in flash. 
+ +loongarch UEFI can be loaded as follows: +-machine virt,pflash=pflash0-format +-bios ./QEMU_EFI.fd + +Other architectures load UEFI using the following methods: +-machine virt,pflash0=pflash0-format,pflash1=pflash1-format + +loongarch's UEFI loading method makes qemu and libvirt incompatible +when using NVRAM, and the cost of loongarch's current loading method +far outweighs the benefits, so we decided to use the same UEFI loading +scheme as other architectures. + +Cc: Andrea Bolognani +Cc: maobibo@loongson.cn +Cc: Philippe Mathieu-Daudé +Cc: Song Gao +Cc: zhaotianrui@loongson.cn +Signed-off-by: Xianglai Li +Tested-by: Andrea Bolognani +Reviewed-by: Song Gao +Message-Id: <0bd892aa9b88e0f4cc904cb70efd0251fc1cde29.1708336919.git.lixianglai@loongson.cn> +Signed-off-by: Song Gao +--- + hw/loongarch/acpi-build.c | 29 +++++++++-- + hw/loongarch/virt.c | 101 ++++++++++++++++++++++++++---------- + include/hw/loongarch/virt.h | 10 ++-- + 3 files changed, 107 insertions(+), 33 deletions(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index ae292fc543..f990405d04 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -314,16 +314,39 @@ static void build_pci_device_aml(Aml *scope, LoongArchMachineState *lams) + static void build_flash_aml(Aml *scope, LoongArchMachineState *lams) + { + Aml *dev, *crs; ++ MemoryRegion *flash_mem; + +- hwaddr flash_base = VIRT_FLASH_BASE; +- hwaddr flash_size = VIRT_FLASH_SIZE; ++ hwaddr flash0_base; ++ hwaddr flash0_size; ++ ++ hwaddr flash1_base; ++ hwaddr flash1_size; ++ ++ flash_mem = pflash_cfi01_get_memory(lams->flash[0]); ++ flash0_base = flash_mem->addr; ++ flash0_size = memory_region_size(flash_mem); ++ ++ flash_mem = pflash_cfi01_get_memory(lams->flash[1]); ++ flash1_base = flash_mem->addr; ++ flash1_size = memory_region_size(flash_mem); + + dev = aml_device("FLS0"); + aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0015"))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); + 
+ crs = aml_resource_template(); +- aml_append(crs, aml_memory32_fixed(flash_base, flash_size, AML_READ_WRITE)); ++ aml_append(crs, aml_memory32_fixed(flash0_base, flash0_size, ++ AML_READ_WRITE)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ aml_append(scope, dev); ++ ++ dev = aml_device("FLS1"); ++ aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0015"))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(1))); ++ ++ crs = aml_resource_template(); ++ aml_append(crs, aml_memory32_fixed(flash1_base, flash1_size, ++ AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index c9a680e61a..6ef40fa24a 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -54,7 +54,9 @@ struct loaderparams { + const char *initrd_filename; + }; + +-static void virt_flash_create(LoongArchMachineState *lams) ++static PFlashCFI01 *virt_flash_create1(LoongArchMachineState *lams, ++ const char *name, ++ const char *alias_prop_name) + { + DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01); + +@@ -66,45 +68,78 @@ static void virt_flash_create(LoongArchMachineState *lams) + qdev_prop_set_uint16(dev, "id1", 0x18); + qdev_prop_set_uint16(dev, "id2", 0x00); + qdev_prop_set_uint16(dev, "id3", 0x00); +- qdev_prop_set_string(dev, "name", "virt.flash"); +- object_property_add_child(OBJECT(lams), "virt.flash", OBJECT(dev)); +- object_property_add_alias(OBJECT(lams), "pflash", ++ qdev_prop_set_string(dev, "name", name); ++ object_property_add_child(OBJECT(lams), name, OBJECT(dev)); ++ object_property_add_alias(OBJECT(lams), alias_prop_name, + OBJECT(dev), "drive"); ++ return PFLASH_CFI01(dev); ++} + +- lams->flash = PFLASH_CFI01(dev); ++static void virt_flash_create(LoongArchMachineState *lams) ++{ ++ lams->flash[0] = virt_flash_create1(lams, "virt.flash0", "pflash0"); ++ lams->flash[1] = virt_flash_create1(lams, "virt.flash1", "pflash1"); + } + +-static void 
virt_flash_map(LoongArchMachineState *lams, +- MemoryRegion *sysmem) ++static void virt_flash_map1(PFlashCFI01 *flash, ++ hwaddr base, hwaddr size, ++ MemoryRegion *sysmem) + { +- PFlashCFI01 *flash = lams->flash; + DeviceState *dev = DEVICE(flash); +- hwaddr base = VIRT_FLASH_BASE; +- hwaddr size = VIRT_FLASH_SIZE; ++ BlockBackend *blk; ++ hwaddr real_size = size; ++ ++ blk = pflash_cfi01_get_blk(flash); ++ if (blk) { ++ real_size = blk_getlength(blk); ++ assert(real_size && real_size <= size); ++ } + +- assert(QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE)); +- assert(size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX); ++ assert(QEMU_IS_ALIGNED(real_size, VIRT_FLASH_SECTOR_SIZE)); ++ assert(real_size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX); + +- qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE); ++ qdev_prop_set_uint32(dev, "num-blocks", real_size / VIRT_FLASH_SECTOR_SIZE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + memory_region_add_subregion(sysmem, base, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0)); ++} + ++static void virt_flash_map(LoongArchMachineState *lams, ++ MemoryRegion *sysmem) ++{ ++ PFlashCFI01 *flash0 = lams->flash[0]; ++ PFlashCFI01 *flash1 = lams->flash[1]; ++ ++ virt_flash_map1(flash0, VIRT_FLASH0_BASE, VIRT_FLASH0_SIZE, sysmem); ++ virt_flash_map1(flash1, VIRT_FLASH1_BASE, VIRT_FLASH1_SIZE, sysmem); + } + + static void fdt_add_flash_node(LoongArchMachineState *lams) + { + MachineState *ms = MACHINE(lams); + char *nodename; ++ MemoryRegion *flash_mem; ++ ++ hwaddr flash0_base; ++ hwaddr flash0_size; + +- hwaddr flash_base = VIRT_FLASH_BASE; +- hwaddr flash_size = VIRT_FLASH_SIZE; ++ hwaddr flash1_base; ++ hwaddr flash1_size; + +- nodename = g_strdup_printf("/flash@%" PRIx64, flash_base); ++ flash_mem = pflash_cfi01_get_memory(lams->flash[0]); ++ flash0_base = flash_mem->addr; ++ flash0_size = memory_region_size(flash_mem); ++ ++ flash_mem = pflash_cfi01_get_memory(lams->flash[1]); ++ flash1_base = 
flash_mem->addr; ++ flash1_size = memory_region_size(flash_mem); ++ ++ nodename = g_strdup_printf("/flash@%" PRIx64, flash0_base); + qemu_fdt_add_subnode(ms->fdt, nodename); + qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cfi-flash"); + qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", +- 2, flash_base, 2, flash_size); ++ 2, flash0_base, 2, flash0_size, ++ 2, flash1_base, 2, flash1_size); + qemu_fdt_setprop_cell(ms->fdt, nodename, "bank-width", 4); + g_free(nodename); + } +@@ -639,12 +674,32 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + { + char *filename = MACHINE(lams)->firmware; + char *bios_name = NULL; +- int bios_size; ++ int bios_size, i; ++ BlockBackend *pflash_blk0; ++ MemoryRegion *mr; + + lams->bios_loaded = false; + ++ /* Map legacy -drive if=pflash to machine properties */ ++ for (i = 0; i < ARRAY_SIZE(lams->flash); i++) { ++ pflash_cfi01_legacy_drive(lams->flash[i], ++ drive_get(IF_PFLASH, 0, i)); ++ } ++ + virt_flash_map(lams, get_system_memory()); + ++ pflash_blk0 = pflash_cfi01_get_blk(lams->flash[0]); ++ ++ if (pflash_blk0) { ++ if (filename) { ++ error_report("cannot use both '-bios' and '-drive if=pflash'" ++ "options at once"); ++ exit(1); ++ } ++ lams->bios_loaded = true; ++ return; ++ } ++ + if (filename) { + bios_name = qemu_find_file(QEMU_FILE_TYPE_BIOS, filename); + if (!bios_name) { +@@ -652,21 +707,15 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + exit(1); + } + +- bios_size = load_image_targphys(bios_name, VIRT_BIOS_BASE, VIRT_BIOS_SIZE); ++ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(lams->flash[0]), 0); ++ bios_size = load_image_mr(bios_name, mr); + if (bios_size < 0) { + error_report("Could not load ROM image '%s'", bios_name); + exit(1); + } +- + g_free(bios_name); +- +- memory_region_init_ram(&lams->bios, NULL, "loongarch.bios", +- VIRT_BIOS_SIZE, &error_fatal); +- memory_region_set_readonly(&lams->bios, true); +- memory_region_add_subregion(get_system_memory(), 
VIRT_BIOS_BASE, &lams->bios); + lams->bios_loaded = true; + } +- + } + + static void reset_load_elf(void *opaque) +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 6ef9a92394..252f7df7f4 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -18,10 +18,12 @@ + + #define VIRT_FWCFG_BASE 0x1e020000UL + #define VIRT_BIOS_BASE 0x1c000000UL +-#define VIRT_BIOS_SIZE (4 * MiB) ++#define VIRT_BIOS_SIZE (16 * MiB) + #define VIRT_FLASH_SECTOR_SIZE (128 * KiB) +-#define VIRT_FLASH_BASE 0x1d000000UL +-#define VIRT_FLASH_SIZE (16 * MiB) ++#define VIRT_FLASH0_BASE VIRT_BIOS_BASE ++#define VIRT_FLASH0_SIZE VIRT_BIOS_SIZE ++#define VIRT_FLASH1_BASE 0x1d000000UL ++#define VIRT_FLASH1_SIZE (16 * MiB) + + #define VIRT_LOWMEM_BASE 0 + #define VIRT_LOWMEM_SIZE 0x10000000 +@@ -49,7 +51,7 @@ struct LoongArchMachineState { + int fdt_size; + DeviceState *platform_bus_dev; + PCIBus *pci_bus; +- PFlashCFI01 *flash; ++ PFlashCFI01 *flash[2]; + MemoryRegion system_iocsr; + MemoryRegion iocsr_mem; + AddressSpace as_iocsr; +-- +2.27.0 + diff --git a/loongarch-switch-boards-to-default-y.patch b/loongarch-switch-boards-to-default-y.patch new file mode 100644 index 0000000000000000000000000000000000000000..305436b3ece883594467499e853fcd7763068e88 --- /dev/null +++ b/loongarch-switch-boards-to-default-y.patch @@ -0,0 +1,60 @@ +From 0e0326de88282a601ea5178d421242d5b77afbfa Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 25 Jan 2024 13:36:37 +0100 +Subject: [PATCH 17/78] loongarch: switch boards to "default y" + +Some targets use "default y" for boards to filter out those that require +TCG. For consistency we are switching all other targets to do the same. +Continue with Loongarch. + +No changes to generated config-devices.mak file. 
+ +Signed-off-by: Paolo Bonzini +Signed-off-by: Xianglai Li +--- + .gitlab-ci.d/buildtest.yml | 2 ++ + configs/devices/loongarch64-softmmu/default.mak | 6 +++++- + hw/loongarch/Kconfig | 2 ++ + 3 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml +index 91663946de..3fb99e79e9 100644 +--- a/.gitlab-ci.d/buildtest.yml ++++ b/.gitlab-ci.d/buildtest.yml +@@ -579,6 +579,8 @@ build-tci: + - make check-tcg + + # Check our reduced build configurations ++# requires libfdt: aarch64, arm, i386, loongarch64, x86_64 ++# does not build without boards: i386, loongarch64, x86_64 + build-without-defaults: + extends: .native_build_job_template + needs: +diff --git a/configs/devices/loongarch64-softmmu/default.mak b/configs/devices/loongarch64-softmmu/default.mak +index 928bc117ef..ffe705836f 100644 +--- a/configs/devices/loongarch64-softmmu/default.mak ++++ b/configs/devices/loongarch64-softmmu/default.mak +@@ -1,3 +1,7 @@ + # Default configuration for loongarch64-softmmu + +-CONFIG_LOONGARCH_VIRT=y ++# Uncomment the following lines to disable these optional devices: ++# CONFIG_PCI_DEVICES=n ++ ++# Boards are selected by default, uncomment to keep out of the build. 
++# CONFIG_LOONGARCH_VIRT=n +diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig +index 5727efed6d..7864050563 100644 +--- a/hw/loongarch/Kconfig ++++ b/hw/loongarch/Kconfig +@@ -1,5 +1,7 @@ + config LOONGARCH_VIRT + bool ++ default y ++ depends on LOONGARCH64 + select PCI + select PCI_EXPRESS_GENERIC_BRIDGE + imply VIRTIO_VGA +-- +2.39.1 + diff --git a/mac_dbdma-Remove-leftover-dma_memory_unmap-calls-CVE.patch b/mac_dbdma-Remove-leftover-dma_memory_unmap-calls-CVE.patch new file mode 100644 index 0000000000000000000000000000000000000000..d5f98647fd919fa39ccc05951a1fa1327bbc0067 --- /dev/null +++ b/mac_dbdma-Remove-leftover-dma_memory_unmap-calls-CVE.patch @@ -0,0 +1,71 @@ +From 234034ba7e8ab516f12cb199fc45cfe7229eb281 Mon Sep 17 00:00:00 2001 +From: Mattias Nissler +Date: Mon, 16 Sep 2024 10:57:08 -0700 +Subject: [PATCH 4/4] mac_dbdma: Remove leftover `dma_memory_unmap` + calls(CVE-2024-8612) + +cherry-pick from 2d0a071e625d7234e8c5623b7e7bf445e1bef72c + +These were passing a NULL buffer pointer unconditionally, which happens +to behave in a mostly benign way (except for the chance of an excess +memory region unref and a bounce buffer leak). Per the function comment, +this was never meant to be accepted though, and triggers an assertion +with the "softmmu: Support concurrent bounce buffers" change. + +Given that the code in question never sets up any mappings, just remove +the unnecessary dma_memory_unmap calls along with the DBDMA_io struct +fields that are now entirely unused. 
+ +Signed-off-by: Mattias Nissler +Message-Id: <20240916175708.1829059-1-mnissler@rivosinc.com> +Fixes: be1e343995 ("macio: switch over to new byte-aligned DMA helpers") +Reviewed-by: Mark Cave-Ayland +Tested-by: Mark Cave-Ayland +Signed-off-by: Mark Cave-Ayland +--- + hw/ide/macio.c | 6 ------ + include/hw/ppc/mac_dbdma.h | 4 ---- + 2 files changed, 10 deletions(-) + +diff --git a/hw/ide/macio.c b/hw/ide/macio.c +index dca1cc9efc..3d895c07f4 100644 +--- a/hw/ide/macio.c ++++ b/hw/ide/macio.c +@@ -119,9 +119,6 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) + return; + + done: +- dma_memory_unmap(&address_space_memory, io->dma_mem, io->dma_len, +- io->dir, io->dma_len); +- + if (ret < 0) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); + } else { +@@ -202,9 +199,6 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) + return; + + done: +- dma_memory_unmap(&address_space_memory, io->dma_mem, io->dma_len, +- io->dir, io->dma_len); +- + if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) { + if (ret < 0) { + block_acct_failed(blk_get_stats(s->blk), &s->acct); +diff --git a/include/hw/ppc/mac_dbdma.h b/include/hw/ppc/mac_dbdma.h +index 4a3f644516..c774f6bf84 100644 +--- a/include/hw/ppc/mac_dbdma.h ++++ b/include/hw/ppc/mac_dbdma.h +@@ -44,10 +44,6 @@ struct DBDMA_io { + DBDMA_end dma_end; + /* DMA is in progress, don't start another one */ + bool processing; +- /* DMA request */ +- void *dma_mem; +- dma_addr_t dma_len; +- DMADirection dir; + }; + + /* +-- +2.45.1.windows.1 + diff --git a/make-check-unit-use-after-free-in-test-opts-visitor.patch b/make-check-unit-use-after-free-in-test-opts-visitor.patch deleted file mode 100644 index 590970004769b464b68977639a0e5e823bb9b9ac..0000000000000000000000000000000000000000 --- a/make-check-unit-use-after-free-in-test-opts-visitor.patch +++ /dev/null @@ -1,102 +0,0 @@ -From e3dfb5d2848975e9e947cb894afac87ce386a2bc Mon Sep 17 00:00:00 2001 -From: lizhengui -Date: Wed, 9 Sep 2020 15:18:52 
+0800 -Subject: [PATCH] make check-unit: use after free in test-opts-visitor - -In the struct OptsVisitor, the 'repeated_opts' member points to a list -in the 'unprocessed_opts' hash table after the list has been destroyed. -A subsequent call to visit_type_int() references the deleted list. -It results in use-after-free issue reproduced by running the test case -under the Valgrind: valgrind tests/test-opts-visitor. -A new mode ListMode::LM_TRAVERSED is declared to mark the list -traversal completed. - -Suggested-by: Markus Armbruster -Signed-off-by: Andrey Shinkevich -Message-Id: <1565024586-387112-1-git-send-email-andrey.shinkevich@virtuozzo.com> ---- - qapi/opts-visitor.c | 26 ++++++++++++++++++++++---- - 1 file changed, 22 insertions(+), 4 deletions(-) - -diff --git a/qapi/opts-visitor.c b/qapi/opts-visitor.c -index 324b1974..42d87df6 100644 ---- a/qapi/opts-visitor.c -+++ b/qapi/opts-visitor.c -@@ -24,7 +24,8 @@ enum ListMode - { - LM_NONE, /* not traversing a list of repeated options */ - -- LM_IN_PROGRESS, /* opts_next_list() ready to be called. -+ LM_IN_PROGRESS, /* -+ * opts_next_list() ready to be called. - * - * Generating the next list link will consume the most - * recently parsed QemuOpt instance of the repeated -@@ -36,7 +37,8 @@ enum ListMode - * LM_UNSIGNED_INTERVAL. - */ - -- LM_SIGNED_INTERVAL, /* opts_next_list() has been called. -+ LM_SIGNED_INTERVAL, /* -+ * opts_next_list() has been called. - * - * Generating the next list link will consume the most - * recently stored element from the signed interval, -@@ -48,7 +50,14 @@ enum ListMode - * next element of the signed interval. - */ - -- LM_UNSIGNED_INTERVAL /* Same as above, only for an unsigned interval. */ -+ LM_UNSIGNED_INTERVAL, /* Same as above, only for an unsigned interval. */ -+ -+ LM_TRAVERSED /* -+ * opts_next_list() has been called. -+ * -+ * No more QemuOpt instance in the list. -+ * The traversal has been completed. 
-+ */ - }; - - typedef enum ListMode ListMode; -@@ -238,6 +247,8 @@ opts_next_list(Visitor *v, GenericList *tail, size_t size) - OptsVisitor *ov = to_ov(v); - - switch (ov->list_mode) { -+ case LM_TRAVERSED: -+ return NULL; - case LM_SIGNED_INTERVAL: - case LM_UNSIGNED_INTERVAL: - if (ov->list_mode == LM_SIGNED_INTERVAL) { -@@ -258,6 +269,8 @@ opts_next_list(Visitor *v, GenericList *tail, size_t size) - opt = g_queue_pop_head(ov->repeated_opts); - if (g_queue_is_empty(ov->repeated_opts)) { - g_hash_table_remove(ov->unprocessed_opts, opt->name); -+ ov->repeated_opts = NULL; -+ ov->list_mode = LM_TRAVERSED; - return NULL; - } - break; -@@ -289,7 +302,8 @@ opts_end_list(Visitor *v, void **obj) - - assert(ov->list_mode == LM_IN_PROGRESS || - ov->list_mode == LM_SIGNED_INTERVAL || -- ov->list_mode == LM_UNSIGNED_INTERVAL); -+ ov->list_mode == LM_UNSIGNED_INTERVAL || -+ ov->list_mode == LM_TRAVERSED); - ov->repeated_opts = NULL; - ov->list_mode = LM_NONE; - } -@@ -306,6 +320,10 @@ lookup_scalar(const OptsVisitor *ov, const char *name, Error **errp) - list = lookup_distinct(ov, name, errp); - return list ? 
g_queue_peek_tail(list) : NULL; - } -+ if (ov->list_mode == LM_TRAVERSED) { -+ error_setg(errp, "Fewer list elements than expected"); -+ return NULL; -+ } - assert(ov->list_mode == LM_IN_PROGRESS); - return g_queue_peek_head(ov->repeated_opts); - } --- -2.19.1 - diff --git a/make-release-pull-in-edk2-submodules-so-we-can-build.patch b/make-release-pull-in-edk2-submodules-so-we-can-build.patch deleted file mode 100644 index 70bcc864d0fd976919e540165bc7167e5026c46e..0000000000000000000000000000000000000000 --- a/make-release-pull-in-edk2-submodules-so-we-can-build.patch +++ /dev/null @@ -1,60 +0,0 @@ -From c5c9b1362d1652a9d0f79f6d9ae2f80d4b5fe432 Mon Sep 17 00:00:00 2001 -From: Michael Roth -Date: Thu, 12 Sep 2019 18:12:01 -0500 -Subject: [PATCH] make-release: pull in edk2 submodules so we can build it from - tarballs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The `make efi` target added by 536d2173 is built from the roms/edk2 -submodule, which in turn relies on additional submodules nested under -roms/edk2. - -The make-release script currently only pulls in top-level submodules, -so these nested submodules are missing in the resulting tarball. - -We could try to address this situation more generally by recursively -pulling in all submodules, but this doesn't necessarily ensure the -end-result will build properly (this case also required other changes). - -Additionally, due to the nature of submodules, we may not always have -control over how these sorts of things are dealt with, so for now we -continue to handle it on a case-by-case in the make-release script. 
- -Cc: Laszlo Ersek -Cc: Bruce Rogers -Cc: qemu-stable@nongnu.org # v4.1.0 -Reported-by: Bruce Rogers -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Michael Roth -Message-Id: <20190912231202.12327-2-mdroth@linux.vnet.ibm.com> -Signed-off-by: Philippe Mathieu-Daudé -(cherry picked from commit 45c61c6c23918e3b05ed9ecac5b2328ebae5f774) -Signed-off-by: Michael Roth ---- - scripts/make-release | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/scripts/make-release b/scripts/make-release -index b4af9c9e52..a2a8cda33c 100755 ---- a/scripts/make-release -+++ b/scripts/make-release -@@ -20,6 +20,14 @@ git checkout "v${version}" - git submodule update --init - (cd roms/seabios && git describe --tags --long --dirty > .version) - (cd roms/skiboot && ./make_version.sh > .version) -+# Fetch edk2 submodule's submodules, since it won't have access to them via -+# the tarball later. -+# -+# A more uniform way to handle this sort of situation would be nice, but we -+# don't necessarily have much control over how a submodule handles its -+# submodule dependencies, so we continue to handle these on a case-by-case -+# basis for now. -+(cd roms/edk2 && git submodule update --init) - popd - tar --exclude=.git -cjf ${destination}.tar.bz2 ${destination} - rm -rf ${destination} --- -2.23.0 diff --git a/mcf5208-fix-leak-from-qemu_allocate_irqs.patch b/mcf5208-fix-leak-from-qemu_allocate_irqs.patch deleted file mode 100644 index 7e254f577e4f08bc332bb94dda769ce9a584c623..0000000000000000000000000000000000000000 --- a/mcf5208-fix-leak-from-qemu_allocate_irqs.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 07b7cdb648124748c34be299fbfdfe3b6e38a521 Mon Sep 17 00:00:00 2001 -From: lizhengui -Date: Wed, 9 Sep 2020 14:53:00 +0800 -Subject: [PATCH] mcf5208: fix leak from qemu_allocate_irqs - -The array returned by qemu_allocate_irqs is malloced, free it. 
- -Signed-off-by: Paolo Bonzini -Reviewed-by: Thomas Huth ---- - hw/m68k/mcf5208.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c -index 6f6efae9..cc765eac 100644 ---- a/hw/m68k/mcf5208.c -+++ b/hw/m68k/mcf5208.c -@@ -270,6 +270,8 @@ static void mcf5208evb_init(MachineState *machine) - 0xfc030000, pic + 36); - } - -+ g_free(pic); -+ - /* 0xfc000000 SCM. */ - /* 0xfc004000 XBS. */ - /* 0xfc008000 FlexBus CS. */ --- -2.19.1 - diff --git a/megasas-avoid-NULL-pointer-dereference.patch b/megasas-avoid-NULL-pointer-dereference.patch deleted file mode 100644 index c7bc95901d82110b49e65ccab6cd9a84dc562aa0..0000000000000000000000000000000000000000 --- a/megasas-avoid-NULL-pointer-dereference.patch +++ /dev/null @@ -1,36 +0,0 @@ -From cf7f42b21aaa7694c6232a9a5027de9df341f299 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 14 May 2020 00:55:39 +0530 -Subject: [PATCH 5/9] megasas: avoid NULL pointer dereference - -While in megasas_handle_frame(), megasas_enqueue_frame() may -set a NULL frame into MegasasCmd object for a given 'frame_addr' -address. Add check to avoid a NULL pointer dereference issue. 
- -Reported-by: Alexander Bulekov -Fixes: https://bugs.launchpad.net/qemu/+bug/1878259 -Signed-off-by: Prasad J Pandit -Acked-by: Alexander Bulekov -Reviewed-by: Darren Kenny -Message-Id: <20200513192540.1583887-3-ppandit@redhat.com> -Signed-off-by: Paolo Bonzini ---- - hw/scsi/megasas.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c -index 7ee331d9da..5923ffbd22 100644 ---- a/hw/scsi/megasas.c -+++ b/hw/scsi/megasas.c -@@ -503,7 +503,7 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s, - cmd->pa = frame; - /* Map all possible frames */ - cmd->frame = pci_dma_map(pcid, frame, &frame_size_p, 0); -- if (frame_size_p != frame_size) { -+ if (!cmd->frame || frame_size_p != frame_size) { - trace_megasas_qf_map_failed(cmd->index, (unsigned long)frame); - if (cmd->frame) { - megasas_unmap_frame(s, cmd); --- -2.25.1 - diff --git a/megasas-use-unsigned-type-for-positive-numeric-field.patch b/megasas-use-unsigned-type-for-positive-numeric-field.patch deleted file mode 100644 index 7e194395193623e061917b0a5e6315d6b8564a61..0000000000000000000000000000000000000000 --- a/megasas-use-unsigned-type-for-positive-numeric-field.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 7bad515189482d289d3efe4133c8af9f184662e4 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 14 May 2020 00:55:40 +0530 -Subject: [PATCH 6/9] megasas: use unsigned type for positive numeric fields - -Use unsigned type for the MegasasState fields which hold positive -numeric values. 
- -Signed-off-by: Prasad J Pandit -Reviewed-by: Darren Kenny -Message-Id: <20200513192540.1583887-4-ppandit@redhat.com> -Signed-off-by: Paolo Bonzini ---- - hw/scsi/megasas.c | 38 +++++++++++++++++++------------------- - 1 file changed, 19 insertions(+), 19 deletions(-) - -diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c -index 5923ffbd22..94469e8169 100644 ---- a/hw/scsi/megasas.c -+++ b/hw/scsi/megasas.c -@@ -85,34 +85,34 @@ typedef struct MegasasState { - MemoryRegion queue_io; - uint32_t frame_hi; - -- int fw_state; -+ uint32_t fw_state; - uint32_t fw_sge; - uint32_t fw_cmds; - uint32_t flags; -- int fw_luns; -- int intr_mask; -- int doorbell; -- int busy; -- int diag; -- int adp_reset; -+ uint32_t fw_luns; -+ uint32_t intr_mask; -+ uint32_t doorbell; -+ uint32_t busy; -+ uint32_t diag; -+ uint32_t adp_reset; - OnOffAuto msi; - OnOffAuto msix; - - MegasasCmd *event_cmd; -- int event_locale; -+ uint16_t event_locale; - int event_class; -- int event_count; -- int shutdown_event; -- int boot_event; -+ uint32_t event_count; -+ uint32_t shutdown_event; -+ uint32_t boot_event; - - uint64_t sas_addr; - char *hba_serial; - - uint64_t reply_queue_pa; - void *reply_queue; -- int reply_queue_len; -+ uint16_t reply_queue_len; - uint16_t reply_queue_head; -- int reply_queue_tail; -+ uint16_t reply_queue_tail; - uint64_t consumer_pa; - uint64_t producer_pa; - -@@ -2258,9 +2258,9 @@ static const VMStateDescription vmstate_megasas_gen1 = { - VMSTATE_PCI_DEVICE(parent_obj, MegasasState), - VMSTATE_MSIX(parent_obj, MegasasState), - -- VMSTATE_INT32(fw_state, MegasasState), -- VMSTATE_INT32(intr_mask, MegasasState), -- VMSTATE_INT32(doorbell, MegasasState), -+ VMSTATE_UINT32(fw_state, MegasasState), -+ VMSTATE_UINT32(intr_mask, MegasasState), -+ VMSTATE_UINT32(doorbell, MegasasState), - VMSTATE_UINT64(reply_queue_pa, MegasasState), - VMSTATE_UINT64(consumer_pa, MegasasState), - VMSTATE_UINT64(producer_pa, MegasasState), -@@ -2277,9 +2277,9 @@ static const VMStateDescription 
vmstate_megasas_gen2 = { - VMSTATE_PCI_DEVICE(parent_obj, MegasasState), - VMSTATE_MSIX(parent_obj, MegasasState), - -- VMSTATE_INT32(fw_state, MegasasState), -- VMSTATE_INT32(intr_mask, MegasasState), -- VMSTATE_INT32(doorbell, MegasasState), -+ VMSTATE_UINT32(fw_state, MegasasState), -+ VMSTATE_UINT32(intr_mask, MegasasState), -+ VMSTATE_UINT32(doorbell, MegasasState), - VMSTATE_UINT64(reply_queue_pa, MegasasState), - VMSTATE_UINT64(consumer_pa, MegasasState), - VMSTATE_UINT64(producer_pa, MegasasState), --- -2.25.1 - diff --git a/megasas-use-unsigned-type-for-reply_queue_head-and-c.patch b/megasas-use-unsigned-type-for-reply_queue_head-and-c.patch deleted file mode 100644 index 507aeafb6911562d542f06f91c75a3dd90f43478..0000000000000000000000000000000000000000 --- a/megasas-use-unsigned-type-for-reply_queue_head-and-c.patch +++ /dev/null @@ -1,51 +0,0 @@ -From e081fb1058e357d4d7adc30201013a46123fe2ae Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 14 May 2020 00:55:38 +0530 -Subject: [PATCH 4/9] megasas: use unsigned type for reply_queue_head and check - index - -A guest user may set 'reply_queue_head' field of MegasasState to -a negative value. Later in 'megasas_lookup_frame' it is used to -index into s->frames[] array. Use unsigned type to avoid OOB -access issue. - -Also check that 'index' value stays within s->frames[] bounds -through the while() loop in 'megasas_lookup_frame' to avoid OOB -access. 
- -Reported-by: Ren Ding -Reported-by: Hanqing Zhao -Reported-by: Alexander Bulekov -Signed-off-by: Prasad J Pandit -Acked-by: Alexander Bulekov -Message-Id: <20200513192540.1583887-2-ppandit@redhat.com> -Signed-off-by: Paolo Bonzini ---- - hw/scsi/megasas.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c -index 0c4399930a..7ee331d9da 100644 ---- a/hw/scsi/megasas.c -+++ b/hw/scsi/megasas.c -@@ -111,7 +111,7 @@ typedef struct MegasasState { - uint64_t reply_queue_pa; - void *reply_queue; - int reply_queue_len; -- int reply_queue_head; -+ uint16_t reply_queue_head; - int reply_queue_tail; - uint64_t consumer_pa; - uint64_t producer_pa; -@@ -444,7 +444,7 @@ static MegasasCmd *megasas_lookup_frame(MegasasState *s, - - index = s->reply_queue_head; - -- while (num < s->fw_cmds) { -+ while (num < s->fw_cmds && index < MEGASAS_MAX_FRAMES) { - if (s->frames[index].pa && s->frames[index].pa == frame) { - cmd = &s->frames[index]; - break; --- -2.25.1 - diff --git a/memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch b/memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch deleted file mode 100644 index c67de46045dcbdca04a8a78d8ca0d44b27a794c2..0000000000000000000000000000000000000000 --- a/memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch +++ /dev/null @@ -1,32 +0,0 @@ -From b7f4f3b71a179a21a90ca32ef7d6ea000fb0e3bd Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 25 Mar 2019 16:35:05 +0100 -Subject: [PATCH] memory: Add IOMMU_ATTR_MSI_TRANSLATE IOMMU memory region - attribute - -We introduce a new IOMMU Memory Region attribute, IOMMU_ATTR_MSI_TRANSLATE -which tells whether the virtual IOMMU translates MSIs. ARM SMMU -will expose this attribute since, as opposed to Intel DMAR, MSIs -are translated as any other DMA requests. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - include/exec/memory.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 74606e14aa..716b07e115 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -242,6 +242,7 @@ struct MemoryRegionOps { - enum IOMMUMemoryRegionAttr { - IOMMU_ATTR_SPAPR_TCE_FD, - IOMMU_ATTR_VFIO_NESTED, -+ IOMMU_ATTR_MSI_TRANSLATE, - }; - - /** --- -2.27.0 - diff --git a/memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch b/memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch deleted file mode 100644 index 3932161dc8aeb2377a64f77c1ccc2e8a5c0d9a6a..0000000000000000000000000000000000000000 --- a/memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 5f4291f431add76b8754a5fb2d62ab4108ece73f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 1 Jul 2019 11:30:30 +0200 -Subject: [PATCH] memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region - attribute - -We introduce a new IOMMU Memory Region attribute, -IOMMU_ATTR_VFIO_NESTED that tells whether the virtual IOMMU -requires HW nested paging for VFIO integration. - -Current Intel virtual IOMMU device supports "Caching -Mode" and does not require 2 stages at physical level to be -integrated with VFIO. However SMMUv3 does not implement such -"caching mode" and requires to use HW nested paging. - -As such SMMUv3 is the first IOMMU device to advertise this -attribute. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/arm/smmuv3.c | 12 ++++++++++++ - include/exec/memory.h | 3 ++- - 2 files changed, 14 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 0ef1ca376c..55eed5189e 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -1531,6 +1531,17 @@ static void smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, - } - } - -+static int smmuv3_get_attr(IOMMUMemoryRegion *iommu, -+ enum IOMMUMemoryRegionAttr attr, -+ void *data) -+{ -+ if (attr == IOMMU_ATTR_VFIO_NESTED) { -+ *(bool *) data = true; -+ return 0; -+ } -+ return -EINVAL; -+} -+ - static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, - void *data) - { -@@ -1538,6 +1549,7 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, - - imrc->translate = smmuv3_translate; - imrc->notify_flag_changed = smmuv3_notify_flag_changed; -+ imrc->get_attr = smmuv3_get_attr; - } - - static const TypeInfo smmuv3_type_info = { -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 3c5206dce6..74606e14aa 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -240,7 +240,8 @@ struct MemoryRegionOps { - }; - - enum IOMMUMemoryRegionAttr { -- IOMMU_ATTR_SPAPR_TCE_FD -+ IOMMU_ATTR_SPAPR_TCE_FD, -+ IOMMU_ATTR_VFIO_NESTED, - }; - - /** --- -2.27.0 - diff --git a/memory-Add-new-fields-in-IOTLBEntry.patch b/memory-Add-new-fields-in-IOTLBEntry.patch deleted file mode 100644 index d76ff3bcd7321b32c9a57b6862f68b19f1216daa..0000000000000000000000000000000000000000 --- a/memory-Add-new-fields-in-IOTLBEntry.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 5a77056573d946eb9220b90dd1edce1f6f925c42 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 4 Sep 2018 08:43:05 -0400 -Subject: [PATCH] memory: Add new fields in IOTLBEntry - -The current IOTLBEntry becomes too simple to interact with -some physical IOMMUs. IOTLBs can be invalidated with different -granularities: domain, pasid, addr. 
Current IOTLB entry only offers -page selective invalidation. Let's add a granularity field -that conveys this information. - -TLB entries are usually tagged with some ids such as the asid -or pasid. When propagating an invalidation command from the -guest to the host, we need to pass those IDs. - -Also we add a leaf field which indicates, in case of invalidation -notification, whether only cache entries for the last level of -translation are required to be invalidated. - -A flag field is introduced to inform whether those fields are set. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - include/exec/memory.h | 36 +++++++++++++++++++++++++++++++++++- - 1 file changed, 35 insertions(+), 1 deletion(-) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index dca8184277..3c5206dce6 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -66,14 +66,48 @@ typedef enum { - IOMMU_RW = 3, - } IOMMUAccessFlags; - -+/* Granularity of the cache invalidation */ -+typedef enum { -+ IOMMU_INV_GRAN_ADDR = 0, -+ IOMMU_INV_GRAN_PASID, -+ IOMMU_INV_GRAN_DOMAIN, -+} IOMMUInvGranularity; -+ - #define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? 
IOMMU_WO : 0)) - -+/** -+ * IOMMUTLBEntry - IOMMU TLB entry -+ * -+ * Structure used when performing a translation or when notifying MAP or -+ * UNMAP (invalidation) events -+ * -+ * @target_as: target address space -+ * @iova: IO virtual address (input) -+ * @translated_addr: translated address (output) -+ * @addr_mask: address mask (0xfff means 4K binding), must be multiple of 2 -+ * @perm: permission flag of the mapping (NONE encodes no mapping or -+ * invalidation notification) -+ * @granularity: granularity of the invalidation -+ * @flags: informs whether the following fields are set -+ * @arch_id: architecture specific ID tagging the TLB -+ * @pasid: PASID tagging the TLB -+ * @leaf: when @perm is NONE, indicates whether only caches for the last -+ * level of translation need to be invalidated. -+ */ - struct IOMMUTLBEntry { - AddressSpace *target_as; - hwaddr iova; - hwaddr translated_addr; -- hwaddr addr_mask; /* 0xfff = 4k translation */ -+ hwaddr addr_mask; - IOMMUAccessFlags perm; -+ IOMMUInvGranularity granularity; -+#define IOMMU_INV_FLAGS_PASID (1 << 0) -+#define IOMMU_INV_FLAGS_ARCHID (1 << 1) -+#define IOMMU_INV_FLAGS_LEAF (1 << 2) -+ uint32_t flags; -+ uint32_t arch_id; -+ uint32_t pasid; -+ bool leaf; - }; - - /* --- -2.27.0 - diff --git a/memory-Align-MemoryRegionSections-fields.patch b/memory-Align-MemoryRegionSections-fields.patch deleted file mode 100644 index c363a026a5f2d071950d6d9749511166432d59e4..0000000000000000000000000000000000000000 --- a/memory-Align-MemoryRegionSections-fields.patch +++ /dev/null @@ -1,45 +0,0 @@ -From aebd98d0799d6dd9bb4dd4bf73f0b75c5f4e665d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Wed, 14 Aug 2019 18:55:33 +0100 -Subject: [PATCH] memory: Align MemoryRegionSections fields -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -MemoryRegionSection includes an Int128 'size' field; -on some platforms the compiler causes an alignment of this to -a 128bit boundary, leaving 8 bytes of dead space. -This deadspace can be filled with junk. - -Move the size field to the top avoiding unnecessary alignment. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20190814175535.2023-2-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 44f85d3276397cfa2cfa379c61430405dad4e644) -Signed-off-by: Michael Roth ---- - include/exec/memory.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 1625913..f0f0767 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -484,10 +484,10 @@ static inline FlatView *address_space_to_flatview(AddressSpace *as) - * @nonvolatile: this section is non-volatile - */ - struct MemoryRegionSection { -+ Int128 size; - MemoryRegion *mr; - FlatView *fv; - hwaddr offset_within_region; -- Int128 size; - hwaddr offset_within_address_space; - bool readonly; - bool nonvolatile; --- -1.8.3.1 - diff --git a/memory-Change-NotifyStateClear-definition-to-return-.patch b/memory-Change-NotifyStateClear-definition-to-return-.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc8ba1c91bf6cce662f9a07f0e691feca94c73af --- /dev/null +++ b/memory-Change-NotifyStateClear-definition-to-return-.patch @@ -0,0 +1,84 @@ +From d99491bfe7983151fa8e2688f0b0aad591e36147 Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 15:49:30 +0800 +Subject: [PATCH] memory: Change NotifyStateClear() definition to return the + result + 
+Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/13fd87aac38509ab07bacafa2e35eb528d4be365 + +So that the caller can check the result of NotifyStateClear() handler if +the operation fails. + +Signed-off-by: Chenyi Qiang +Signed-off-by: houmingyong +--- + hw/vfio/common.c | 18 ++++++++++-------- + include/exec/memory.h | 4 ++-- + 2 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 62a2000acd..182874eccb 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -350,8 +350,8 @@ out: + rcu_read_unlock(); + } + +-static void vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontainer, +- MemoryRegionSection *section) ++static int vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; +@@ -363,24 +363,26 @@ static void vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontaine + error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, + strerror(-ret)); + } ++ ++ return ret; + } + +-static void vfio_ram_discard_notify_discard(StateChangeListener *scl, +- MemoryRegionSection *section) ++static int vfio_ram_discard_notify_discard(StateChangeListener *scl, ++ MemoryRegionSection *section) + { + RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); +- vfio_state_change_notify_to_state_clear(vrdl->bcontainer, section); ++ return vfio_state_change_notify_to_state_clear(vrdl->bcontainer, section); + } + +-static void vfio_private_shared_notify_to_private(StateChangeListener *scl, +- MemoryRegionSection *section) ++static int vfio_private_shared_notify_to_private(StateChangeListener *scl, ++ MemoryRegionSection *section) + { + PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl); + 
VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener, + listener); +- vfio_state_change_notify_to_state_clear(vpsl->bcontainer, section); ++ return vfio_state_change_notify_to_state_clear(vpsl->bcontainer, section); + } + + static int vfio_state_change_notify_to_state_set(VFIOContainerBase *bcontainer, +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 964ec53afc..b93ffb533e 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -580,8 +580,8 @@ typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque); + typedef struct StateChangeListener StateChangeListener; + typedef int (*NotifyStateSet)(StateChangeListener *scl, + MemoryRegionSection *section); +-typedef void (*NotifyStateClear)(StateChangeListener *scl, +- MemoryRegionSection *section); ++typedef int (*NotifyStateClear)(StateChangeListener *scl, ++ MemoryRegionSection *section); + + struct StateChangeListener { + /* +-- +2.33.0 + diff --git a/memory-Change-memory_region_set_ram_discard_manager-.patch b/memory-Change-memory_region_set_ram_discard_manager-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f7bf838f61e141ad0d9bed6f8e26c4ec899f5f83 --- /dev/null +++ b/memory-Change-memory_region_set_ram_discard_manager-.patch @@ -0,0 +1,140 @@ +From 9d4e30a832e8de249869c6cbc29b102e4e9b3db9 Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 15:49:22 +0800 +Subject: [PATCH] memory: Change memory_region_set_ram_discard_manager() to + return the result + +Reference:https://gitlab.com/qemu-project/qemu/-/commit/ff1211154c45c9f7f82116ae9a8c72a848e4a8b5 + +Modify memory_region_set_ram_discard_manager() to return false if a +RamDiscardManager is already set in the MemoryRegion. The caller must +handle this failure, such as having virtio-mem undo its actions and fail +the realize() process. Opportunistically move the call earlier to avoid +complex error handling. 
+ +This change is beneficial when introducing a new RamDiscardManager +instance besides virtio-mem. After +ram_block_coordinated_discard_require(true) unlocks all +RamDiscardManager instances, only one instance is allowed to be set for +a MemoryRegion at present. + +Suggested-by: David Hildenbrand +Signed-off-by: Chenyi Qiang +Conflicts: + hw/virtio/virtio-mem.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/virtio/virtio-mem.c | 28 ++++++++++++++++------------ + include/exec/memory.h | 6 +++--- + system/memory.c | 10 +++++++--- + 3 files changed, 26 insertions(+), 18 deletions(-) + +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index 90bfc5e596..6f3ecddfc7 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -1049,6 +1049,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) + return; + } + ++ /* ++ * Set ourselves as RamDiscardManager before the plug handler maps the ++ * memory region and exposes it via an address space. ++ */ ++ if (memory_region_set_ram_discard_manager(&vmem->memdev->mr, ++ RAM_DISCARD_MANAGER(vmem))) { ++ error_setg(errp, "Failed to set RamDiscardManager"); ++ ram_block_coordinated_discard_require(false); ++ return; ++ } ++ + /* + * We don't know at this point whether shared RAM is migrated using + * QEMU or migrated using the file content. "x-ignore-shared" will be +@@ -1103,13 +1114,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) + &vmstate_virtio_mem_device_early, vmem); + } + qemu_register_reset(virtio_mem_system_reset, vmem); +- +- /* +- * Set ourselves as RamDiscardManager before the plug handler maps the +- * memory region and exposes it via an address space. 
+- */ +- memory_region_set_ram_discard_manager(&vmem->memdev->mr, +- RAM_DISCARD_MANAGER(vmem)); + } + + static void virtio_mem_device_unrealize(DeviceState *dev) +@@ -1117,11 +1121,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev) + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOMEM *vmem = VIRTIO_MEM(dev); + +- /* +- * The unplug handler unmapped the memory region, it cannot be +- * found via an address space anymore. Unset ourselves. +- */ +- memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); + qemu_unregister_reset(virtio_mem_system_reset, vmem); + if (vmem->early_migration) { + vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early, +@@ -1132,6 +1131,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev) + virtio_del_queue(vdev, 0); + virtio_cleanup(vdev); + g_free(vmem->bitmap); ++ /* ++ * The unplug handler unmapped the memory region, it cannot be ++ * found via an address space anymore. Unset ourselves. ++ */ ++ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); + ram_block_coordinated_discard_require(false); + } + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 950362d53c..a4e9e084cd 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2554,13 +2554,13 @@ static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr) + * + * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion + * that does not cover RAM, or a #MemoryRegion that already has a +- * #RamDiscardManager assigned. ++ * #RamDiscardManager assigned. Return 0 if the rdm is set successfully. 
+ * + * @mr: the #MemoryRegion + * @rdm: #RamDiscardManager to set + */ +-void memory_region_set_ram_discard_manager(MemoryRegion *mr, +- RamDiscardManager *rdm); ++int memory_region_set_ram_discard_manager(MemoryRegion *mr, ++ RamDiscardManager *rdm); + + /** + * memory_region_find: translate an address/size relative to a +diff --git a/system/memory.c b/system/memory.c +index 607ce9cf60..c3985e8eef 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -2121,12 +2121,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) + return mr->rdm; + } + +-void memory_region_set_ram_discard_manager(MemoryRegion *mr, +- RamDiscardManager *rdm) ++int memory_region_set_ram_discard_manager(MemoryRegion *mr, ++ RamDiscardManager *rdm) + { + g_assert(memory_region_is_ram(mr)); +- g_assert(!rdm || !mr->rdm); ++ if (mr->rdm && rdm) { ++ return -EBUSY; ++ } ++ + mr->rdm = rdm; ++ return 0; + } + + uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, +-- +2.33.0 + diff --git a/memory-Export-a-helper-to-get-intersection-of-a-Memo.patch b/memory-Export-a-helper-to-get-intersection-of-a-Memo.patch new file mode 100644 index 0000000000000000000000000000000000000000..8cea98bfe44ddf406289ce276bf028199a78b1b4 --- /dev/null +++ b/memory-Export-a-helper-to-get-intersection-of-a-Memo.patch @@ -0,0 +1,142 @@ +From a7cca9b3931b22d9893ddf938b6ab4b74d4c7533 Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 15:49:21 +0800 +Subject: [PATCH] memory: Export a helper to get intersection of a + MemoryRegionSection with a given range + +Rename the helper to memory_region_section_intersect_range() to make it +more generic. Meanwhile, define the @end as Int128 and replace the +related operations with Int128_* format since the helper is exported as +a wider API. 
+ +Reference:https://gitlab.com/qemu-project/qemu/-/commit/f47a672a72acd6e2712031f0bc4d4f3ae4b6302c + +Suggested-by: Alexey Kardashevskiy +Reviewed-by: David Hildenbrand +Signed-off-by: Chenyi Qiang +Reviewed-by: Alexey Kardashevskiy +Signed-off-by: houmingyong +--- + hw/virtio/virtio-mem.c | 32 +++++--------------------------- + include/exec/memory.h | 27 +++++++++++++++++++++++++++ + 2 files changed, 32 insertions(+), 27 deletions(-) + +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index 75ee38aa46..90bfc5e596 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -235,28 +235,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg, + return ret; + } + +-/* +- * Adjust the memory section to cover the intersection with the given range. +- * +- * Returns false if the intersection is empty, otherwise returns true. +- */ +-static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, +- uint64_t offset, uint64_t size) +-{ +- uint64_t start = MAX(s->offset_within_region, offset); +- uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), +- offset + size); +- +- if (end <= start) { +- return false; +- } +- +- s->offset_within_address_space += start - s->offset_within_region; +- s->offset_within_region = start; +- s->size = int128_make64(end - start); +- return true; +-} +- + typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); + + static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, +@@ -278,7 +256,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * vmem->block_size; + +- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { ++ if (!memory_region_section_intersect_range(&tmp, offset, size)) { + break; + } + ret = cb(&tmp, arg); +@@ -310,7 +288,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, + first_bit + 1) - 1; + size = (last_bit - 
first_bit + 1) * vmem->block_size; + +- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { ++ if (!memory_region_section_intersect_range(&tmp, offset, size)) { + break; + } + ret = cb(&tmp, arg); +@@ -346,7 +324,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + +- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { ++ if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } + rdl->notify_discard(rdl, &tmp); +@@ -362,7 +340,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + +- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { ++ if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } + ret = rdl->notify_populate(rdl, &tmp); +@@ -379,7 +357,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, + if (rdl2 == rdl) { + break; + } +- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { ++ if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } + rdl2->notify_discard(rdl2, &tmp); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 0361ec2054..950362d53c 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1272,6 +1272,33 @@ MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s); + */ + void memory_region_section_free_copy(MemoryRegionSection *s); + ++/** ++ * memory_region_section_intersect_range: Adjust the memory section to cover ++ * the intersection with the given range. ++ * ++ * @s: the #MemoryRegionSection to be adjusted ++ * @offset: the offset of the given range in the memory region ++ * @size: the size of the given range ++ * ++ * Returns false if the intersection is empty, otherwise returns true. 
++ */ ++static inline bool memory_region_section_intersect_range(MemoryRegionSection *s, ++ uint64_t offset, uint64_t size) ++{ ++ uint64_t start = MAX(s->offset_within_region, offset); ++ Int128 end = int128_min(int128_add(int128_make64(s->offset_within_region), s->size), ++ int128_add(int128_make64(offset), int128_make64(size))); ++ ++ if (int128_le(end, int128_make64(start))) { ++ return false; ++ } ++ ++ s->offset_within_address_space += start - s->offset_within_region; ++ s->offset_within_region = start; ++ s->size = int128_sub(end, int128_make64(start)); ++ return true; ++} ++ + /** + * memory_region_init: Initialize a memory region + * +-- +2.33.0 + diff --git a/memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch b/memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch deleted file mode 100644 index 7cecd31a9765fb0926a4de993b38e0d5e68dfd6b..0000000000000000000000000000000000000000 --- a/memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 497e055ed89e3cb5286dde2b05b7d7fd67e69331 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 13 Sep 2018 14:13:04 +0200 -Subject: [PATCH] memory: Introduce IOMMU Memory Region inject_faults API - -This new API allows to inject @count iommu_faults into -the IOMMU memory region. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - include/exec/memory.h | 25 +++++++++++++++++++++++++ - memory.c | 10 ++++++++++ - 2 files changed, 35 insertions(+) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 716b07e115..ffd4282f14 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -56,6 +56,8 @@ struct MemoryRegionMmio { - CPUWriteMemoryFunc *write[3]; - }; - -+struct iommu_fault; -+ - typedef struct IOMMUTLBEntry IOMMUTLBEntry; - - /* See address_space_translate: bit 0 is read, bit 1 is write. 
*/ -@@ -378,6 +380,19 @@ typedef struct IOMMUMemoryRegionClass { - * @iommu: the IOMMUMemoryRegion - */ - int (*num_indexes)(IOMMUMemoryRegion *iommu); -+ -+ /* -+ * Inject @count faults into the IOMMU memory region -+ * -+ * Optional method: if this method is not provided, then -+ * memory_region_injection_faults() will return -ENOENT -+ * -+ * @iommu: the IOMMU memory region to inject the faults in -+ * @count: number of faults to inject -+ * @buf: fault buffer -+ */ -+ int (*inject_faults)(IOMMUMemoryRegion *iommu, int count, -+ struct iommu_fault *buf); - } IOMMUMemoryRegionClass; - - typedef struct CoalescedMemoryRange CoalescedMemoryRange; -@@ -1182,6 +1197,16 @@ int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr, - */ - int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr); - -+/** -+ * memory_region_inject_faults : inject @count faults stored in @buf -+ * -+ * @iommu_mr: the IOMMU memory region -+ * @count: number of faults to be injected -+ * @buf: buffer containing the faults -+ */ -+int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count, -+ struct iommu_fault *buf); -+ - /** - * memory_region_name: get a memory region's name - * -diff --git a/memory.c b/memory.c -index 708b3dff3d..623f89baa4 100644 ---- a/memory.c -+++ b/memory.c -@@ -2017,6 +2017,16 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr) - return imrc->num_indexes(iommu_mr); - } - -+int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count, -+ struct iommu_fault *buf) -+{ -+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); -+ if (!imrc->inject_faults) { -+ return -ENOENT; -+ } -+ return imrc->inject_faults(iommu_mr, count, buf); -+} -+ - void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) - { - uint8_t mask = 1 << client; --- -2.27.0 - diff --git a/memory-Introduce-PrivateSharedManager-Interface-as-c.patch b/memory-Introduce-PrivateSharedManager-Interface-as-c.patch new file 
mode 100644 index 0000000000000000000000000000000000000000..884819dc0a76c2c141ae0eb65fa8759ce808ba1f --- /dev/null +++ b/memory-Introduce-PrivateSharedManager-Interface-as-c.patch @@ -0,0 +1,152 @@ +From 8d2a28564e7642b156d2a8d7351c5a70011c4529 Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 15:49:25 +0800 +Subject: [PATCH] memory: Introduce PrivateSharedManager Interface as child of + GenericStateManager + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/dd9686d946fcd8ebd5d5e7dec1fb8b1c05f8b980 + +To manage the private and shared RAM states in confidential VMs, +introduce a new class of PrivateShareManager as a child of +GenericStateManager, which inherits the six interface callbacks. With a +different interface type, it can be distinguished from the +RamDiscardManager object and provide the flexibility for addressing +specific requirements of confidential VMs in the future. + +Signed-off-by: Chenyi Qiang +Conflicts: + include/exec/memory.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + include/exec/memory.h | 44 +++++++++++++++++++++++++++++++++++++++++-- + system/memory.c | 17 +++++++++++++++++ + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 652d71ddf0..964ec53afc 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -55,6 +55,12 @@ typedef struct RamDiscardManager RamDiscardManager; + DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass, + RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER); + ++#define TYPE_PRIVATE_SHARED_MANAGER "private-shared-manager" ++typedef struct PrivateSharedManagerClass PrivateSharedManagerClass; ++typedef struct PrivateSharedManager PrivateSharedManager; ++DECLARE_OBJ_CHECKERS(PrivateSharedManager, PrivateSharedManagerClass, ++ PRIVATE_SHARED_MANAGER, TYPE_PRIVATE_SHARED_MANAGER) ++ + #ifdef CONFIG_FUZZ + void fuzz_dma_read_cb(size_t addr, + size_t len, +@@ -749,6 +755,14 @@ void 
generic_state_manager_register_listener(GenericStateManager *gsm, + void generic_state_manager_unregister_listener(GenericStateManager *gsm, + StateChangeListener *scl); + ++static inline void state_change_listener_init(StateChangeListener *scl, ++ NotifyStateSet state_set_fn, ++ NotifyStateClear state_clear_fn) ++{ ++ scl->notify_to_state_set = state_set_fn; ++ scl->notify_to_state_clear = state_clear_fn; ++} ++ + typedef struct RamDiscardListener RamDiscardListener; + + struct RamDiscardListener { +@@ -770,8 +784,7 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl, + NotifyStateClear discard_fn, + bool double_discard_supported) + { +- rdl->scl.notify_to_state_set = populate_fn; +- rdl->scl.notify_to_state_clear = discard_fn; ++ state_change_listener_init(&rdl->scl, populate_fn, discard_fn); + rdl->double_discard_supported = double_discard_supported; + } + +@@ -814,6 +827,25 @@ struct RamDiscardManagerClass { + GenericStateManagerClass parent_class; + }; + ++typedef struct PrivateSharedListener PrivateSharedListener; ++struct PrivateSharedListener { ++ struct StateChangeListener scl; ++ ++ QLIST_ENTRY(PrivateSharedListener) next; ++}; ++ ++struct PrivateSharedManagerClass { ++ /* private */ ++ GenericStateManagerClass parent_class; ++}; ++ ++static inline void private_shared_listener_init(PrivateSharedListener *psl, ++ NotifyStateSet populate_fn, ++ NotifyStateClear discard_fn) ++{ ++ state_change_listener_init(&psl->scl, populate_fn, discard_fn); ++} ++ + bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + ram_addr_t *ram_addr, bool *read_only, + bool *mr_has_discard_manager); +@@ -2588,6 +2620,14 @@ int memory_region_set_generic_state_manager(MemoryRegion *mr, + */ + bool memory_region_has_ram_discard_manager(MemoryRegion *mr); + ++/** ++ * memory_region_has_private_shared_manager: check whether a #MemoryRegion has a ++ * #PrivateSharedManager assigned ++ * ++ * @mr: the #MemoryRegion ++ */ ++bool 
memory_region_has_private_shared_manager(MemoryRegion *mr); ++ + /** + * memory_region_find: translate an address/size relative to a + * MemoryRegion into a #MemoryRegionSection. +diff --git a/system/memory.c b/system/memory.c +index 38f73eb48b..fa99009701 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -2143,6 +2143,16 @@ bool memory_region_has_ram_discard_manager(MemoryRegion *mr) + return true; + } + ++bool memory_region_has_private_shared_manager(MemoryRegion *mr) ++{ ++ if (!memory_region_is_ram(mr) || ++ !object_dynamic_cast(OBJECT(mr->gsm), TYPE_PRIVATE_SHARED_MANAGER)) { ++ return false; ++ } ++ ++ return true; ++} ++ + uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm, + const MemoryRegion *mr) + { +@@ -3760,12 +3770,19 @@ static const TypeInfo ram_discard_manager_info = { + .class_size = sizeof(RamDiscardManagerClass), + }; + ++static const TypeInfo private_shared_manager_info = { ++ .parent = TYPE_GENERIC_STATE_MANAGER, ++ .name = TYPE_PRIVATE_SHARED_MANAGER, ++ .class_size = sizeof(PrivateSharedManagerClass), ++}; ++ + static void memory_register_types(void) + { + type_register_static(&memory_region_info); + type_register_static(&iommu_memory_region_info); + type_register_static(&generic_state_manager_info); + type_register_static(&ram_discard_manager_info); ++ type_register_static(&private_shared_manager_info); + } + + type_init(memory_register_types) +-- +2.33.0 + diff --git a/memory-Introduce-generic-state-change-parent-class-f.patch b/memory-Introduce-generic-state-change-parent-class-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb5944caaeb42e47527ac540baf02a79949b0a92 --- /dev/null +++ b/memory-Introduce-generic-state-change-parent-class-f.patch @@ -0,0 +1,1145 @@ +From c0f15fa6a2c663bba5cf56f98bdcfec20dc2e807 Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 15:49:24 +0800 +Subject: [PATCH] memory: Introduce generic state change parent class for + 
RamDiscardManager + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/31df9c4804e4e422e27a18ca9a7e22b4123203d1 + +RamDiscardManager is an interface used by virtio-mem to adjust VFIO +mappings in relation to VM page assignment. It manages the state of +populated and discard for the RAM. To accommodate future scenarios for +managing RAM states, such as private and shared states in confidential +VMs, the existing RamDiscardManager interface needs to be generalized. + +Introduce a parent class, GenericStateManager, to manage a pair of +opposite states with RamDiscardManager as its child. The changes include +- Define a new abstract class GenericStateManager. +- Extract six callbacks into GenericStateManagerClass and allow the child + classes to inherit them. +- Modify RamDiscardManager-related helpers to use GenericStateManager + ones. +- Define a generic StateChangeListener to extract fields from + RamDiscardManager listener which allows future listeners to embed it + and avoid duplication. +- Change the users of RamDiscardManager (virtio-mem, migration, etc.) to + switch to use GenericStateManager helpers. + +It can provide a more flexible and reusable framework for RAM state +management, facilitating future enhancements and use cases. 
+ +Signed-off-by: Chenyi Qiang +Conflicts: + hw/vfio/common.c + include/exec/memory.h + system/memory.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/vfio/common.c | 30 ++-- + hw/virtio/virtio-mem.c | 95 ++++++------ + include/exec/memory.h | 313 ++++++++++++++++++++++------------------ + migration/ram.c | 16 +- + system/memory.c | 106 ++++++++------ + system/memory_mapping.c | 6 +- + 6 files changed, 310 insertions(+), 256 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 0be63c5fbc..ab7450f3bd 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -350,9 +350,10 @@ out: + rcu_read_unlock(); + } + +-static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, ++static void vfio_ram_discard_notify_discard(StateChangeListener *scl, + MemoryRegionSection *section) + { ++ RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); + VFIOContainerBase *bcontainer = vrdl->bcontainer; +@@ -368,9 +369,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + } + } + +-static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, ++static int vfio_ram_discard_notify_populate(StateChangeListener *scl, + MemoryRegionSection *section) + { ++ RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); + VFIOContainerBase *bcontainer = vrdl->bcontainer; +@@ -396,7 +398,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + vaddr, section->readonly); + if (ret) { + /* Rollback */ +- vfio_ram_discard_notify_discard(rdl, section); ++ vfio_ram_discard_notify_discard(scl, section); + return ret; + } + } +@@ -406,8 +408,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection 
*section) + { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); + VFIORamDiscardListener *vrdl; ++ RamDiscardListener *rdl; + + /* Ignore some corner cases not relevant in practice. */ + g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE)); +@@ -420,17 +423,18 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + vrdl->mr = section->mr; + vrdl->offset_within_address_space = section->offset_within_address_space; + vrdl->size = int128_get64(section->size); +- vrdl->granularity = ram_discard_manager_get_min_granularity(rdm, +- section->mr); ++ vrdl->granularity = generic_state_manager_get_min_granularity(gsm, ++ section->mr); + + g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); + g_assert(bcontainer->pgsizes && + vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes)); + +- ram_discard_listener_init(&vrdl->listener, ++ rdl = &vrdl->listener; ++ ram_discard_listener_init(rdl, + vfio_ram_discard_notify_populate, + vfio_ram_discard_notify_discard, true); +- ram_discard_manager_register_listener(rdm, &vrdl->listener, section); ++ generic_state_manager_register_listener(gsm, &rdl->scl, section); + QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next); + + /* +@@ -480,8 +484,9 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; ++ RamDiscardListener *rdl; + + QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && +@@ -495,7 +500,8 @@ static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, + 
hw_error("vfio: Trying to unregister missing RAM discard listener"); + } + +- ram_discard_manager_unregister_listener(rdm, &vrdl->listener); ++ rdl = &vrdl->listener; ++ generic_state_manager_unregister_listener(gsm, &rdl->scl); + QLIST_REMOVE(vrdl, next); + g_free(vrdl); + } +@@ -1275,7 +1281,7 @@ static int + vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { +@@ -1294,7 +1300,7 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, + * We only want/can synchronize the bitmap for actually mapped parts - + * which correspond to populated parts. Replay all populated parts. + */ +- return ram_discard_manager_replay_populated(rdm, section, ++ return generic_state_manager_replay_on_state_set(gsm, section, + vfio_ram_discard_get_dirty_bitmap, + &vrdl); + } +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index f40a816b7f..d60bc994ad 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -303,16 +303,16 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, + + static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg) + { +- RamDiscardListener *rdl = arg; ++ StateChangeListener *scl = arg; + +- return rdl->notify_populate(rdl, s); ++ return scl->notify_to_state_set(scl, s); + } + + static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg) + { +- RamDiscardListener *rdl = arg; ++ StateChangeListener *scl = arg; + +- rdl->notify_discard(rdl, s); ++ scl->notify_to_state_clear(scl, s); + return 0; + } + +@@ -322,12 +322,13 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, + RamDiscardListener *rdl; + + QLIST_FOREACH(rdl, 
&vmem->rdl_list, next) { +- MemoryRegionSection tmp = *rdl->section; ++ StateChangeListener *scl = &rdl->scl; ++ MemoryRegionSection tmp = *scl->section; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } +- rdl->notify_discard(rdl, &tmp); ++ scl->notify_to_state_clear(scl, &tmp); + } + } + +@@ -338,12 +339,13 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, + int ret = 0; + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { +- MemoryRegionSection tmp = *rdl->section; ++ StateChangeListener *scl = &rdl->scl; ++ MemoryRegionSection tmp = *scl->section; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } +- ret = rdl->notify_populate(rdl, &tmp); ++ ret = scl->notify_to_state_set(scl, &tmp); + if (ret) { + break; + } +@@ -352,7 +354,8 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, + if (ret) { + /* Notify all already-notified listeners. */ + QLIST_FOREACH(rdl2, &vmem->rdl_list, next) { +- MemoryRegionSection tmp = *rdl2->section; ++ StateChangeListener *scl2 = &rdl2->scl; ++ MemoryRegionSection tmp = *scl2->section; + + if (rdl2 == rdl) { + break; +@@ -360,7 +363,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } +- rdl2->notify_discard(rdl2, &tmp); ++ scl2->notify_to_state_clear(scl2, &tmp); + } + } + return ret; +@@ -375,10 +378,11 @@ static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) + } + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { ++ StateChangeListener *scl = &rdl->scl; + if (rdl->double_discard_supported) { +- rdl->notify_discard(rdl, rdl->section); ++ scl->notify_to_state_clear(scl, scl->section); + } else { +- virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, ++ virtio_mem_for_each_plugged_section(vmem, scl->section, scl, + virtio_mem_notify_discard_cb); + } + } +@@ -1053,8 +1057,8 @@ static void 
virtio_mem_device_realize(DeviceState *dev, Error **errp) + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ +- if (memory_region_set_ram_discard_manager(&vmem->memdev->mr, +- RAM_DISCARD_MANAGER(vmem))) { ++ if (memory_region_set_generic_state_manager(&vmem->memdev->mr, ++ GENERIC_STATE_MANAGER(vmem))) { + error_setg(errp, "Failed to set RamDiscardManager"); + ram_block_coordinated_discard_require(false); + return; +@@ -1135,7 +1139,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev) + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. + */ +- memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ++ memory_region_set_generic_state_manager(&vmem->memdev->mr, NULL); + ram_block_coordinated_discard_require(false); + } + +@@ -1184,7 +1188,8 @@ static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem) + * into an address space. Replay, now that we updated the bitmap. 
+ */ + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { +- ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, ++ StateChangeListener *scl = &rdl->scl; ++ ret = virtio_mem_for_each_plugged_section(vmem, scl->section, scl, + virtio_mem_notify_populate_cb); + if (ret) { + return ret; +@@ -1683,19 +1688,19 @@ static Property virtio_mem_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + +-static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, ++static uint64_t virtio_mem_rdm_get_min_granularity(const GenericStateManager *gsm, + const MemoryRegion *mr) + { +- const VirtIOMEM *vmem = VIRTIO_MEM(rdm); ++ const VirtIOMEM *vmem = VIRTIO_MEM(gsm); + + g_assert(mr == &vmem->memdev->mr); + return vmem->block_size; + } + +-static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, ++static bool virtio_mem_rdm_is_populated(const GenericStateManager *gsm, + const MemoryRegionSection *s) + { +- const VirtIOMEM *vmem = VIRTIO_MEM(rdm); ++ const VirtIOMEM *vmem = VIRTIO_MEM(gsm); + uint64_t start_gpa = vmem->addr + s->offset_within_region; + uint64_t end_gpa = start_gpa + int128_get64(s->size); + +@@ -1723,12 +1728,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) + return data->fn(s, data->opaque); + } + +-static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, ++static int virtio_mem_rdm_replay_populated(const GenericStateManager *gsm, + MemoryRegionSection *s, + ReplayStateChange replay_fn, + void *opaque) + { +- const VirtIOMEM *vmem = VIRTIO_MEM(rdm); ++ const VirtIOMEM *vmem = VIRTIO_MEM(gsm); + struct VirtIOMEMReplayData data = { + .fn = replay_fn, + .opaque = opaque, +@@ -1748,12 +1753,12 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, + return 0; + } + +-static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, ++static int virtio_mem_rdm_replay_discarded(const GenericStateManager *gsm, + MemoryRegionSection *s, + ReplayStateChange 
replay_fn, + void *opaque) + { +- const VirtIOMEM *vmem = VIRTIO_MEM(rdm); ++ const VirtIOMEM *vmem = VIRTIO_MEM(gsm); + struct VirtIOMEMReplayData data = { + .fn = replay_fn, + .opaque = opaque, +@@ -1764,18 +1769,19 @@ static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + virtio_mem_rdm_replay_discarded_cb); + } + +-static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, +- RamDiscardListener *rdl, ++static void virtio_mem_rdm_register_listener(GenericStateManager *gsm, ++ StateChangeListener *scl, + MemoryRegionSection *s) + { +- VirtIOMEM *vmem = VIRTIO_MEM(rdm); ++ VirtIOMEM *vmem = VIRTIO_MEM(gsm); ++ RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); + int ret; + + g_assert(s->mr == &vmem->memdev->mr); +- rdl->section = memory_region_section_new_copy(s); ++ scl->section = memory_region_section_new_copy(s); + + QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next); +- ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, ++ ret = virtio_mem_for_each_plugged_section(vmem, scl->section, scl, + virtio_mem_notify_populate_cb); + if (ret) { + error_report("%s: Replaying plugged ranges failed: %s", __func__, +@@ -1783,23 +1789,24 @@ static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, + } + } + +-static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, +- RamDiscardListener *rdl) ++static void virtio_mem_rdm_unregister_listener(GenericStateManager *gsm, ++ StateChangeListener *scl) + { +- VirtIOMEM *vmem = VIRTIO_MEM(rdm); ++ VirtIOMEM *vmem = VIRTIO_MEM(gsm); ++ RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); + +- g_assert(rdl->section->mr == &vmem->memdev->mr); ++ g_assert(scl->section->mr == &vmem->memdev->mr); + if (vmem->size) { + if (rdl->double_discard_supported) { +- rdl->notify_discard(rdl, rdl->section); ++ scl->notify_to_state_clear(scl, scl->section); + } else { +- virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, ++ 
virtio_mem_for_each_plugged_section(vmem, scl->section, scl, + virtio_mem_notify_discard_cb); + } + } + +- memory_region_section_free_copy(rdl->section); +- rdl->section = NULL; ++ memory_region_section_free_copy(scl->section); ++ scl->section = NULL; + QLIST_REMOVE(rdl, next); + } + +@@ -1832,7 +1839,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); +- RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); ++ GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_CLASS(klass); + + device_class_set_props(dc, virtio_mem_properties); + dc->vmsd = &vmstate_virtio_mem; +@@ -1853,12 +1860,12 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) + vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; + vmc->unplug_request_check = virtio_mem_unplug_request_check; + +- rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; +- rdmc->is_populated = virtio_mem_rdm_is_populated; +- rdmc->replay_populated = virtio_mem_rdm_replay_populated; +- rdmc->replay_discarded = virtio_mem_rdm_replay_discarded; +- rdmc->register_listener = virtio_mem_rdm_register_listener; +- rdmc->unregister_listener = virtio_mem_rdm_unregister_listener; ++ gsmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; ++ gsmc->is_state_set = virtio_mem_rdm_is_populated; ++ gsmc->replay_on_state_set = virtio_mem_rdm_replay_populated; ++ gsmc->replay_on_state_clear = virtio_mem_rdm_replay_discarded; ++ gsmc->register_listener = virtio_mem_rdm_register_listener; ++ gsmc->unregister_listener = virtio_mem_rdm_unregister_listener; + } + + static const TypeInfo virtio_mem_info = { +diff --git a/include/exec/memory.h b/include/exec/memory.h +index a3243ee218..652d71ddf0 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -43,6 +43,12 @@ typedef struct IOMMUMemoryRegionClass 
IOMMUMemoryRegionClass; + DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass, + IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION) + ++#define TYPE_GENERIC_STATE_MANAGER "generic-state-manager" ++typedef struct GenericStateManagerClass GenericStateManagerClass; ++typedef struct GenericStateManager GenericStateManager; ++DECLARE_OBJ_CHECKERS(GenericStateManager, GenericStateManagerClass, ++ GENERIC_STATE_MANAGER, TYPE_GENERIC_STATE_MANAGER) ++ + #define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager" + typedef struct RamDiscardManagerClass RamDiscardManagerClass; + typedef struct RamDiscardManager RamDiscardManager; +@@ -563,103 +569,59 @@ struct IOMMUMemoryRegionClass { + Error **errp); + }; + +-typedef struct RamDiscardListener RamDiscardListener; +-typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl, +- MemoryRegionSection *section); +-typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl, ++typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque); ++ ++typedef struct StateChangeListener StateChangeListener; ++typedef int (*NotifyStateSet)(StateChangeListener *scl, ++ MemoryRegionSection *section); ++typedef void (*NotifyStateClear)(StateChangeListener *scl, + MemoryRegionSection *section); + +-struct RamDiscardListener { ++struct StateChangeListener { + /* +- * @notify_populate: ++ * @notify_to_state_set: + * +- * Notification that previously discarded memory is about to get populated. +- * Listeners are able to object. If any listener objects, already +- * successfully notified listeners are notified about a discard again. ++ * Notification that previously state clear part is about to be set. + * +- * @rdl: the #RamDiscardListener getting notified +- * @section: the #MemoryRegionSection to get populated. The section ++ * @scl: the #StateChangeListener getting notified ++ * @section: the #MemoryRegionSection to be state-set. 
The section + * is aligned within the memory region to the minimum granularity + * unless it would exceed the registered section. + * + * Returns 0 on success. If the notification is rejected by the listener, + * an error is returned. + */ +- NotifyRamPopulate notify_populate; ++ NotifyStateSet notify_to_state_set; + + /* +- * @notify_discard: ++ * @notify_to_state_clear: + * +- * Notification that previously populated memory was discarded successfully +- * and listeners should drop all references to such memory and prevent +- * new population (e.g., unmap). ++ * Notification that previously state set part is about to be cleared + * +- * @rdl: the #RamDiscardListener getting notified +- * @section: the #MemoryRegionSection to get populated. The section ++ * @scl: the #StateChangeListener getting notified ++ * @section: the #MemoryRegionSection to be state-cleared. The section + * is aligned within the memory region to the minimum granularity + * unless it would exceed the registered section. +- */ +- NotifyRamDiscard notify_discard; +- +- /* +- * @double_discard_supported: + * +- * The listener suppors getting @notify_discard notifications that span +- * already discarded parts. ++ * This callback returns no value (#NotifyStateClear is a void callback); ++ * the listener cannot reject the notification. 
+ */ +- bool double_discard_supported; ++ NotifyStateClear notify_to_state_clear; + + MemoryRegionSection *section; +- QLIST_ENTRY(RamDiscardListener) next; + }; + +-static inline void ram_discard_listener_init(RamDiscardListener *rdl, +- NotifyRamPopulate populate_fn, +- NotifyRamDiscard discard_fn, +- bool double_discard_supported) +-{ +- rdl->notify_populate = populate_fn; +- rdl->notify_discard = discard_fn; +- rdl->double_discard_supported = double_discard_supported; +-} +- +-typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque); +- + /* +- * RamDiscardManagerClass: +- * +- * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion +- * regions are currently populated to be used/accessed by the VM, notifying +- * after parts were discarded (freeing up memory) and before parts will be +- * populated (consuming memory), to be used/accessed by the VM. +- * +- * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the +- * #MemoryRegion isn't mapped into an address space yet (either directly +- * or via an alias); it cannot change while the #MemoryRegion is +- * mapped into an address space. ++ * GenericStateManagerClass: + * +- * The #RamDiscardManager is intended to be used by technologies that are +- * incompatible with discarding of RAM (e.g., VFIO, which may pin all +- * memory inside a #MemoryRegion), and require proper coordination to only +- * map the currently populated parts, to hinder parts that are expected to +- * remain discarded from silently getting populated and consuming memory. +- * Technologies that support discarding of RAM don't have to bother and can +- * simply map the whole #MemoryRegion. +- * +- * An example #RamDiscardManager is virtio-mem, which logically (un)plugs +- * memory within an assigned RAM #MemoryRegion, coordinated with the VM. +- * Logically unplugging memory consists of discarding RAM. The VM agreed to not +- * access unplugged (discarded) memory - especially via DMA. 
virtio-mem will +- * properly coordinate with listeners before memory is plugged (populated), +- * and after memory is unplugged (discarded). ++ * A #GenericStateManager is a common interface used to manage the state of ++ * a #MemoryRegion. The managed state is one of a pair of opposite states, such ++ * as populated and discarded, or private and shared. They are abstracted as set ++ * and clear in the callbacks below, and the actual state is managed by the ++ * implementation. + * +- * Listeners are called in multiples of the minimum granularity (unless it +- * would exceed the registered range) and changes are aligned to the minimum +- * granularity within the #MemoryRegion. Listeners have to prepare for memory +- * becoming discarded in a different granularity than it was populated and the +- * other way around. + */ +-struct RamDiscardManagerClass { ++struct GenericStateManagerClass { + /* private */ + InterfaceClass parent_class; + +@@ -669,122 +631,188 @@ struct RamDiscardManagerClass { + * @get_min_granularity: + * + * Get the minimum granularity in which listeners will get notified +- * about changes within the #MemoryRegion via the #RamDiscardManager. ++ * about changes within the #MemoryRegion via the #GenericStateManager. + * +- * @rdm: the #RamDiscardManager ++ * @gsm: the #GenericStateManager + * @mr: the #MemoryRegion + * + * Returns the minimum granularity. + */ +- uint64_t (*get_min_granularity)(const RamDiscardManager *rdm, ++ uint64_t (*get_min_granularity)(const GenericStateManager *gsm, + const MemoryRegion *mr); + + /** +- * @is_populated: ++ * @is_state_set: + * +- * Check whether the given #MemoryRegionSection is completely populated +- * (i.e., no parts are currently discarded) via the #RamDiscardManager. +- * There are no alignment requirements. ++ * Check whether the given #MemoryRegionSection state is completely set ++ * via the #GenericStateManager. 
+ * +- * @rdm: the #RamDiscardManager ++ * @gsm: the #GenericStateManager + * @section: the #MemoryRegionSection + * +- * Returns whether the given range is completely populated. ++ * Returns whether the given range is completely set. + */ +- bool (*is_populated)(const RamDiscardManager *rdm, ++ bool (*is_state_set)(const GenericStateManager *gsm, + const MemoryRegionSection *section); + + /** +- * @replay_populated: ++ * @replay_on_state_set: + * +- * Call the #ReplayStateChange callback for all populated parts within the +- * #MemoryRegionSection via the #RamDiscardManager. ++ * Call the #ReplayStateChange callback for all state set parts within the ++ * #MemoryRegionSection via the #GenericStateManager. + * + * In case any call fails, no further calls are made. + * +- * @rdm: the #RamDiscardManager ++ * @gsm: the #GenericStateManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayStateChange callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ +- int (*replay_populated)(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayStateChange replay_fn, void *opaque); ++ int (*replay_on_state_set)(const GenericStateManager *gsm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, void *opaque); + + /** +- * @replay_discarded: ++ * @replay_on_state_clear: + * +- * Call the #ReplayStateChange callback for all discarded parts within the +- * #MemoryRegionSection via the #RamDiscardManager. ++ * Call the #ReplayStateChange callback for all state clear parts within the ++ * #MemoryRegionSection via the #GenericStateManager. ++ * ++ * In case any call fails, no further calls are made. 
+ * +- * @rdm: the #RamDiscardManager ++ * @gsm: the #GenericStateManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayStateChange callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ +- int (*replay_discarded)(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayStateChange replay_fn, void *opaque); ++ int (*replay_on_state_clear)(const GenericStateManager *gsm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, void *opaque); + + /** + * @register_listener: + * +- * Register a #RamDiscardListener for the given #MemoryRegionSection and +- * immediately notify the #RamDiscardListener about all populated parts +- * within the #MemoryRegionSection via the #RamDiscardManager. ++ * Register a #StateChangeListener for the given #MemoryRegionSection and ++ * immediately notify the #StateChangeListener about all state-set parts ++ * within the #MemoryRegionSection via the #GenericStateManager. + * + * In case any notification fails, no further notifications are triggered + * and an error is logged. 
+ * +- * @rdm: the #RamDiscardManager +- * @rdl: the #RamDiscardListener ++ * @gsm: the #GenericStateManager ++ * @scl: the #StateChangeListener + * @section: the #MemoryRegionSection + */ +- void (*register_listener)(RamDiscardManager *rdm, +- RamDiscardListener *rdl, ++ void (*register_listener)(GenericStateManager *gsm, ++ StateChangeListener *scl, + MemoryRegionSection *section); + + /** + * @unregister_listener: + * +- * Unregister a previously registered #RamDiscardListener via the +- * #RamDiscardManager after notifying the #RamDiscardListener about all +- * populated parts becoming unpopulated within the registered ++ * Unregister a previously registered #StateChangeListener via the ++ * #GenericStateManager after notifying the #StateChangeListener about all ++ * state-set parts becoming state-cleared within the registered + * #MemoryRegionSection. + * +- * @rdm: the #RamDiscardManager +- * @rdl: the #RamDiscardListener ++ * @gsm: the #GenericStateManager ++ * @scl: the #StateChangeListener + */ +- void (*unregister_listener)(RamDiscardManager *rdm, +- RamDiscardListener *rdl); ++ void (*unregister_listener)(GenericStateManager *gsm, ++ StateChangeListener *scl); + }; + +-uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, +- const MemoryRegion *mr); ++uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm, ++ const MemoryRegion *mr); + +-bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, +- const MemoryRegionSection *section); ++bool generic_state_manager_is_state_set(const GenericStateManager *gsm, ++ const MemoryRegionSection *section); + +-int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayStateChange replay_fn, +- void *opaque); ++int generic_state_manager_replay_on_state_set(const GenericStateManager *gsm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, ++ void *opaque); + +-int 
ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayStateChange replay_fn, +- void *opaque); ++int generic_state_manager_replay_on_state_clear(const GenericStateManager *gsm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, ++ void *opaque); + +-void ram_discard_manager_register_listener(RamDiscardManager *rdm, +- RamDiscardListener *rdl, +- MemoryRegionSection *section); ++void generic_state_manager_register_listener(GenericStateManager *gsm, ++ StateChangeListener *scl, ++ MemoryRegionSection *section); + +-void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, +- RamDiscardListener *rdl); ++void generic_state_manager_unregister_listener(GenericStateManager *gsm, ++ StateChangeListener *scl); ++ ++typedef struct RamDiscardListener RamDiscardListener; ++ ++struct RamDiscardListener { ++ struct StateChangeListener scl; ++ ++ /* ++ * @double_discard_supported: ++ * ++ * The listener supports getting @notify_to_state_clear notifications that ++ * span already discarded parts. ++ */ ++ bool double_discard_supported; ++ ++ QLIST_ENTRY(RamDiscardListener) next; ++}; ++ ++static inline void ram_discard_listener_init(RamDiscardListener *rdl, ++ NotifyStateSet populate_fn, ++ NotifyStateClear discard_fn, ++ bool double_discard_supported) ++{ ++ rdl->scl.notify_to_state_set = populate_fn; ++ rdl->scl.notify_to_state_clear = discard_fn; ++ rdl->double_discard_supported = double_discard_supported; ++} ++ ++/* ++ * RamDiscardManagerClass: ++ * ++ * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion ++ * regions are currently populated to be used/accessed by the VM, notifying ++ * after parts were discarded (freeing up memory) and before parts will be ++ * populated (consuming memory), to be used/accessed by the VM. 
++ * ++ * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the ++ * #MemoryRegion isn't mapped into an address space yet (either directly ++ * or via an alias); it cannot change while the #MemoryRegion is ++ * mapped into an address space. ++ * ++ * The #RamDiscardManager is intended to be used by technologies that are ++ * incompatible with discarding of RAM (e.g., VFIO, which may pin all ++ * memory inside a #MemoryRegion), and require proper coordination to only ++ * map the currently populated parts, to hinder parts that are expected to ++ * remain discarded from silently getting populated and consuming memory. ++ * Technologies that support discarding of RAM don't have to bother and can ++ * simply map the whole #MemoryRegion. ++ * ++ * An example #RamDiscardManager is virtio-mem, which logically (un)plugs ++ * memory within an assigned RAM #MemoryRegion, coordinated with the VM. ++ * Logically unplugging memory consists of discarding RAM. The VM agreed to not ++ * access unplugged (discarded) memory - especially via DMA. virtio-mem will ++ * properly coordinate with listeners before memory is plugged (populated), ++ * and after memory is unplugged (discarded). ++ * ++ * Listeners are called in multiples of the minimum granularity (unless it ++ * would exceed the registered range) and changes are aligned to the minimum ++ * granularity within the #MemoryRegion. Listeners have to prepare for memory ++ * becoming discarded in a different granularity than it was populated and the ++ * other way around. 
++ */ ++struct RamDiscardManagerClass { ++ /* private */ ++ GenericStateManagerClass parent_class; ++}; + + bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + ram_addr_t *ram_addr, bool *read_only, +@@ -851,7 +879,7 @@ struct MemoryRegion { + const char *name; + unsigned ioeventfd_nb; + MemoryRegionIoeventfd *ioeventfds; +- RamDiscardManager *rdm; /* Only for RAM */ ++ GenericStateManager *gsm; /* Only for RAM */ + + /* For devices designed to perform re-entrant IO into their own IO MRs */ + bool disable_reentrancy_guard; +@@ -2529,39 +2557,36 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr); + bool memory_region_is_mapped(MemoryRegion *mr); + + /** +- * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a ++ * memory_region_get_generic_state_manager: get the #GenericStateManager for a + * #MemoryRegion + * +- * The #RamDiscardManager cannot change while a memory region is mapped. ++ * The #GenericStateManager cannot change while a memory region is mapped. + * + * @mr: the #MemoryRegion + */ +-RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr); ++GenericStateManager *memory_region_get_generic_state_manager(MemoryRegion *mr); + + /** +- * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a +- * #RamDiscardManager assigned ++ * memory_region_set_generic_state_manager: set the #GenericStateManager for a ++ * #MemoryRegion ++ * ++ * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion ++ * that does not cover RAM, or a #MemoryRegion that already has a ++ * #GenericStateManager assigned. Return 0 if the gsm is set successfully. 
+ * + * @mr: the #MemoryRegion + * @gsm: #GenericStateManager to set + */ +-static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr) +-{ +- return !!memory_region_get_ram_discard_manager(mr); +-} ++int memory_region_set_generic_state_manager(MemoryRegion *mr, ++ GenericStateManager *gsm); + + /** +- * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a +- * #MemoryRegion +- * +- * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion +- * that does not cover RAM, or a #MemoryRegion that already has a +- * #RamDiscardManager assigned. Return 0 if the rdm is set successfully. ++ * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a ++ * #RamDiscardManager assigned + * + * @mr: the #MemoryRegion +- * @rdm: #RamDiscardManager to set + */ +-int memory_region_set_ram_discard_manager(MemoryRegion *mr, +- RamDiscardManager *rdm); ++bool memory_region_has_ram_discard_manager(MemoryRegion *mr); + + /** + * memory_region_find: translate an address/size relative to a +diff --git a/migration/ram.c b/migration/ram.c +index 083a8a8073..e6baecf143 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -882,14 +882,14 @@ static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb) + uint64_t cleared_bits = 0; + + if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr); + MemoryRegionSection section = { + .mr = rb->mr, + .offset_within_region = 0, + .size = int128_make64(qemu_ram_get_used_length(rb)), + }; + +- ram_discard_manager_replay_discarded(rdm, &section, ++ generic_state_manager_replay_on_state_clear(gsm, &section, + dirty_bitmap_clear_section, + &cleared_bits); + } +@@ -905,14 +905,14 @@ static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb) + bool ramblock_page_is_discarded(RAMBlock *rb, 
ram_addr_t start) + { + if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr); + MemoryRegionSection section = { + .mr = rb->mr, + .offset_within_region = start, + .size = int128_make64(qemu_ram_pagesize(rb)), + }; + +- return !ram_discard_manager_is_populated(rdm, &section); ++ return !generic_state_manager_is_state_set(gsm, &section); + } + return false; + } +@@ -1732,14 +1732,14 @@ static void ram_block_populate_read(RAMBlock *rb) + * Note: The result is only stable while migrating (precopy/postcopy). + */ + if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr); + MemoryRegionSection section = { + .mr = rb->mr, + .offset_within_region = 0, + .size = rb->mr->size, + }; + +- ram_discard_manager_replay_populated(rdm, &section, ++ generic_state_manager_replay_on_state_set(gsm, &section, + populate_read_section, NULL); + } else { + populate_read_range(rb, 0, rb->used_length); +@@ -1791,14 +1791,14 @@ static int ram_block_uffd_protect(RAMBlock *rb, int uffd_fd) + + /* See ram_block_populate_read() */ + if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr); + MemoryRegionSection section = { + .mr = rb->mr, + .offset_within_region = 0, + .size = rb->mr->size, + }; + +- return ram_discard_manager_replay_populated(rdm, &section, ++ return generic_state_manager_replay_on_state_set(gsm, &section, + uffd_protect_section, + (void *)(uintptr_t)uffd_fd); + } +diff --git a/system/memory.c b/system/memory.c +index ace79b0f59..38f73eb48b 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -2113,83 
+2113,93 @@ int 
memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr) + return imrc->num_indexes(iommu_mr); + } + +-RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) ++GenericStateManager *memory_region_get_generic_state_manager(MemoryRegion *mr) + { + if (!memory_region_is_ram(mr)) { + return NULL; + } +- return mr->rdm; ++ return mr->gsm; + } + +-int memory_region_set_ram_discard_manager(MemoryRegion *mr, +- RamDiscardManager *rdm) ++int memory_region_set_generic_state_manager(MemoryRegion *mr, ++ GenericStateManager *gsm) + { + g_assert(memory_region_is_ram(mr)); +- if (mr->rdm && rdm) { ++ if (mr->gsm && gsm) { + return -EBUSY; + } + +- mr->rdm = rdm; ++ mr->gsm = gsm; + return 0; + } + +-uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, +- const MemoryRegion *mr) ++bool memory_region_has_ram_discard_manager(MemoryRegion *mr) + { +- RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); ++ if (!memory_region_is_ram(mr) || ++ !object_dynamic_cast(OBJECT(mr->gsm), TYPE_RAM_DISCARD_MANAGER)) { ++ return false; ++ } ++ ++ return true; ++} ++ ++uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm, ++ const MemoryRegion *mr) ++{ ++ GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); + +- g_assert(rdmc->get_min_granularity); +- return rdmc->get_min_granularity(rdm, mr); ++ g_assert(gsmc->get_min_granularity); ++ return gsmc->get_min_granularity(gsm, mr); + } + +-bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, +- const MemoryRegionSection *section) ++bool generic_state_manager_is_state_set(const GenericStateManager *gsm, ++ const MemoryRegionSection *section) + { +- RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); ++ GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); + +- g_assert(rdmc->is_populated); +- return rdmc->is_populated(rdm, section); ++ g_assert(gsmc->is_state_set); ++ return 
gsmc->is_state_set(gsm, section); + } + +-int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayStateChange replay_fn, +- void *opaque) ++int generic_state_manager_replay_on_state_set(const GenericStateManager *gsm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, ++ void *opaque) + { +- RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); ++ GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); + +- g_assert(rdmc->replay_populated); +- return rdmc->replay_populated(rdm, section, replay_fn, opaque); ++ g_assert(gsmc->replay_on_state_set); ++ return gsmc->replay_on_state_set(gsm, section, replay_fn, opaque); + } + +-int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayStateChange replay_fn, +- void *opaque) ++int generic_state_manager_replay_on_state_clear(const GenericStateManager *gsm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, ++ void *opaque) + { +- RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); ++ GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); + +- g_assert(rdmc->replay_discarded); +- return rdmc->replay_discarded(rdm, section, replay_fn, opaque); ++ g_assert(gsmc->replay_on_state_clear); ++ return gsmc->replay_on_state_clear(gsm, section, replay_fn, opaque); + } + +-void ram_discard_manager_register_listener(RamDiscardManager *rdm, +- RamDiscardListener *rdl, +- MemoryRegionSection *section) ++void generic_state_manager_register_listener(GenericStateManager *gsm, ++ StateChangeListener *scl, ++ MemoryRegionSection *section) + { +- RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); ++ GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); + +- g_assert(rdmc->register_listener); +- rdmc->register_listener(rdm, rdl, section); ++ g_assert(gsmc->register_listener); ++ gsmc->register_listener(gsm, scl, 
section); + } + +-void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, +- RamDiscardListener *rdl) ++void generic_state_manager_unregister_listener(GenericStateManager *gsm, ++ StateChangeListener *scl) + { +- RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); ++ GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); + +- g_assert(rdmc->unregister_listener); +- rdmc->unregister_listener(rdm, rdl); ++ g_assert(gsmc->unregister_listener); ++ gsmc->unregister_listener(gsm, scl); + } + + /* Called with rcu_read_lock held. */ +@@ -2216,7 +2226,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat); + return false; + } else if (memory_region_has_ram_discard_manager(mr)) { +- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(mr); + MemoryRegionSection tmp = { + .mr = mr, + .offset_within_region = xlat, +@@ -2231,7 +2241,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + * Disallow that. vmstate priorities make sure any RamDiscardManager + * were already restored before IOMMUs are restored. 
+ */ +- if (!ram_discard_manager_is_populated(rdm, &tmp)) { ++ if (!generic_state_manager_is_state_set(gsm, &tmp)) { + error_report("iommu map to discarded memory (e.g., unplugged via" + " virtio-mem): %" HWADDR_PRIx "", + iotlb->translated_addr); +@@ -3737,8 +3747,15 @@ static const TypeInfo iommu_memory_region_info = { + .abstract = true, + }; + +-static const TypeInfo ram_discard_manager_info = { ++static const TypeInfo generic_state_manager_info = { + .parent = TYPE_INTERFACE, ++ .name = TYPE_GENERIC_STATE_MANAGER, ++ .class_size = sizeof(GenericStateManagerClass), ++ .abstract = true, ++}; ++ ++static const TypeInfo ram_discard_manager_info = { ++ .parent = TYPE_GENERIC_STATE_MANAGER, + .name = TYPE_RAM_DISCARD_MANAGER, + .class_size = sizeof(RamDiscardManagerClass), + }; +@@ -3747,6 +3764,7 @@ static void memory_register_types(void) + { + type_register_static(&memory_region_info); + type_register_static(&iommu_memory_region_info); ++ type_register_static(&generic_state_manager_info); + type_register_static(&ram_discard_manager_info); + } + +diff --git a/system/memory_mapping.c b/system/memory_mapping.c +index 6f884c5b90..7bd8972b55 100644 +--- a/system/memory_mapping.c ++++ b/system/memory_mapping.c +@@ -270,10 +270,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, + + /* for special sparse regions, only add populated parts */ + if (memory_region_has_ram_discard_manager(section->mr)) { +- RamDiscardManager *rdm; +- +- rdm = memory_region_get_ram_discard_manager(section->mr); +- ram_discard_manager_replay_populated(rdm, section, ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); ++ generic_state_manager_replay_on_state_set(gsm, section, + guest_phys_ram_populate_cb, g); + return; + } +-- +2.33.0 + diff --git a/memory-Optimize-flatview-ioeventfd-processing.patch b/memory-Optimize-flatview-ioeventfd-processing.patch new file mode 100644 index 
0000000000000000000000000000000000000000..fec439d77d404ec78742d486771c76fddc1210cd --- /dev/null +++ b/memory-Optimize-flatview-ioeventfd-processing.patch @@ -0,0 +1,102 @@ +From 3b09c85198f4970be18ba8597d545d5dc73a0ba1 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 10 Apr 2025 16:13:49 +0800 +Subject: [PATCH] memory:Optimize flatview ioeventfd processing + +When updating memory regions, do not repeat updates for the same memory region + to optimize the memory region update process + +Signed-off-by: libai +--- + include/exec/memory.h | 2 ++ + system/memory.c | 26 +++++++++++++++++++++++++- + 2 files changed, 27 insertions(+), 1 deletion(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 69021ba491..fe27f323b2 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1201,6 +1201,8 @@ struct FlatView { + unsigned nr_allocated; + struct AddressSpaceDispatch *dispatch; + MemoryRegion *root; ++ #define FLATVIEW_FLAG_LAST_PROCESSED (1 << 0) ++ unsigned flags; + }; + + static inline FlatView *address_space_to_flatview(AddressSpace *as) +diff --git a/system/memory.c b/system/memory.c +index 08d34262c3..7858aa1878 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -856,6 +856,13 @@ static void address_space_update_ioeventfds(AddressSpace *as) + return; + } + ++ view = address_space_get_flatview(as); ++ if (view->flags & FLATVIEW_FLAG_LAST_PROCESSED) { ++ flatview_unref(view); ++ return; ++ } ++ view->flags |= FLATVIEW_FLAG_LAST_PROCESSED; ++ + /* + * It is likely that the number of ioeventfds hasn't changed much, so use + * the previous size as the starting value, with some headroom to avoid +@@ -864,7 +871,6 @@ static void address_space_update_ioeventfds(AddressSpace *as) + ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4); + ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max); + +- view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + for (i = 0; i < fr->mr->ioeventfd_nb; ++i) { + tmp = 
addrrange_shift(fr->mr->ioeventfds[i].addr, +@@ -1111,6 +1117,17 @@ static void address_space_update_topology(AddressSpace *as) + address_space_set_flatview(as); + } + ++static void address_space_update_view(AddressSpace *as) ++{ ++ FlatView *view; ++ ++ view = address_space_get_flatview(as); ++ if (view->flags & FLATVIEW_FLAG_LAST_PROCESSED) { ++ view->flags &= ~FLATVIEW_FLAG_LAST_PROCESSED; ++ } ++ flatview_unref(view); ++} ++ + void memory_region_transaction_begin(void) + { + qemu_flush_coalesced_mmio_buffer(); +@@ -1132,6 +1149,9 @@ void memory_region_commit(void) + } + memory_region_update_pending = false; + ioeventfd_update_pending = false; ++ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { ++ address_space_update_view(as); ++ } + MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); + } else if (ioeventfd_update_pending) { + MEMORY_LISTENER_CALL_GLOBAL(eventfd_begin, Forward); +@@ -1139,6 +1159,9 @@ void memory_region_commit(void) + address_space_update_ioeventfds(as); + } + ioeventfd_update_pending = false; ++ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { ++ address_space_update_view(as); ++ } + MEMORY_LISTENER_CALL_GLOBAL(eventfd_end, Forward); + } + } +@@ -3149,6 +3172,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) + as->name = g_strdup(name ? name : "anonymous"); + address_space_update_topology(as); + address_space_update_ioeventfds(as); ++ address_space_update_view(as); + } + + static void do_address_space_destroy(AddressSpace *as) +-- +2.41.0.windows.1 + diff --git a/memory-Provide-an-equality-function-for-MemoryRegion.patch b/memory-Provide-an-equality-function-for-MemoryRegion.patch deleted file mode 100644 index 9d81f489a7fc516ba95aa411bccbc0a67397d389..0000000000000000000000000000000000000000 --- a/memory-Provide-an-equality-function-for-MemoryRegion.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 026ef4aabd2d533d1d2f206bd3312fb1b1674058 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Wed, 14 Aug 2019 18:55:34 +0100 -Subject: [PATCH] memory: Provide an equality function for MemoryRegionSections -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Provide a comparison function that checks all the fields are the same. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20190814175535.2023-3-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9366cf02e4e31c2a8128904d4d8290a0fad5f888) -Signed-off-by: Michael Roth ---- - include/exec/memory.h | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index f0f0767..ba0ce25 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -493,6 +493,18 @@ struct MemoryRegionSection { - bool nonvolatile; - }; - -+static inline bool MemoryRegionSection_eq(MemoryRegionSection *a, -+ MemoryRegionSection *b) -+{ -+ return a->mr == b->mr && -+ a->fv == b->fv && -+ a->offset_within_region == b->offset_within_region && -+ a->offset_within_address_space == b->offset_within_address_space && -+ int128_eq(a->size, b->size) && -+ a->readonly == b->readonly && -+ a->nonvolatile == b->nonvolatile; -+} -+ - /** - * memory_region_init: Initialize a memory region - * --- -1.8.3.1 - diff --git a/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch b/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch deleted file mode 100644 index 2c4052639c2c104de23c1a6d99674ec036e675af..0000000000000000000000000000000000000000 --- a/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0ae8b3e05294fee99870efa9b58e22e16f31caf9 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:20 +0530 -Subject: [PATCH] memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled - -mr->ram_block is NULL when mr->is_iommu is true, then 
fr.dirty_log_mask -wasn't set correctly due to which memory listener's log_sync doesn't -get called. -This patch returns log_mask with DIRTY_MEMORY_MIGRATION set when -IOMMU is enabled. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Yan Zhao -Acked-by: Paolo Bonzini -Signed-off-by: Alex Williamson ---- - memory.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/memory.c b/memory.c -index 5d8c9a9234..44713efc66 100644 ---- a/memory.c -+++ b/memory.c -@@ -1825,7 +1825,7 @@ bool memory_region_is_ram_device(MemoryRegion *mr) - uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) - { - uint8_t mask = mr->dirty_log_mask; -- if (global_dirty_log && mr->ram_block) { -+ if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) { - mask |= (1 << DIRTY_MEMORY_MIGRATION); - } - return mask; --- -2.27.0 - diff --git a/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch b/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch deleted file mode 100644 index 8a25d177e1349ef0faca9b9280e70bb43dfd2837..0000000000000000000000000000000000000000 --- a/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch +++ /dev/null @@ -1,42 +0,0 @@ -From d0d816682b790b7d8a9caf17c32eadde7756ac9c Mon Sep 17 00:00:00 2001 -From: Zenghui Yu -Date: Mon, 16 Nov 2020 21:22:10 +0800 -Subject: [PATCH] memory: Skip dirty tracking for un-migratable memory regions - -It makes no sense to track dirty pages for those un-migratable memory -regions (e.g., Memory BAR region of the VFIO PCI device) and doing so -will potentially lead to some unpleasant issues during migration [1]. - -Skip dirty tracking for those regions by evaluating if the region is -migratable before setting dirty_log_mask (DIRTY_MEMORY_MIGRATION). 
- -[1] https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg03757.html - -Signed-off-by: Zenghui Yu -Message-Id: <20201116132210.1730-1-yuzenghui@huawei.com> -Reviewed-by: Cornelia Huck -Signed-off-by: Paolo Bonzini -Signed-off-by: Kunkun Jiang ---- - memory.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/memory.c b/memory.c -index 44713efc66..708b3dff3d 100644 ---- a/memory.c -+++ b/memory.c -@@ -1825,7 +1825,10 @@ bool memory_region_is_ram_device(MemoryRegion *mr) - uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) - { - uint8_t mask = mr->dirty_log_mask; -- if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) { -+ RAMBlock *rb = mr->ram_block; -+ -+ if (global_dirty_log && ((rb && qemu_ram_is_migratable(rb)) || -+ memory_region_is_iommu(mr))) { - mask |= (1 << DIRTY_MEMORY_MIGRATION); - } - return mask; --- -2.27.0 - diff --git a/memory-Unify-the-definiton-of-ReplayRamPopulate-and-.patch b/memory-Unify-the-definiton-of-ReplayRamPopulate-and-.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e31cc4dba008e41f4f4f65cde6e971b09f30703 --- /dev/null +++ b/memory-Unify-the-definiton-of-ReplayRamPopulate-and-.patch @@ -0,0 +1,222 @@ +From b18b91d25cd224fd4920b804a401c90a6f5ed2b8 Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 15:49:23 +0800 +Subject: [PATCH] memory: Unify the definiton of ReplayRamPopulate() and + ReplayRamDiscard() + +Reference:https://gitlab.com/qemu-project/qemu/-/commit/2205b8466733f8c6e3306c964f31c5a7cac69dfa + +Update ReplayRamDiscard() function to return the result and unify the +ReplayRamPopulate() and ReplayRamDiscard() to ReplayStateChange() at +the same time due to their identical definitions. This unification +simplifies related structures, such as VirtIOMEMReplayData, which makes +it more cleaner and maintainable. 
+ +Signed-off-by: Chenyi Qiang +Signed-off-by: houmingyong +--- + hw/virtio/virtio-mem.c | 20 ++++++++++---------- + include/exec/memory.h | 31 ++++++++++++++++--------------- + migration/ram.c | 5 +++-- + system/memory.c | 12 ++++++------ + 4 files changed, 35 insertions(+), 33 deletions(-) + +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index 6f3ecddfc7..f40a816b7f 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -1712,7 +1712,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, + } + + struct VirtIOMEMReplayData { +- void *fn; ++ ReplayStateChange fn; + void *opaque; + }; + +@@ -1720,12 +1720,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) + { + struct VirtIOMEMReplayData *data = arg; + +- return ((ReplayRamPopulate)data->fn)(s, data->opaque); ++ return data->fn(s, data->opaque); + } + + static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *s, +- ReplayRamPopulate replay_fn, ++ ReplayStateChange replay_fn, + void *opaque) + { + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); +@@ -1744,14 +1744,14 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, + { + struct VirtIOMEMReplayData *data = arg; + +- ((ReplayRamDiscard)data->fn)(s, data->opaque); ++ data->fn(s, data->opaque); + return 0; + } + +-static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, +- MemoryRegionSection *s, +- ReplayRamDiscard replay_fn, +- void *opaque) ++static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, ++ MemoryRegionSection *s, ++ ReplayStateChange replay_fn, ++ void *opaque) + { + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + struct VirtIOMEMReplayData data = { +@@ -1760,8 +1760,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + }; + + g_assert(s->mr == &vmem->memdev->mr); +- virtio_mem_for_each_unplugged_section(vmem, s, &data, +- virtio_mem_rdm_replay_discarded_cb); ++ 
return virtio_mem_for_each_unplugged_section(vmem, s, &data, ++ virtio_mem_rdm_replay_discarded_cb); + } + + static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, +diff --git a/include/exec/memory.h b/include/exec/memory.h +index a4e9e084cd..a3243ee218 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -623,8 +623,7 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl, + rdl->double_discard_supported = double_discard_supported; + } + +-typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque); +-typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); ++typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque); + + /* + * RamDiscardManagerClass: +@@ -698,36 +697,38 @@ struct RamDiscardManagerClass { + /** + * @replay_populated: + * +- * Call the #ReplayRamPopulate callback for all populated parts within the ++ * Call the #ReplayStateChange callback for all populated parts within the + * #MemoryRegionSection via the #RamDiscardManager. + * + * In case any call fails, no further calls are made. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection +- * @replay_fn: the #ReplayRamPopulate callback ++ * @replay_fn: the #ReplayStateChange callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ + int (*replay_populated)(const RamDiscardManager *rdm, + MemoryRegionSection *section, +- ReplayRamPopulate replay_fn, void *opaque); ++ ReplayStateChange replay_fn, void *opaque); + + /** + * @replay_discarded: + * +- * Call the #ReplayRamDiscard callback for all discarded parts within the ++ * Call the #ReplayStateChange callback for all discarded parts within the + * #MemoryRegionSection via the #RamDiscardManager. 
+ * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection +- * @replay_fn: the #ReplayRamDiscard callback ++ * @replay_fn: the #ReplayStateChange callback + * @opaque: pointer to forward to the callback ++ * ++ * Returns 0 on success, or a negative error if any notification failed. + */ +- void (*replay_discarded)(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayRamDiscard replay_fn, void *opaque); ++ int (*replay_discarded)(const RamDiscardManager *rdm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, void *opaque); + + /** + * @register_listener: +@@ -770,13 +771,13 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, + + int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *section, +- ReplayRamPopulate replay_fn, ++ ReplayStateChange replay_fn, + void *opaque); + +-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayRamDiscard replay_fn, +- void *opaque); ++int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, ++ void *opaque); + + void ram_discard_manager_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, +diff --git a/migration/ram.c b/migration/ram.c +index 91bec89a6e..083a8a8073 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -841,8 +841,8 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, + return ret; + } + +-static void dirty_bitmap_clear_section(MemoryRegionSection *section, +- void *opaque) ++static int dirty_bitmap_clear_section(MemoryRegionSection *section, ++ void *opaque) + { + const hwaddr offset = section->offset_within_region; + const hwaddr size = int128_get64(section->size); +@@ -861,6 +861,7 @@ static void dirty_bitmap_clear_section(MemoryRegionSection *section, + } + *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages); + 
bitmap_clear(rb->bmap, start, npages); ++ return 0; + } + + /* +diff --git a/system/memory.c b/system/memory.c +index c3985e8eef..ace79b0f59 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -2153,7 +2153,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, + + int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *section, +- ReplayRamPopulate replay_fn, ++ ReplayStateChange replay_fn, + void *opaque) + { + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); +@@ -2162,15 +2162,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, + return rdmc->replay_populated(rdm, section, replay_fn, opaque); + } + +-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, +- MemoryRegionSection *section, +- ReplayRamDiscard replay_fn, +- void *opaque) ++int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, ++ MemoryRegionSection *section, ++ ReplayStateChange replay_fn, ++ void *opaque) + { + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + + g_assert(rdmc->replay_discarded); +- rdmc->replay_discarded(rdm, section, replay_fn, opaque); ++ return rdmc->replay_discarded(rdm, section, replay_fn, opaque); + } + + void ram_discard_manager_register_listener(RamDiscardManager *rdm, +-- +2.33.0 + diff --git a/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch b/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch new file mode 100644 index 0000000000000000000000000000000000000000..a1c270bc32a6c14c946175df7231cad51d4173c2 --- /dev/null +++ b/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch @@ -0,0 +1,126 @@ +From 65435e107fc8eee37c61a3a7d1adebd013ad466f Mon Sep 17 00:00:00 2001 +From: Ming Yang +Date: Sat, 23 Mar 2024 16:18:03 +0800 +Subject: [PATCH] memory: [backup] Modify the VM's physical bits value set + policy. 
+ +backup code from qemu-6.2 to qemu-8.2 +old info: +commit id : +a09c3928b33b0c53831bd9eeb56f8171c26057bc +messages: +target-i386: Modify the VM's physical bits value set policy. + +To resolve the problem that a VM with large memory capacity fails +to be live migrated, determine whether the VM is a large memory +capacity based on the memory size (4 TB). If yes, set the bus width +of the VM address to 46 bits. If no, set the bus width to 42 bits. + +Signed-off-by: Jinhua Cao +Signed-off-by: Jiajie Li + +Signed-off-by: Ming Yang +--- + target/i386/cpu.c | 20 +++++++++++++++++++- + target/i386/cpu.h | 6 ++++++ + target/i386/host-cpu.c | 13 +++++++------ + 3 files changed, 32 insertions(+), 7 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a66e5a357b..fc61a84b1e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -7666,6 +7666,24 @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value) + cpu->env.eip = value; + } + ++ ++/* At present, we check the vm is *LARGE* or not, i.e. whether ++ * the memory size is more than 4T or not. ++ */ ++const uint64_t large_vm_mem_size = 0x40000000000UL; ++void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu) ++{ ++ /* If there is not a large vm, we set the phys_bits to 42 bits, ++ * otherwise, we increase the phys_bits to 46 bits. 
++ */ ++ if (ram_size < large_vm_mem_size) { ++ cpu->phys_bits = DEFAULT_VM_CPU_PHYS_BITS; ++ } else { ++ cpu->phys_bits = LARGE_VM_CPU_PHYS_BITS; ++ cpu->fill_mtrr_mask = true; ++ } ++} ++ + static vaddr x86_cpu_get_pc(CPUState *cs) + { + X86CPU *cpu = X86_CPU(cs); +@@ -7868,7 +7886,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), +- DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), ++ DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, false), + DEFINE_PROP_UINT32("level-func7", X86CPU, env.cpuid_level_func7, + UINT32_MAX), + DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index ef987f344c..6993552cd9 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -24,6 +24,7 @@ + #include "cpu-qom.h" + #include "kvm/hyperv-proto.h" + #include "exec/cpu-defs.h" ++#include "exec/cpu-common.h" + #include "qapi/qapi-types-common.h" + #include "qemu/cpu-float.h" + #include "qemu/timer.h" +@@ -2081,6 +2082,11 @@ struct X86CPUClass { + extern const VMStateDescription vmstate_x86_cpu; + #endif + ++#define DEFAULT_VM_CPU_PHYS_BITS 42 ++#define LARGE_VM_CPU_PHYS_BITS 46 ++ ++void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu); ++ + int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, +diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c +index 92ecb7254b..07738bf857 100644 +--- a/target/i386/host-cpu.c ++++ b/target/i386/host-cpu.c +@@ -13,6 +13,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/sysemu.h" ++#include "hw/boards.h" + + /* Note: Only safe for use on x86(-64) hosts */ + static uint32_t host_cpu_phys_bits(void) +@@ -57,14 +58,14 @@ 
static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) + uint32_t phys_bits = cpu->phys_bits; + static bool warned; + +- /* +- * Print a warning if the user set it to a value that's not the +- * host value. +- */ +- if (phys_bits != host_phys_bits && phys_bits != 0 && ++ /* adjust x86 cpu phys_bits according to ram_size. */ ++ x86_cpu_adjuest_by_ram_size(current_machine->ram_size, cpu); ++ ++ /* Print a warning if the host value less than the user set. */ ++ if (phys_bits > host_phys_bits && phys_bits != 0 && + !warned) { + warn_report("Host physical bits (%u)" +- " does not match phys-bits property (%u)", ++ " less than phys-bits property (%u)", + host_phys_bits, phys_bits); + warned = true; + } +-- +2.27.0 + diff --git a/memory-clamp-cached-translation-in-case-it-points-to.patch b/memory-clamp-cached-translation-in-case-it-points-to.patch deleted file mode 100644 index c4f74d4e094c937f8b415683dc06997c3ce6cc90..0000000000000000000000000000000000000000 --- a/memory-clamp-cached-translation-in-case-it-points-to.patch +++ /dev/null @@ -1,72 +0,0 @@ -From e07e9fc9d97e9cae3d6316b7286b504398a6fc80 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Wed, 13 Jan 2021 14:50:59 +0800 -Subject: [PATCH] memory: clamp cached translation in case it points to an MMIO - region - -In using the address_space_translate_internal API, address_space_cache_init -forgot one piece of advice that can be found in the code for -address_space_translate_internal: - - /* MMIO registers can be expected to perform full-width accesses based only - * on their address, without considering adjacent registers that could - * decode to completely different MemoryRegions. When such registers - * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO - * regions overlap wildly. For this reason we cannot clamp the accesses - * here. - * - * If the length is small (as is the case for address_space_ldl/stl), - * everything works fine. 
If the incoming length is large, however, - * the caller really has to do the clamping through memory_access_size. - */ - -address_space_cache_init is exactly one such case where "the incoming length -is large", therefore we need to clamp the resulting length---not to -memory_access_size though, since we are not doing an access yet, but to -the size of the resulting section. This ensures that subsequent accesses -to the cached MemoryRegionSection will be in range. - -With this patch, the enclosed testcase notices that the used ring does -not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used" -error. - -Signed-off-by: Paolo Bonzini -(cherry-picked from 4bfb024b) -Fix CVE-2020-27821 -Signed-off-by: Alex Chen ---- - exec.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/exec.c b/exec.c -index 85c6d80353..8822c241d8 100644 ---- a/exec.c -+++ b/exec.c -@@ -3834,6 +3834,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, - AddressSpaceDispatch *d; - hwaddr l; - MemoryRegion *mr; -+ Int128 diff; - - assert(len > 0); - -@@ -3842,6 +3843,16 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, - d = flatview_to_dispatch(cache->fv); - cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); - -+ /* -+ * cache->xlat is now relative to cache->mrs.mr, not to the section itself. -+ * Take that into account to compute how many bytes are there between -+ * cache->xlat and the end of the section. 
-+ */ -+ -+ diff = int128_sub(cache->mrs.size, -+ int128_make64(cache->xlat - cache->mrs.offset_within_region)); -+ l = int128_get64(int128_min(diff, int128_make64(l))); -+ - mr = cache->mrs.mr; - memory_region_ref(mr); - if (memory_access_is_direct(mr, is_write)) { --- -2.27.0 - diff --git a/memory-eventfd-Introduce-ioeventfd-batch-processing-.patch b/memory-eventfd-Introduce-ioeventfd-batch-processing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8b7174f80bdcb9ee51b075fb39f41fde23cacfd2 --- /dev/null +++ b/memory-eventfd-Introduce-ioeventfd-batch-processing-.patch @@ -0,0 +1,144 @@ +From d43019e644fb93c64e9016c5d618d8e20a60270d Mon Sep 17 00:00:00 2001 +From: libai +Date: Wed, 9 Apr 2025 14:22:19 +0800 +Subject: [PATCH] memory/eventfd:Introduce ioeventfd batch processing to reduce + the time required to update ioeventfd + +Setting ioeventfd triggers kernel RCU synchronization, which is time-consuming. +Change it to temporarily store the modification of ioeventfds, and submit it +for effect after setting is complete. + +Signed-off-by: libai +--- + accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ + include/exec/memory.h | 21 +++++++++++++++++++++ + linux-headers/linux/kvm.h | 6 ++++++ + system/memory.c | 2 ++ + 4 files changed, 61 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index aa41b42efc..f96afb1230 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1737,6 +1737,36 @@ static void kvm_io_ioeventfd_add(MemoryListener *listener, + } + } + ++static int kvm_ioeventfd_batch(bool start) ++{ ++ int ret; ++ struct kvm_ioeventfd iofd = { ++ .flags = start ? 
++ KVM_IOEVENTFD_FLAG_BATCH_BEGIN : KVM_IOEVENTFD_FLAG_BATCH_END, ++ }; ++ ++ if (!kvm_enabled()) { ++ return -ENOSYS; ++ } ++ ++ ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd); ++ if (ret < 0) { ++ return -errno; ++ } ++ ++ return 0; ++} ++ ++static void kvm_ioeventfd_begin(MemoryListener *listener) ++{ ++ kvm_ioeventfd_batch(true); ++} ++ ++static void kvm_ioeventfd_end(MemoryListener *listener) ++{ ++ kvm_ioeventfd_batch(false); ++} ++ + static void kvm_io_ioeventfd_del(MemoryListener *listener, + MemoryRegionSection *section, + bool match_data, uint64_t data, +@@ -2631,6 +2661,8 @@ static int kvm_init(MachineState *ms) + s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; + s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region; + s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region; ++ s->memory_listener.listener.eventfd_begin = kvm_ioeventfd_begin; ++ s->memory_listener.listener.eventfd_end = kvm_ioeventfd_end; + + kvm_memory_listener_register(s, &s->memory_listener, + &address_space_memory, 0, "kvm-memory"); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 924bdbd481..69021ba491 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1079,6 +1079,27 @@ struct MemoryListener { + void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section, + bool match_data, uint64_t data, EventNotifier *e); + ++ /** ++ * @eventfd_begin: ++ * ++ * Called during an address space begin to update ioeventfd, ++ * notify kvm that ioeventfd will be update in batches. ++ * ++ * @listener: The #MemoryListener. ++ */ ++ void (*eventfd_begin)(MemoryListener *listener); ++ ++ /** ++ * @eventfd_end: ++ * ++ * Called during an address space update ioeventfd end, ++ * notify kvm that all ioeventfd modifications have been submitted ++ * and batch processing can be started. ++ * ++ * @listener: The #MemoryListener. 
++ */ ++ void (*eventfd_end)(MemoryListener *listener); ++ + /** + * @coalesced_io_add: + * +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index a19683f1e9..0714651440 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -819,6 +819,8 @@ enum { + kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_virtio_ccw_notify, + kvm_ioeventfd_flag_nr_fast_mmio, ++ kvm_ioeventfd_flag_nr_batch_begin, ++ kvm_ioeventfd_flag_nr_batch_end, + kvm_ioeventfd_flag_nr_max, + }; + +@@ -827,6 +829,10 @@ enum { + #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) + #define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) ++#define KVM_IOEVENTFD_FLAG_BATCH_BEGIN \ ++ (1<< kvm_ioeventfd_flag_nr_batch_begin) ++#define KVM_IOEVENTFD_FLAG_BATCH_END \ ++ (1 << kvm_ioeventfd_flag_nr_batch_end) + + #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) + +diff --git a/system/memory.c b/system/memory.c +index fd76eb7048..08d34262c3 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -1134,10 +1134,12 @@ void memory_region_commit(void) + ioeventfd_update_pending = false; + MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); + } else if (ioeventfd_update_pending) { ++ MEMORY_LISTENER_CALL_GLOBAL(eventfd_begin, Forward); + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_update_ioeventfds(as); + } + ioeventfd_update_pending = false; ++ MEMORY_LISTENER_CALL_GLOBAL(eventfd_end, Forward); + } + } + +-- +2.41.0.windows.1 + diff --git a/meson-Introduce-qatzip-feature-to-the-build-system.patch b/meson-Introduce-qatzip-feature-to-the-build-system.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3690cbf3a0b3aee4137912cdbbddc8bf8e9755f --- /dev/null +++ b/meson-Introduce-qatzip-feature-to-the-build-system.patch @@ -0,0 +1,99 @@ +From ca73720f8e625f143a27acf7c1aedb1b426c1ee1 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: 
Fri, 30 Aug 2024 16:27:19 -0700 +Subject: [89/99] meson: Introduce 'qatzip' feature to the build system + +commit e28ed313c268aeb4e0cefb66dcd215c30e4443fe upstream. + +Add a 'qatzip' feature, which is automatically disabled, and which +depends on the QATzip library if enabled. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-3-yichen.wang@bytedance.com +Signed-off-by: Peter Xu + + Conflicts: + scripts/meson-buildoptions.sh +[jz: resolve simple context conflicts] +Signed-off-by: Jason Zeng +--- + meson.build | 10 ++++++++++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 3 +++ + 3 files changed, 15 insertions(+) + +diff --git a/meson.build b/meson.build +index e3599b9a09..d221f5cad5 100644 +--- a/meson.build ++++ b/meson.build +@@ -1061,6 +1061,14 @@ if not get_option('uadk').auto() or have_system + uadk = declare_dependency(dependencies: [libwd, libwd_comp]) + endif + endif ++ ++qatzip = not_found ++if not get_option('qatzip').auto() or have_system ++ qatzip = dependency('qatzip', version: '>=1.1.2', ++ required: get_option('qatzip'), ++ method: 'pkg-config') ++endif ++ + virgl = not_found + + have_vhost_user_gpu = have_tools and targetos == 'linux' and pixman.found() +@@ -2301,6 +2309,7 @@ config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id) + config_host_data.set('CONFIG_ZSTD', zstd.found()) + config_host_data.set('CONFIG_QPL', qpl.found()) + config_host_data.set('CONFIG_UADK', uadk.found()) ++config_host_data.set('CONFIG_QATZIP', qatzip.found()) + config_host_data.set('CONFIG_FUSE', fuse.found()) + config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found()) + config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) +@@ -4477,6 +4486,7 @@ summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} + summary_info += {'Query Processing Library support': qpl} + summary_info += {'UADK 
Library support': uadk} ++summary_info += {'qatzip support': qatzip} + summary_info += {'NUMA host support': numa} + summary_info += {'capstone': capstone} + summary_info += {'libpmem support': libpmem} +diff --git a/meson_options.txt b/meson_options.txt +index 709678fa18..61996300d5 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -263,6 +263,8 @@ option('qpl', type : 'feature', value : 'auto', + description: 'Query Processing Library support') + option('uadk', type : 'feature', value : 'auto', + description: 'UADK Library support') ++option('qatzip', type: 'feature', value: 'auto', ++ description: 'QATzip compression support') + option('fuse', type: 'feature', value: 'auto', + description: 'FUSE block device export') + option('fuse_lseek', type : 'feature', value : 'auto', +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 833b996818..8604fe8ffa 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -163,6 +163,7 @@ meson_options_help() { + printf "%s\n" ' pixman pixman support' + printf "%s\n" ' plugins TCG plugins via shared library loading' + printf "%s\n" ' png PNG support with libpng' ++ printf "%s\n" ' qatzip QATzip compression support' + printf "%s\n" ' pvrdma Enable PVRDMA support' + printf "%s\n" ' qcow1 qcow1 image format support' + printf "%s\n" ' qed qed image format support' +@@ -430,6 +431,8 @@ _meson_option_parse() { + --enable-png) printf "%s" -Dpng=enabled ;; + --disable-png) printf "%s" -Dpng=disabled ;; + --prefix=*) quote_sh "-Dprefix=$2" ;; ++ --enable-qatzip) printf "%s" -Dqatzip=enabled ;; ++ --disable-qatzip) printf "%s" -Dqatzip=disabled ;; + --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;; + --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;; + --enable-qcow1) printf "%s" -Dqcow1=enabled ;; +-- +2.33.0 + diff --git a/meson.build-Remove-ncurses-workaround-for-OpenBSD.patch b/meson.build-Remove-ncurses-workaround-for-OpenBSD.patch new file mode 100644 index 
0000000000000000000000000000000000000000..5db58de4ebfae2c8f946a6145437e141289de44e --- /dev/null +++ b/meson.build-Remove-ncurses-workaround-for-OpenBSD.patch @@ -0,0 +1,51 @@ +From 199dcd16027e3573f5eeaa4396c361cfec91cbe1 Mon Sep 17 00:00:00 2001 +From: Susanooo +Date: Fri, 25 Oct 2024 09:44:21 +0800 +Subject: [PATCH] meson.build: Remove ncurses workaround for OpenBSD +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +meson.build: Remove ncurses workaround for OpenBSD + +OpenBSD 7.5 has upgraded to ncurses 6.4. + +Signed-off-by: Brad Smith +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +Signed-off-by: zhangchujun +--- + meson.build | 2 +- + ui/curses.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/meson.build b/meson.build +index 4024f9a4bb..b3ee125b72 100644 +--- a/meson.build ++++ b/meson.build +@@ -1139,7 +1139,7 @@ iconv = not_found + curses = not_found + if have_system and get_option('curses').allowed() + curses_test = ''' +- #if defined(__APPLE__) || defined(__OpenBSD__) ++ #ifdef __APPLE__ + #define _XOPEN_SOURCE_EXTENDED 1 + #endif + #include +diff --git a/ui/curses.c b/ui/curses.c +index 8bde8c5cf7..26438486fc 100644 +--- a/ui/curses.c ++++ b/ui/curses.c +@@ -38,7 +38,7 @@ + #include "ui/input.h" + #include "sysemu/sysemu.h" + +-#if defined(__APPLE__) || defined(__OpenBSD__) ++#ifdef __APPLE__ + #define _XOPEN_SOURCE_EXTENDED 1 + #endif + +-- +2.41.0.windows.1 + diff --git a/microblaze-fix-leak-of-fdevice-tree-blob.patch b/microblaze-fix-leak-of-fdevice-tree-blob.patch deleted file mode 100644 index dd845e80cef5f3315e44417f3b7eeaa60ce6b8bb..0000000000000000000000000000000000000000 --- a/microblaze-fix-leak-of-fdevice-tree-blob.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 2ff9c28e2b72cd359a0c4e931412e355baee8e1e Mon Sep 17 00:00:00 2001 -From: lizhengui -Date: Wed, 9 Sep 2020 14:55:11 +0800 -Subject: [PATCH] microblaze: fix leak of fdevice 
tree blob -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The device tree blob returned by load_device_tree is malloced. -Free it before returning. - -Signed-off-by: Paolo Bonzini -Reviewed-by: Philippe Mathieu-Daudé ---- - hw/microblaze/boot.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c -index a7af4c07..0fcc4e9d 100644 ---- a/hw/microblaze/boot.c -+++ b/hw/microblaze/boot.c -@@ -99,6 +99,7 @@ static int microblaze_load_dtb(hwaddr addr, - } - - cpu_physical_memory_write(addr, fdt, fdt_size); -+ g_free(fdt); - return fdt_size; - } - --- -2.19.1 - diff --git a/migration-Add-compress_level-sanity-check.patch b/migration-Add-compress_level-sanity-check.patch index 8513384f438612de27e6c4508d3f38d22edcd897..011547f9bad7579ceef2360d39122d40dab5ee4d 100644 --- a/migration-Add-compress_level-sanity-check.patch +++ b/migration-Add-compress_level-sanity-check.patch @@ -1,5 +1,5 @@ -From 90c8ce0b3bcf4a3140bc4b500da9b55a694e1bde Mon Sep 17 00:00:00 2001 -From: Zeyu Jin +From 79863c5ccdd4c635657d2e32e91bc02aa49655e0 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng Date: Sat, 30 Jan 2021 16:23:15 +0800 Subject: [PATCH] migration: Add compress_level sanity check @@ -7,18 +7,19 @@ Zlib compression has level from 1 to 9. However Zstd compression has level from 1 to 22 (level >= 20 not recommanded). Let's do sanity check here to make sure a vaild compress_level is given by user. 
+Signed-off-by: Chuan Zheng Signed-off-by: Zeyu Jin Signed-off-by: Ying Fang --- - migration/migration.c | 32 ++++++++++++++++++++++++++++---- + migration/options.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) -diff --git a/migration/migration.c b/migration/migration.c -index 67425fde7a..17a5c16c79 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1111,16 +1111,40 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - } +diff --git a/migration/options.c b/migration/options.c +index 6aaee702dc..9b68962a65 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -1065,16 +1065,40 @@ void migrate_params_init(MigrationParameters *params) + params->has_mode = true; } +static bool compress_level_check(MigrationParameters *params, Error **errp) @@ -52,12 +53,12 @@ index 67425fde7a..17a5c16c79 100644 * Check whether the parameters are valid. Error will be put into errp * (if provided). Return true if valid, otherwise false. 
*/ - static bool migrate_params_check(MigrationParameters *params, Error **errp) + bool migrate_params_check(MigrationParameters *params, Error **errp) { - if (params->has_compress_level && - (params->compress_level > 9)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", -- "is invalid, it should be in the range of 0 to 9"); +- "a value between 0 and 9"); + if (params->has_compress_level && !compress_level_check(params, errp)) { return false; } diff --git a/migration-Add-migration-parameters-for-QATzip.patch b/migration-Add-migration-parameters-for-QATzip.patch new file mode 100644 index 0000000000000000000000000000000000000000..19178bf8c40578ec5e6cae7fb04b052f90dd0e3c --- /dev/null +++ b/migration-Add-migration-parameters-for-QATzip.patch @@ -0,0 +1,214 @@ +From cb3f1e1a84a3776d5382013cb9fcfe08c8ea9b3e Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 30 Aug 2024 16:27:20 -0700 +Subject: [90/99] migration: Add migration parameters for QATzip + +commit 86c6eb1f39cbb7eb0467c114469e98ef699fb515 upstream. + +Adds support for migration parameters to control QATzip compression +level. 
+ +Acked-by: Markus Armbruster +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Reviewed-by: Fabiano Rosas +Reviewed-by: Prasad Pandit +Link: https://lore.kernel.org/r/20240830232722.58272-4-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration-hmp-cmds.c | 4 ++++ + migration/options.c | 34 ++++++++++++++++++++++++++++++++++ + migration/options.h | 1 + + qapi/migration.json | 18 ++++++++++++++++++ + 4 files changed, 57 insertions(+) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 91e51eb7af..d6d5f373a1 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -669,6 +669,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zlib_level = true; + visit_type_uint8(v, param, &p->multifd_zlib_level, &err); + break; ++ case MIGRATION_PARAMETER_MULTIFD_QATZIP_LEVEL: ++ p->has_multifd_qatzip_level = true; ++ visit_type_uint8(v, param, &p->multifd_qatzip_level, &err); ++ break; + case MIGRATION_PARAMETER_MULTIFD_ZSTD_LEVEL: + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); +diff --git a/migration/options.c b/migration/options.c +index e752163114..6ba7ff65a3 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -63,6 +63,13 @@ + #define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE + /* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ + #define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 ++/* ++ * 1: best speed, ... 9: best compress ratio ++ * There is some nuance here. Refer to QATzip documentation to understand ++ * the mapping of QATzip levels to standard deflate levels. ++ */ ++#define DEFAULT_MIGRATE_MULTIFD_QATZIP_LEVEL 1 ++ + /* 0: means nocompress, 1: best speed, ... 
20: best compress ratio */ + #define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 + +@@ -147,6 +154,9 @@ Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, + parameters.multifd_zlib_level, + DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), ++ DEFINE_PROP_UINT8("multifd-qatzip-level", MigrationState, ++ parameters.multifd_qatzip_level, ++ DEFAULT_MIGRATE_MULTIFD_QATZIP_LEVEL), + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +@@ -888,6 +898,13 @@ int migrate_multifd_zlib_level(void) + return s->parameters.multifd_zlib_level; + } + ++int migrate_multifd_qatzip_level(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->parameters.multifd_qatzip_level; ++} ++ + int migrate_multifd_zstd_level(void) + { + MigrationState *s = migrate_get_current(); +@@ -1019,6 +1036,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_compression = s->parameters.multifd_compression; + params->has_multifd_zlib_level = true; + params->multifd_zlib_level = s->parameters.multifd_zlib_level; ++ params->has_multifd_qatzip_level = true; ++ params->multifd_qatzip_level = s->parameters.multifd_qatzip_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; + params->has_xbzrle_cache_size = true; +@@ -1082,6 +1101,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_multifd_channels = true; + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; ++ params->has_multifd_qatzip_level = true; + params->has_multifd_zstd_level = true; + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; +@@ -1221,6 +1241,14 @@ bool migrate_params_check(MigrationParameters *params, Error **errp) + return false; + } + ++ if (params->has_multifd_qatzip_level && ++ ((params->multifd_qatzip_level > 9) || ++ (params->multifd_qatzip_level < 
1))) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_qatzip_level", ++ "a value between 1 and 9"); ++ return false; ++ } ++ + if (params->has_multifd_zstd_level && + (params->multifd_zstd_level > 20)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", +@@ -1390,6 +1418,9 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_qatzip_level) { ++ dest->multifd_qatzip_level = params->multifd_qatzip_level; ++ } + if (params->has_multifd_zlib_level) { + dest->multifd_zlib_level = params->multifd_zlib_level; + } +@@ -1556,6 +1587,9 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_qatzip_level) { ++ s->parameters.multifd_qatzip_level = params->multifd_qatzip_level; ++ } + if (params->has_multifd_zlib_level) { + s->parameters.multifd_zlib_level = params->multifd_zlib_level; + } +diff --git a/migration/options.h b/migration/options.h +index dbd52d7acd..6b2a893217 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -89,6 +89,7 @@ int migrate_hdbss_buffer_size(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); ++int migrate_multifd_qatzip_level(void); + int migrate_multifd_zstd_level(void); + uint8_t migrate_throttle_trigger_threshold(void); + const char *migrate_tls_authz(void); +diff --git a/qapi/migration.json b/qapi/migration.json +index f1a17c511b..255f5b50a6 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -885,6 +885,11 @@ + # speed, and 9 means best compression ratio which will consume + # more CPU. Defaults to 1. 
(Since 5.0) + # ++# @multifd-qatzip-level: Set the compression level to be used in live ++# migration. The level is an integer between 1 and 9, where 1 means ++# the best compression speed, and 9 means the best compression ++# ratio which will consume more CPU. Defaults to 1. (Since 9.2) ++# + # @multifd-zstd-level: Set the compression level to be used in live + # migration, the compression level is an integer between 0 and 20, + # where 0 means no compression, 1 means the best compression +@@ -966,6 +971,7 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level', 'multifd-zstd-level', ++ 'multifd-qatzip-level', + 'block-bitmap-mapping', + { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, + 'vcpu-dirty-limit', +@@ -1097,6 +1103,11 @@ + # speed, and 9 means best compression ratio which will consume + # more CPU. Defaults to 1. (Since 5.0) + # ++# @multifd-qatzip-level: Set the compression level to be used in live ++# migration. The level is an integer between 1 and 9, where 1 means ++# the best compression speed, and 9 means the best compression ++# ratio which will consume more CPU. Defaults to 1. (Since 9.2) ++# + # @multifd-zstd-level: Set the compression level to be used in live + # migration, the compression level is an integer between 0 and 20, + # where 0 means no compression, 1 means the best compression +@@ -1198,6 +1209,7 @@ + '*max-cpu-throttle': 'uint8', + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', ++ '*multifd-qatzip-level': 'uint8', + '*multifd-zstd-level': 'uint8', + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ], + '*x-vcpu-dirty-limit-period': { 'type': 'uint64', +@@ -1354,6 +1366,11 @@ + # speed, and 9 means best compression ratio which will consume + # more CPU. Defaults to 1. (Since 5.0) + # ++# @multifd-qatzip-level: Set the compression level to be used in live ++# migration. 
The level is an integer between 1 and 9, where 1 means ++# the best compression speed, and 9 means the best compression ++# ratio which will consume more CPU. Defaults to 1. (Since 9.2) ++# + # @multifd-zstd-level: Set the compression level to be used in live + # migration, the compression level is an integer between 0 and 20, + # where 0 means no compression, 1 means the best compression +@@ -1451,6 +1468,7 @@ + '*max-cpu-throttle': 'uint8', + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', ++ '*multifd-qatzip-level': 'uint8', + '*multifd-zstd-level': 'uint8', + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ], + '*x-vcpu-dirty-limit-period': { 'type': 'uint64', +-- +2.33.0 + diff --git a/migration-Add-multi-thread-compress-method.patch b/migration-Add-multi-thread-compress-method.patch index e900a729ea9779862318e0d2c192f88d8cf81c44..1c2782dc3107c988df2ceb796c16172dfc0e38e4 100644 --- a/migration-Add-multi-thread-compress-method.patch +++ b/migration-Add-multi-thread-compress-method.patch @@ -1,5 +1,5 @@ -From b0cabc67e16d9b4e1e749b0359dd8f3874e0968d Mon Sep 17 00:00:00 2001 -From: Zeyu Jin +From c2402b63ecb10b9a25695b710f2664dbcbc01ec4 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng Date: Sat, 30 Jan 2021 14:57:54 +0800 Subject: [PATCH] migration: Add multi-thread compress method @@ -7,176 +7,148 @@ A multi-thread compress method parameter is added to hold the method we are going to use. By default the 'zlib' method is used to maintain the compatibility as before. 
+Signed-off-by: Chuan Zheng Signed-off-by: Zeyu Jin Signed-off-by: Ying Fang --- - hw/core/qdev-prop-internal.h | 18 ++++++++++++++++++ - hw/core/qdev-properties-system.c | 13 +++++++++++++ - hw/core/qdev-properties.c | 14 +++++++++++--- + hw/core/qdev-properties-system.c | 11 +++++++++++ include/hw/qdev-properties.h | 4 ++++ - migration/migration.c | 15 +++++++++++++++ - migration/qemu-file.c | 9 +++++++++ - monitor/hmp-cmds.c | 13 +++++++++++++ - qapi/migration.json | 26 +++++++++++++++++++++++++- - 8 files changed, 108 insertions(+), 4 deletions(-) - create mode 100644 hw/core/qdev-prop-internal.h + migration/migration-hmp-cmds.c | 13 +++++++++++++ + migration/options.c | 15 +++++++++++++++ + monitor/hmp-cmds.c | 1 + + qapi/migration.json | 32 ++++++++++++++++++++++++++++++-- + util/oslib-posix.c | 2 +- + 7 files changed, 75 insertions(+), 3 deletions(-) -diff --git a/hw/core/qdev-prop-internal.h b/hw/core/qdev-prop-internal.h -new file mode 100644 -index 0000000000..a4a7eaf078 ---- /dev/null -+++ b/hw/core/qdev-prop-internal.h -@@ -0,0 +1,18 @@ -+/* -+ * qdev property parsing -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. 
-+ */ -+ -+#ifndef HW_CORE_QDEV_PROP_INTERNAL_H -+#define HW_CORE_QDEV_PROP_INTERNAL_H -+ -+void get_enum(Object *obj, Visitor *v, const char *name, void *opaque, -+ Error **errp); -+void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, -+ Error **errp); -+ -+void set_default_value_enum(Object *obj, const Property *prop); -+ -+#endif diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c -index ba412dd2ca..67ed89b406 100644 +index f2e2718c74..cd5571fcfb 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c -@@ -15,6 +15,7 @@ - #include "hw/qdev.h" - #include "qapi/error.h" - #include "qapi/qmp/qerror.h" -+#include "qapi/qapi-types-migration.h" - #include "sysemu/block-backend.h" - #include "sysemu/blockdev.h" - #include "hw/block/block.h" -@@ -23,6 +24,7 @@ - #include "chardev/char-fe.h" - #include "sysemu/iothread.h" - #include "sysemu/tpm_backend.h" -+#include "qdev-prop-internal.h" +@@ -1202,6 +1202,17 @@ const PropertyInfo qdev_prop_uuid = { + .set_default_value = set_default_uuid_auto, + }; - static void get_pointer(Object *obj, Visitor *v, Property *prop, - char *(*print)(void *ptr), -@@ -399,3 +401,14 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd) - } - nd->instantiated = 1; - } -+ +/* --- CompressMethod --- */ +const PropertyInfo qdev_prop_compress_method = { + .name = "CompressMethod", + .description = "multi-thread compression method, " + "zlib", + .enum_table = &CompressMethod_lookup, -+ .get = get_enum, -+ .set = set_enum, -+ .set_default_value = set_default_value_enum, -+}; -diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c -index 81c97f48a7..709f9e0f9d 100644 ---- a/hw/core/qdev-properties.c -+++ b/hw/core/qdev-properties.c -@@ -11,6 +11,7 @@ - #include "qapi/visitor.h" - #include "chardev/char.h" - #include "qemu/uuid.h" -+#include "qdev-prop-internal.h" - - void qdev_prop_set_after_realize(DeviceState *dev, const char *name, - Error **errp) -@@ 
-46,7 +47,7 @@ void *qdev_get_prop_ptr(DeviceState *dev, Property *prop) - return ptr; - } - --static void get_enum(Object *obj, Visitor *v, const char *name, void *opaque, -+void get_enum(Object *obj, Visitor *v, const char *name, void *opaque, - Error **errp) - { - DeviceState *dev = DEVICE(obj); -@@ -56,7 +57,7 @@ static void get_enum(Object *obj, Visitor *v, const char *name, void *opaque, - visit_type_enum(v, prop->name, ptr, prop->info->enum_table, errp); - } - --static void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, -+void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, - Error **errp) - { - DeviceState *dev = DEVICE(obj); -@@ -71,7 +72,7 @@ static void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, - visit_type_enum(v, prop->name, ptr, prop->info->enum_table, errp); - } - --static void set_default_value_enum(Object *obj, const Property *prop) -+void set_default_value_enum(Object *obj, const Property *prop) - { - object_property_set_str(obj, - qapi_enum_lookup(prop->info->enum_table, -@@ -79,6 +80,13 @@ static void set_default_value_enum(Object *obj, const Property *prop) - prop->name, &error_abort); - } - -+const PropertyInfo qdev_prop_enum = { -+ .name = "enum", -+ .get = get_enum, -+ .set = set_enum, -+ .set_default_value = set_default_value_enum, ++ .get = qdev_propinfo_get_enum, ++ .set = qdev_propinfo_set_enum, ++ .set_default_value = qdev_propinfo_set_default_value_enum, +}; + - /* Bit */ + /* --- s390 cpu entitlement policy --- */ - static uint32_t qdev_get_prop_mask(Property *prop) + QEMU_BUILD_BUG_ON(sizeof(CpuS390Entitlement) != sizeof(int)); diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h -index 1eae5ab056..a22a532eb8 100644 +index 25743a29a0..63602c2c74 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h -@@ -23,6 +23,7 @@ extern const PropertyInfo qdev_prop_tpm; - extern const PropertyInfo qdev_prop_ptr; - extern const PropertyInfo 
qdev_prop_macaddr; +@@ -60,6 +60,7 @@ extern const PropertyInfo qdev_prop_int64; + extern const PropertyInfo qdev_prop_size; + extern const PropertyInfo qdev_prop_string; extern const PropertyInfo qdev_prop_on_off_auto; +extern const PropertyInfo qdev_prop_compress_method; - extern const PropertyInfo qdev_prop_losttickpolicy; - extern const PropertyInfo qdev_prop_blockdev_on_error; - extern const PropertyInfo qdev_prop_bios_chs_trans; -@@ -205,6 +206,9 @@ extern const PropertyInfo qdev_prop_pcie_link_width; - DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr) + extern const PropertyInfo qdev_prop_size32; + extern const PropertyInfo qdev_prop_array; + extern const PropertyInfo qdev_prop_link; +@@ -168,6 +169,9 @@ extern const PropertyInfo qdev_prop_link; + DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*) #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_on_off_auto, OnOffAuto) +#define DEFINE_PROP_COMPRESS_METHOD(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_compress_method, \ + CompressMethod) - #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ - DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ - LostTickPolicy) -diff --git a/migration/migration.c b/migration/migration.c -index 0e396f22b4..c79bf09269 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -71,6 +71,7 @@ + #define DEFINE_PROP_SIZE32(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 86ae832176..261ec1e35c 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -22,6 +22,7 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qdict.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/string-input-visitor.h" + #include "qapi/string-output-visitor.h" + #include "qemu/cutils.h" +@@ -291,6 +292,9 @@ 
void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS), + params->decompress_threads); + assert(params->has_throttle_trigger_threshold); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_METHOD), ++ CompressMethod_str(params->compress_method)); + monitor_printf(mon, "%s: %u\n", + MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD), + params->throttle_trigger_threshold); +@@ -519,6 +523,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + MigrateSetParameters *p = g_new0(MigrateSetParameters, 1); + uint64_t valuebw = 0; + uint64_t cache_size; ++ CompressMethod compress_method; + Error *err = NULL; + int val, ret; + +@@ -544,6 +549,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_decompress_threads = true; + visit_type_uint8(v, param, &p->decompress_threads, &err); + break; ++ case MIGRATION_PARAMETER_COMPRESS_METHOD: ++ p->has_compress_method = true; ++ visit_type_CompressMethod(v, param, &compress_method, &err); ++ if (err) { ++ break; ++ } ++ p->compress_method = compress_method; ++ break; + case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD: + p->has_throttle_trigger_threshold = true; + visit_type_uint8(v, param, &p->throttle_trigger_threshold, &err); +diff --git a/migration/options.c b/migration/options.c +index 8d8ec73ad9..af7ea7b346 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -47,6 +47,7 @@ #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 /*0: means nocompress, 1: best speed, ... 
9: best compress ratio */ #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +#define DEFAULT_MIGRATE_COMPRESS_METHOD COMPRESS_METHOD_ZLIB /* Define default autoconverge cpu throttle migration parameters */ + #define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 - #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 -@@ -748,6 +749,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) +@@ -113,6 +114,9 @@ Property migration_properties[] = { + DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, + parameters.decompress_threads, + DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), ++ DEFINE_PROP_COMPRESS_METHOD("compress-method", MigrationState, ++ parameters.compress_method, ++ DEFAULT_MIGRATE_COMPRESS_METHOD), + DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, + parameters.throttle_trigger_threshold, + DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), +@@ -953,6 +957,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->compress_wait_thread = s->parameters.compress_wait_thread; params->has_decompress_threads = true; params->decompress_threads = s->parameters.decompress_threads; + params->has_compress_method = true; + params->compress_method = s->parameters.compress_method; + params->has_throttle_trigger_threshold = true; + params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; + params->has_cpu_throttle_initial = true; +@@ -1025,6 +1031,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_compress_threads = true; + params->has_compress_wait_thread = true; + params->has_decompress_threads = true; ++ params->has_compress_method = true; + params->has_throttle_trigger_threshold = true; params->has_cpu_throttle_initial = true; - params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; params->has_cpu_throttle_increment = true; -@@ -1250,6 +1253,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, +@@ -1259,6 +1266,10 
@@ static void migrate_params_test_apply(MigrateSetParameters *params, dest->decompress_threads = params->decompress_threads; } @@ -184,10 +156,10 @@ index 0e396f22b4..c79bf09269 100644 + dest->compress_method = params->compress_method; + } + - if (params->has_cpu_throttle_initial) { - dest->cpu_throttle_initial = params->cpu_throttle_initial; + if (params->has_throttle_trigger_threshold) { + dest->throttle_trigger_threshold = params->throttle_trigger_threshold; } -@@ -1331,6 +1338,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +@@ -1380,6 +1391,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) s->parameters.decompress_threads = params->decompress_threads; } @@ -195,99 +167,28 @@ index 0e396f22b4..c79bf09269 100644 + s->parameters.compress_method = params->compress_method; + } + - if (params->has_cpu_throttle_initial) { - s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; + if (params->has_throttle_trigger_threshold) { + s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; } -@@ -3436,6 +3447,9 @@ static Property migration_properties[] = { - DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, - parameters.decompress_threads, - DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), -+ DEFINE_PROP_COMPRESS_METHOD("compress-method", MigrationState, -+ parameters.compress_method, -+ DEFAULT_MIGRATE_COMPRESS_METHOD), - DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, - parameters.cpu_throttle_initial, - DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), -@@ -3535,6 +3549,7 @@ static void migration_instance_init(Object *obj) - params->has_compress_level = true; - params->has_compress_threads = true; - params->has_decompress_threads = true; -+ params->has_compress_method = true; - params->has_cpu_throttle_initial = true; - params->has_cpu_throttle_increment = true; - params->has_max_bandwidth = true; -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 
cd96d04e9a..be0d6c8ca8 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -382,6 +382,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size, - } - } - -+static void add_buf_to_iovec(QEMUFile *f, size_t len) -+{ -+ add_to_iovec(f, f->buf + f->buf_index, len, false); -+ f->buf_index += len; -+ if (f->buf_index == IO_BUF_SIZE) { -+ qemu_fflush(f); -+ } -+} -+ - void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, - bool may_free) - { diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index fc5d6b92c4..e5a7a88ba2 100644 +index 871898ac46..5bb3c9cd46 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c -@@ -41,6 +41,7 @@ - #include "qapi/qapi-commands-tpm.h" - #include "qapi/qapi-commands-ui.h" - #include "qapi/qapi-visit-net.h" -+#include "qapi/qapi-visit-migration.h" +@@ -24,6 +24,7 @@ + #include "qapi/qapi-commands-control.h" + #include "qapi/qapi-commands-misc.h" #include "qapi/qmp/qdict.h" - #include "qapi/qmp/qerror.h" - #include "qapi/string-input-visitor.h" -@@ -426,6 +427,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) - MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS), - params->decompress_threads); - assert(params->has_cpu_throttle_initial); -+ monitor_printf(mon, "%s: %s\n", -+ MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_METHOD), -+ CompressMethod_str(params->compress_method)); - monitor_printf(mon, "%s: %u\n", - MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL), - params->cpu_throttle_initial); -@@ -1756,6 +1760,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - MigrateSetParameters *p = g_new0(MigrateSetParameters, 1); - uint64_t valuebw = 0; - uint64_t cache_size; -+ CompressMethod compress_method; - Error *err = NULL; - int val, ret; - -@@ -1781,6 +1786,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - p->has_decompress_threads = true; - visit_type_int(v, param, &p->decompress_threads, 
&err); - break; -+ case MIGRATION_PARAMETER_COMPRESS_METHOD: -+ p->has_compress_method = true; -+ visit_type_CompressMethod(v, param, &compress_method, &err); -+ if (err) { -+ break; -+ } -+ p->compress_method = compress_method; -+ break; - case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL: - p->has_cpu_throttle_initial = true; - visit_type_int(v, param, &p->cpu_throttle_initial, &err); ++#include "qapi/qapi-visit-migration.h" + #include "qemu/cutils.h" + #include "hw/intc/intc.h" + #include "qemu/log.h" diff --git a/qapi/migration.json b/qapi/migration.json -index 6844ddfab3..b0e8c493ee 100644 +index eb2f883513..cafaa5ccb3 100644 --- a/qapi/migration.json +++ b/qapi/migration.json -@@ -482,6 +482,19 @@ - ## - { 'command': 'query-migrate-capabilities', 'returns': ['MigrationCapabilityStatus']} +@@ -708,6 +708,19 @@ + 'bitmaps': [ 'BitmapMigrationBitmapAlias' ] + } } +## +# @CompressMethod: @@ -305,61 +206,87 @@ index 6844ddfab3..b0e8c493ee 100644 ## # @MigrationParameter: # -@@ -518,6 +531,9 @@ - # compression, so set the decompress-threads to the number about 1/4 - # of compress-threads is adequate. +@@ -746,6 +759,9 @@ + # fast as compression, so set the decompress-threads to the number + # about 1/4 of compress-threads is adequate. # +# @compress-method: Which multi-thread compression method to use. +# Defaults to none. (Since 5.0) +# - # @cpu-throttle-initial: Initial percentage of time guest cpus are throttled - # when migration auto-converge is activated. The - # default value is 20. (Since 2.7) -@@ -586,7 +602,7 @@ - 'data': ['announce-initial', 'announce-max', - 'announce-rounds', 'announce-step', - 'compress-level', 'compress-threads', 'decompress-threads', -- 'compress-wait-thread', -+ 'compress-wait-thread', 'compress-method', + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. 
(Since 5.0) +@@ -892,6 +908,7 @@ + { 'name': 'compress-level', 'features': [ 'deprecated' ] }, + { 'name': 'compress-threads', 'features': [ 'deprecated' ] }, + { 'name': 'decompress-threads', 'features': [ 'deprecated' ] }, ++ { 'name': 'compress-method', 'features': [ 'deprecated' ] }, + { 'name': 'compress-wait-thread', 'features': [ 'deprecated' ] }, + 'throttle-trigger-threshold', 'cpu-throttle-initial', 'cpu-throttle-increment', - 'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth', - 'downtime-limit', 'x-checkpoint-delay', 'block-incremental', -@@ -620,6 +636,9 @@ +@@ -935,6 +952,9 @@ # # @decompress-threads: decompression thread count # +# @compress-method: Set compression method to use in multi-thread compression. +# Defaults to none. (Since 5.0) +# - # @cpu-throttle-initial: Initial percentage of time guest cpus are - # throttled when migration auto-converge is activated. - # The default value is 20. (Since 2.7) -@@ -695,6 +714,7 @@ - '*compress-threads': 'int', - '*compress-wait-thread': 'bool', - '*decompress-threads': 'int', -+ '*compress-method': 'CompressMethod', - '*cpu-throttle-initial': 'int', - '*cpu-throttle-increment': 'int', - '*tls-creds': 'StrOrNull', -@@ -753,6 +773,9 @@ + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -1066,8 +1086,9 @@ + # + # @deprecated: Member @block-incremental is deprecated. Use + # blockdev-mirror with NBD instead. Members @compress-level, +-# @compress-threads, @decompress-threads and @compress-wait-thread +-# are deprecated because @compression is deprecated. ++# @compress-threads, @decompress-threads, @compress-method ++# and @compress-wait-thread are deprecated because ++# @compression is deprecated. + # + # @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period + # are experimental. 
+@@ -1090,6 +1111,8 @@ + 'features': [ 'deprecated' ] }, + '*decompress-threads': { 'type': 'uint8', + 'features': [ 'deprecated' ] }, ++ '*compress-method': { 'type': 'CompressMethod', ++ 'features': [ 'deprecated' ] }, + '*throttle-trigger-threshold': 'uint8', + '*cpu-throttle-initial': 'uint8', + '*cpu-throttle-increment': 'uint8', +@@ -1161,6 +1184,9 @@ # # @decompress-threads: decompression thread count # +# @compress-method: Which multi-thread compression method to use. +# Defaults to none. (Since 5.0) +# - # @cpu-throttle-initial: Initial percentage of time guest cpus are - # throttled when migration auto-converge is activated. - # (Since 2.7) -@@ -828,6 +851,7 @@ - '*compress-threads': 'uint8', - '*compress-wait-thread': 'bool', - '*decompress-threads': 'uint8', -+ '*compress-method': 'CompressMethod', + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -1315,6 +1341,8 @@ + 'features': [ 'deprecated' ] }, + '*decompress-threads': { 'type': 'uint8', + 'features': [ 'deprecated' ] }, ++ '*compress-method': { 'type': 'CompressMethod', ++ 'features': [ 'deprecated' ] }, + '*throttle-trigger-threshold': 'uint8', '*cpu-throttle-initial': 'uint8', '*cpu-throttle-increment': 'uint8', - '*tls-creds': 'str', +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index 9ca3fee2b8..43af077fed 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -346,7 +346,7 @@ static void *do_touch_pages(void *arg) + } + qemu_mutex_unlock(&page_mutex); + +- while (started_num_threads != memset_args->context.num_threads) { ++ while (started_num_threads != memset_args->context->num_threads) { + smp_mb(); + } + -- 2.27.0 diff --git a/migration-Add-multi-thread-compress-ops.patch b/migration-Add-multi-thread-compress-ops.patch index 043d9f9b3379a4664e01b9153ccc7ecf3a702c6d..dee278f8e65ab15824e9b624fbedfff6c177f8a4 100644 --- 
a/migration-Add-multi-thread-compress-ops.patch +++ b/migration-Add-multi-thread-compress-ops.patch @@ -1,24 +1,27 @@ -From 99fddf2ffeefc99ab15b3428dbd2b46476be3e7e Mon Sep 17 00:00:00 2001 -From: Zeyu Jin +From 5896dedf32c7e4417bd7f3e889ca41a34b06f5db Mon Sep 17 00:00:00 2001 +From: Chuan Zheng Date: Sat, 30 Jan 2021 15:57:31 +0800 Subject: [PATCH] migration: Add multi-thread compress ops Add the MigrationCompressOps and MigrationDecompressOps structures to make the compression method configurable for multi-thread compression migration. +Signed-off-by: Chuan Zheng Signed-off-by: Zeyu Jin Signed-off-by: Ying Fang --- - migration/migration.c | 9 ++ - migration/migration.h | 1 + - migration/ram.c | 269 ++++++++++++++++++++++++++++++------------ - 3 files changed, 201 insertions(+), 78 deletions(-) + migration/options.c | 9 ++ + migration/options.h | 1 + + migration/ram-compress.c | 261 ++++++++++++++++++++++++++------------- + migration/ram-compress.h | 31 ++++- + migration/ram.c | 4 +- + 5 files changed, 215 insertions(+), 91 deletions(-) -diff --git a/migration/migration.c b/migration/migration.c -index c79bf09269..67425fde7a 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2143,6 +2143,15 @@ int migrate_decompress_threads(void) +diff --git a/migration/options.c b/migration/options.c +index af7ea7b346..6aaee702dc 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -799,6 +799,15 @@ int migrate_decompress_threads(void) return s->parameters.decompress_threads; } @@ -31,68 +34,50 @@ index c79bf09269..67425fde7a 100644 + return s->parameters.compress_method; +} + - bool migrate_dirty_bitmaps(void) + uint64_t migrate_downtime_limit(void) { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index f2bd4ebe33..4aa72297fc 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -319,6 +319,7 @@ int migrate_compress_level(void); - int migrate_compress_threads(void); - int 
migrate_compress_wait_thread(void); + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 246c160aee..9aca5e41ad 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -78,6 +78,7 @@ uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); + bool migrate_cpu_throttle_tailslow(void); int migrate_decompress_threads(void); +CompressMethod migrate_compress_method(void); - bool migrate_use_events(void); - bool migrate_postcopy_blocktime(void); + uint64_t migrate_downtime_limit(void); + uint8_t migrate_max_cpu_throttle(void); + uint64_t migrate_max_bandwidth(void); +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index 2be344acbc..6e37b22492 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -65,26 +65,167 @@ static QemuThread *compress_threads; + static QemuMutex comp_done_lock; + static QemuCond comp_done_cond; -diff --git a/migration/ram.c b/migration/ram.c -index f78a681ca2..3ed808a4ca 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -417,6 +417,9 @@ struct CompressParam { - /* internally used fields */ - z_stream stream; - uint8_t *originbuf; -+ -+ /* for zlib compression */ -+ z_stream stream; - }; - typedef struct CompressParam CompressParam; - -@@ -428,12 +431,29 @@ struct DecompressParam { - void *des; - uint8_t *compbuf; - int len; -+ -+ /* for zlib compression */ - z_stream stream; - }; - typedef struct DecompressParam DecompressParam; - -+typedef struct { -+ int (*save_setup)(CompressParam *param); -+ void (*save_cleanup)(CompressParam *param); -+ ssize_t (*compress_data)(CompressParam *param, size_t size); -+} MigrationCompressOps; -+ -+typedef struct { -+ int (*load_setup)(DecompressParam *param); -+ void (*load_cleanup)(DecompressParam *param); -+ int (*decompress_data)(DecompressParam *param, uint8_t *dest, size_t size); -+ int (*check_len)(int len); -+} MigrationDecompressOps; -+ - static CompressParam 
*comp_param; - static QemuThread *compress_threads; -+static MigrationCompressOps *compress_ops; -+static MigrationDecompressOps *decompress_ops; - /* comp_done_cond is used to wake up the migration thread when - * one of the compression threads has finished the compression. - * comp_done_lock is used to co-work with comp_done_cond. -@@ -451,6 +471,157 @@ static QemuCond decomp_done_cond; +-struct DecompressParam { +- bool done; +- bool quit; +- QemuMutex mutex; +- QemuCond cond; +- void *des; +- uint8_t *compbuf; +- int len; +- z_stream stream; +-}; +-typedef struct DecompressParam DecompressParam; +- + static QEMUFile *decomp_file; + static DecompressParam *decomp_param; + static QemuThread *decompress_threads; ++MigrationCompressOps *compress_ops; ++MigrationDecompressOps *decompress_ops; + static QemuMutex decomp_done_lock; + static QemuCond decomp_done_cond; - static bool do_compress_ram_page(CompressParam *param, RAMBlock *block); + static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block); +static int zlib_save_setup(CompressParam *param) +{ @@ -105,7 +90,7 @@ index f78a681ca2..3ed808a4ca 100644 +} + +static ssize_t zlib_compress_data(CompressParam *param, size_t size) -+ ++{ + int err; + uint8_t *dest = NULL; + z_stream *stream = ¶m->stream; @@ -248,7 +233,7 @@ index f78a681ca2..3ed808a4ca 100644 static void *do_data_compress(void *opaque) { CompressParam *param = opaque; -@@ -508,7 +679,7 @@ static void compress_threads_save_cleanup(void) +@@ -141,7 +282,7 @@ void compress_threads_save_cleanup(void) qemu_thread_join(compress_threads + i); qemu_mutex_destroy(&comp_param[i].mutex); qemu_cond_destroy(&comp_param[i].cond); @@ -257,16 +242,16 @@ index f78a681ca2..3ed808a4ca 100644 g_free(comp_param[i].originbuf); qemu_fclose(comp_param[i].file); comp_param[i].file = NULL; -@@ -519,6 +690,7 @@ static void compress_threads_save_cleanup(void) +@@ -152,6 +293,7 @@ void compress_threads_save_cleanup(void) g_free(comp_param); compress_threads 
= NULL; comp_param = NULL; + clean_compress_ops(); } - static int compress_threads_save_setup(void) -@@ -528,6 +700,12 @@ static int compress_threads_save_setup(void) - if (!migrate_use_compression()) { + int compress_threads_save_setup(void) +@@ -161,6 +303,12 @@ int compress_threads_save_setup(void) + if (!migrate_compress()) { return 0; } + @@ -278,7 +263,7 @@ index f78a681ca2..3ed808a4ca 100644 thread_count = migrate_compress_threads(); compress_threads = g_new0(QemuThread, thread_count); comp_param = g_new0(CompressParam, thread_count); -@@ -539,8 +717,7 @@ static int compress_threads_save_setup(void) +@@ -172,8 +320,7 @@ int compress_threads_save_setup(void) goto exit; } @@ -288,8 +273,8 @@ index f78a681ca2..3ed808a4ca 100644 g_free(comp_param[i].originbuf); goto exit; } -@@ -2208,50 +2385,6 @@ static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, - return 1; +@@ -198,50 +345,6 @@ exit: + return -1; } -/* @@ -336,19 +321,19 @@ index f78a681ca2..3ed808a4ca 100644 - return blen + sizeof(int32_t); -} - - static bool do_compress_ram_page(CompressParam *param, RAMBlock *block) + static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block) { - RAMState *rs = ram_state; -@@ -2274,7 +2407,7 @@ static bool do_compress_ram_page(CompressParam *param, RAMBlock *block) + uint8_t *p = block->host + (param->offset & TARGET_PAGE_MASK); +@@ -260,7 +363,7 @@ static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block * decompression */ - memcpy(param->originbuf, p, TARGET_PAGE_SIZE); -- ret = qemu_put_compression_data(param, TARGET_PAGE_SIZE); -+ ret = compress_ops->compress_data(param, TARGET_PAGE_SIZE); + memcpy(param->originbuf, p, page_size); +- ret = qemu_put_compression_data(param, page_size); ++ ret = compress_ops->compress_data(param, page_size); if (ret < 0) { qemu_file_set_error(migrate_get_current()->to_dst_file, ret); error_report("compressed data failed!"); -@@ -3965,32 +4098,6 @@ void ram_handle_compressed(void 
*host, uint8_t ch, uint64_t size) +@@ -356,32 +459,6 @@ bool compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, } } @@ -381,16 +366,16 @@ index f78a681ca2..3ed808a4ca 100644 static void *do_data_decompress(void *opaque) { DecompressParam *param = opaque; -@@ -4004,7 +4111,7 @@ static void *do_data_decompress(void *opaque) - param->des = 0; - qemu_mutex_unlock(¶m->mutex); +@@ -398,7 +475,7 @@ static void *do_data_decompress(void *opaque) + + pagesize = qemu_target_page_size(); -- ret = qemu_uncompress_data(param, des, TARGET_PAGE_SIZE); -+ ret = decompress_ops->decompress_data(param, des, TARGET_PAGE_SIZE); +- ret = qemu_uncompress_data(param, des, pagesize); ++ ret = decompress_ops->decompress_data(param, des, pagesize); if (ret < 0 && migrate_get_current()->decompress_error_check) { error_report("decompress data failed"); qemu_file_set_error(decomp_file, ret); -@@ -4074,7 +4181,7 @@ static void compress_threads_load_cleanup(void) +@@ -466,7 +543,7 @@ void compress_threads_load_cleanup(void) qemu_thread_join(decompress_threads + i); qemu_mutex_destroy(&decomp_param[i].mutex); qemu_cond_destroy(&decomp_param[i].cond); @@ -399,15 +384,15 @@ index f78a681ca2..3ed808a4ca 100644 g_free(decomp_param[i].compbuf); decomp_param[i].compbuf = NULL; } -@@ -4083,6 +4190,7 @@ static void compress_threads_load_cleanup(void) +@@ -475,6 +552,7 @@ void compress_threads_load_cleanup(void) decompress_threads = NULL; decomp_param = NULL; decomp_file = NULL; + clean_decompress_ops(); } - static int compress_threads_load_setup(QEMUFile *f) -@@ -4093,6 +4201,11 @@ static int compress_threads_load_setup(QEMUFile *f) + int compress_threads_load_setup(QEMUFile *f) +@@ -485,6 +563,11 @@ int compress_threads_load_setup(QEMUFile *f) return 0; } @@ -416,10 +401,10 @@ index f78a681ca2..3ed808a4ca 100644 + return -1; + } + - thread_count = migrate_decompress_threads(); - decompress_threads = g_new0(QemuThread, thread_count); - decomp_param = g_new0(DecompressParam, thread_count); 
-@@ -4100,7 +4213,7 @@ static int compress_threads_load_setup(QEMUFile *f) + /* + * set compression_counters memory to zero for a new migration + */ +@@ -497,7 +580,7 @@ int compress_threads_load_setup(QEMUFile *f) qemu_cond_init(&decomp_done_cond); decomp_file = f; for (i = 0; i < thread_count; i++) { @@ -428,7 +413,73 @@ index f78a681ca2..3ed808a4ca 100644 goto exit; } -@@ -4642,7 +4755,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) +diff --git a/migration/ram-compress.h b/migration/ram-compress.h +index 0d89a2f55e..daf241987f 100644 +--- a/migration/ram-compress.h ++++ b/migration/ram-compress.h +@@ -39,6 +39,20 @@ enum CompressResult { + }; + typedef enum CompressResult CompressResult; + ++struct DecompressParam { ++ bool done; ++ bool quit; ++ QemuMutex mutex; ++ QemuCond cond; ++ void *des; ++ uint8_t *compbuf; ++ int len; ++ ++ /* for zlib compression */ ++ z_stream stream; ++}; ++typedef struct DecompressParam DecompressParam; ++ + struct CompressParam { + bool done; + bool quit; +@@ -51,11 +65,26 @@ struct CompressParam { + ram_addr_t offset; + + /* internally used fields */ +- z_stream stream; + uint8_t *originbuf; ++ ++ /* for zlib compression */ ++ z_stream stream; + }; + typedef struct CompressParam CompressParam; + ++typedef struct { ++ int (*save_setup)(CompressParam *param); ++ void (*save_cleanup)(CompressParam *param); ++ ssize_t (*compress_data)(CompressParam *param, size_t size); ++} MigrationCompressOps; ++ ++typedef struct { ++ int (*load_setup)(DecompressParam *param); ++ void (*load_cleanup)(DecompressParam *param); ++ int (*decompress_data)(DecompressParam *param, uint8_t *dest, size_t size); ++ int (*check_len)(int len); ++} MigrationDecompressOps; ++ + void compress_threads_save_cleanup(void); + int compress_threads_save_setup(void); + +diff --git a/migration/ram.c b/migration/ram.c +index 8c7886ab79..f9b2b9b985 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -96,6 +96,8 @@ + + XBZRLECacheStats 
xbzrle_counters; + ++extern MigrationDecompressOps *decompress_ops; ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* The migration channel used for a specific host page */ +@@ -3979,7 +3981,7 @@ static int ram_load_precopy(QEMUFile *f) case RAM_SAVE_FLAG_COMPRESS_PAGE: len = qemu_get_be32(f); diff --git a/migration-Add-zstd-support-in-multi-thread-compressi.patch b/migration-Add-zstd-support-in-multi-thread-compressi.patch index a84bb368ebe5a81e223c805c151ea4896ccc4e51..856e86a6bc0143e297bf1328f57338340d19f70d 100644 --- a/migration-Add-zstd-support-in-multi-thread-compressi.patch +++ b/migration-Add-zstd-support-in-multi-thread-compressi.patch @@ -1,86 +1,38 @@ -From 54a1b546e0bd0cc41669bf7ade806c6c777c96ad Mon Sep 17 00:00:00 2001 -From: Zeyu Jin +From 8c9603270184d8dadf64ec6de263268e846f8c18 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng Date: Sat, 30 Jan 2021 16:15:10 +0800 Subject: [PATCH] migration: Add zstd support in multi-thread compression This patch enables zstd option in multi-thread compression. 
+Signed-off-by: Chuan Zheng Signed-off-by: Zeyu Jin Signed-off-by: Ying Fang --- hw/core/qdev-properties-system.c | 2 +- - migration/ram.c | 130 ++++++++++++++++++++++++++++++- - qapi/migration.json | 2 +- - 3 files changed, 130 insertions(+), 4 deletions(-) + migration/ram-compress.c | 112 +++++++++++++++++++++++++++++++ + migration/ram-compress.h | 15 +++++ + qapi/migration.json | 3 +- + 4 files changed, 130 insertions(+), 2 deletions(-) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c -index 67ed89b406..6d48903c87 100644 +index cd5571fcfb..c581d46f2e 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c -@@ -406,7 +406,7 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd) +@@ -1206,7 +1206,7 @@ const PropertyInfo qdev_prop_uuid = { const PropertyInfo qdev_prop_compress_method = { .name = "CompressMethod", .description = "multi-thread compression method, " - "zlib", + "zlib/zstd", .enum_table = &CompressMethod_lookup, - .get = get_enum, - .set = set_enum, -diff --git a/migration/ram.c b/migration/ram.c -index 3ed808a4ca..ba1e729c39 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -59,6 +59,10 @@ - #include "savevm.h" - #include "qemu/iov.h" - -+#ifdef CONFIG_ZSTD -+#include -+#include -+#endif - /***********************************************************/ - /* ram save/restore */ - -@@ -415,11 +419,16 @@ struct CompressParam { - ram_addr_t offset; - - /* internally used fields */ -- z_stream stream; - uint8_t *originbuf; - - /* for zlib compression */ - z_stream stream; -+ -+#ifdef CONFIG_ZSTD -+ ZSTD_CStream *zstd_cs; -+ ZSTD_inBuffer in; -+ ZSTD_outBuffer out; -+#endif - }; - typedef struct CompressParam CompressParam; - -@@ -434,6 +443,11 @@ struct DecompressParam { - - /* for zlib compression */ - z_stream stream; -+#ifdef CONFIG_ZSTD -+ ZSTD_DStream *zstd_ds; -+ ZSTD_inBuffer in; -+ ZSTD_outBuffer out; -+#endif - }; - typedef struct DecompressParam DecompressParam; - -@@ 
-482,7 +496,7 @@ static int zlib_save_setup(CompressParam *param) - } - - static ssize_t zlib_compress_data(CompressParam *param, size_t size) -- -+{ - int err; - uint8_t *dest = NULL; - z_stream *stream = ¶m->stream; -@@ -567,6 +581,103 @@ static int zlib_check_len(int len) + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index 6e37b22492..74703f0ec4 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -171,6 +171,103 @@ static int zlib_check_len(int len) return len < 0 || len > compressBound(TARGET_PAGE_SIZE); } @@ -184,7 +136,7 @@ index 3ed808a4ca..ba1e729c39 100644 static int set_compress_ops(void) { compress_ops = g_new0(MigrationCompressOps, 1); -@@ -577,6 +688,13 @@ static int set_compress_ops(void) +@@ -181,6 +278,13 @@ static int set_compress_ops(void) compress_ops->save_cleanup = zlib_save_cleanup; compress_ops->compress_data = zlib_compress_data; break; @@ -198,7 +150,7 @@ index 3ed808a4ca..ba1e729c39 100644 default: return -1; } -@@ -595,6 +713,14 @@ static int set_decompress_ops(void) +@@ -199,6 +303,14 @@ static int set_decompress_ops(void) decompress_ops->decompress_data = zlib_decompress_data; decompress_ops->check_len = zlib_check_len; break; @@ -213,16 +165,62 @@ index 3ed808a4ca..ba1e729c39 100644 default: return -1; } +diff --git a/migration/ram-compress.h b/migration/ram-compress.h +index daf241987f..e8700eb36f 100644 +--- a/migration/ram-compress.h ++++ b/migration/ram-compress.h +@@ -29,6 +29,10 @@ + #ifndef QEMU_MIGRATION_COMPRESS_H + #define QEMU_MIGRATION_COMPRESS_H + ++#ifdef CONFIG_ZSTD ++#include ++#include ++#endif + #include "qemu-file.h" + #include "qapi/qapi-types-migration.h" + +@@ -50,6 +54,11 @@ struct DecompressParam { + + /* for zlib compression */ + z_stream stream; ++#ifdef CONFIG_ZSTD ++ ZSTD_DStream *zstd_ds; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct DecompressParam DecompressParam; + 
+@@ -69,6 +78,12 @@ struct CompressParam { + + /* for zlib compression */ + z_stream stream; ++ ++#ifdef CONFIG_ZSTD ++ ZSTD_CStream *zstd_cs; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct CompressParam CompressParam; + diff --git a/qapi/migration.json b/qapi/migration.json -index b0e8c493ee..587ef65872 100644 +index cafaa5ccb3..29af841f4e 100644 --- a/qapi/migration.json +++ b/qapi/migration.json -@@ -493,7 +493,7 @@ +@@ -714,12 +714,13 @@ + # An enumeration of multi-thread compression methods. + # + # @zlib: use zlib compression method. ++# @zstd: use zstd compression method. + # + # Since: 5.0 # ## { 'enum': 'CompressMethod', - 'data': [ 'zlib' ] } -+ 'data': [ 'zlib', { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] } ++ 'data': [ 'zlib', { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] } ## # @MigrationParameter: diff --git a/migration-Change-SaveStateEntry.instance_id-into-uin.patch b/migration-Change-SaveStateEntry.instance_id-into-uin.patch deleted file mode 100644 index 3eb83b3996ccd7b934d7ca5c65800ead9c0ae3bd..0000000000000000000000000000000000000000 --- a/migration-Change-SaveStateEntry.instance_id-into-uin.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 2eadc5c611ca8cc916f74c0f393f1fd942903ef7 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 16 Oct 2019 10:29:31 +0800 -Subject: [PATCH 6/8] migration: Change SaveStateEntry.instance_id into - uint32_t - -It was always used as 32bit, so define it as used to be clear. -Instead of using -1 as the auto-gen magic value, we switch to -UINT32_MAX. We also make sure that we don't auto-gen this value to -avoid overflowed instance IDs without being noticed. 
- -Suggested-by: Juan Quintela -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - hw/intc/apic_common.c | 2 +- - include/migration/register.h | 2 +- - include/migration/vmstate.h | 2 +- - migration/savevm.c | 18 ++++++++++-------- - stubs/vmstate.c | 2 +- - 5 files changed, 14 insertions(+), 12 deletions(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index faea1af..07adba0 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -313,7 +313,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- int instance_id = s->id; -+ uint32_t instance_id = s->id; - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); -diff --git a/include/migration/register.h b/include/migration/register.h -index 3d0b983..8b2bc5b 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -70,7 +70,7 @@ typedef struct SaveVMHandlers { - - int register_savevm_live(DeviceState *dev, - const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque); -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index 92f531a..8abd2e3 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1117,7 +1117,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - #define VMSTATE_INSTANCE_ID_ANY -1 - - /* Returns: 0 on success, -1 on failure */ --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, -diff --git a/migration/savevm.c b/migration/savevm.c -index 62552ab..7d89c57 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -229,7 +229,7 @@ typedef struct CompatEntry { - typedef 
struct SaveStateEntry { - QTAILQ_ENTRY(SaveStateEntry) entry; - char idstr[256]; -- int instance_id; -+ uint32_t instance_id; - int alias_id; - int version_id; - /* version id read from the stream */ -@@ -616,10 +616,10 @@ void dump_vmstate_json_to_file(FILE *out_file) - fclose(out_file); - } - --static int calculate_new_instance_id(const char *idstr) -+static uint32_t calculate_new_instance_id(const char *idstr) - { - SaveStateEntry *se; -- int instance_id = 0; -+ uint32_t instance_id = 0; - - QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (strcmp(idstr, se->idstr) == 0 -@@ -627,6 +627,8 @@ static int calculate_new_instance_id(const char *idstr) - instance_id = se->instance_id + 1; - } - } -+ /* Make sure we never loop over without being noticed */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - return instance_id; - } - -@@ -682,7 +684,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) - distinguishing id for all instances of your device class. */ - int register_savevm_live(DeviceState *dev, - const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque) -@@ -756,7 +758,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) - } - } - --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *opaque, int alias_id, - int required_for_version, -@@ -1507,7 +1509,7 @@ int qemu_save_device_state(QEMUFile *f) - return qemu_file_get_error(f); - } - --static SaveStateEntry *find_se(const char *idstr, int instance_id) -+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) - { - SaveStateEntry *se; - -@@ -2187,7 +2189,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - /* Find savevm section */ - se = find_se(idstr, instance_id); - if (se == NULL) { -- error_report("Unknown savevm 
section or instance '%s' %d. " -+ error_report("Unknown savevm section or instance '%s' %"PRIu32". " - "Make sure that your current VM setup matches your " - "saved VM setup, including any hotplugged devices", - idstr, instance_id); -@@ -2211,7 +2213,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - - ret = vmstate_load(f, se); - if (ret < 0) { -- error_report("error while loading state for instance 0x%x of" -+ error_report("error while loading state for instance 0x%"PRIx32" of" - " device '%s'", instance_id, idstr); - return ret; - } -diff --git a/stubs/vmstate.c b/stubs/vmstate.c -index e1e89b8..4ed5cc6 100644 ---- a/stubs/vmstate.c -+++ b/stubs/vmstate.c -@@ -4,7 +4,7 @@ - const VMStateDescription vmstate_dummy = {}; - - int vmstate_register_with_alias_id(DeviceState *dev, -- int instance_id, -+ uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, --- -1.8.3.1 - diff --git a/migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch b/migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch deleted file mode 100644 index 79548949d7f449db1c57df2b747e347d7b220db5..0000000000000000000000000000000000000000 --- a/migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 79d722679731233ccb1aa775d896a4bf21e13d44 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 27 May 2020 10:02:06 +0800 -Subject: [PATCH] migration: Compat virtual timer adjust for v4.0.1 and v4.1.0 - -Vtimer adjust is used in openEuler qemu-4.0.1, however kvm_adjvtime -is introduced in openEuler qemu-4.1.0. To maintain the compatibility -and enable cross version migration, let's enable vtimer adjust only -if kvm_adjvtime is not enabled, otherwise there may be conflicts -between vtimer adjust and kvm_adjvtime. 
- -After this modification: -1: openEuler qemu-4.0.1 use vtimer as the default virtual timer -2: openEuler qemu-4.1.0 use kvm_adjvtime as the defaut virtual timer - -Migration from openEuler qemu-4.0.1 to openEuler qemu-4.1.0 will -be ok, but migration path from upstream qemu-4.0.1 to openEuler -qemu-4..0.1 will be broken. - -Since openEuler qemu-4.1.0, kvm_adjvtime is used as the default -virtual timer. So please upgrade to openEuler qemu-4.1.0 and -use the virt-4.1 machine. - -Signed-off-by: Ying Fang - -diff --git a/cpus.c b/cpus.c -index b9aa51f8..6a28bdef 100644 ---- a/cpus.c -+++ b/cpus.c -@@ -1067,6 +1067,12 @@ void cpu_synchronize_all_pre_loadvm(void) - } - - #ifdef __aarch64__ -+static bool kvm_adjvtime_enabled(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ return cpu->kvm_adjvtime == true; -+} -+ - static void get_vcpu_timer_tick(CPUState *cs) - { - CPUARMState *env = &ARM_CPU(cs)->env; -@@ -1096,7 +1102,13 @@ static int do_vm_stop(RunState state, bool send_stop) - cpu_disable_ticks(); - pause_all_vcpus(); - #ifdef __aarch64__ -- if (first_cpu) { -+ /* vtimer adjust is used in openEuler qemu-4.0.1, however kvm_adjvtime -+ * is introduced in openEuler qemu-4.1.0. To maintain the compatibility -+ * and enable cross version migration, let's enable vtimer adjust only -+ * if kvm_adjvtime is not enabled, otherwise there may be conflicts -+ * between vtimer adjust and kvm_adjvtime. -+ */ -+ if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { - get_vcpu_timer_tick(first_cpu); - } - #endif -@@ -1946,6 +1958,7 @@ void cpu_resume(CPUState *cpu) - } - - #ifdef __aarch64__ -+ - static void set_vcpu_timer_tick(CPUState *cs) - { - CPUARMState *env = &ARM_CPU(cs)->env; -@@ -1977,7 +1990,10 @@ void resume_all_vcpus(void) - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); - #ifdef __aarch64__ -- if (first_cpu) { -+ /* Enable vtimer adjust only if kvm_adjvtime is not enabled, otherwise -+ * there may be conflicts between vtimer adjust and kvm_adjvtime. 
-+ */ -+ if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { - set_vcpu_timer_tick(first_cpu); - } - #endif --- -2.23.0 - diff --git a/migration-Count-new_dirty-instead-of-real_dirty.patch b/migration-Count-new_dirty-instead-of-real_dirty.patch deleted file mode 100644 index a9ff297ffac9fb42ce63ef8a256e648adf1166dd..0000000000000000000000000000000000000000 --- a/migration-Count-new_dirty-instead-of-real_dirty.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 63320ae36834e4ff2f0d139f205c464caa3887b4 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Mon, 22 Jun 2020 11:20:37 +0800 -Subject: [PATCH 04/11] migration: Count new_dirty instead of real_dirty - -real_dirty_pages becomes equal to total ram size after dirty log sync -in ram_init_bitmaps, the reason is that the bitmap of ramblock is -initialized to be all set, so old path counts them as "real dirty" at -beginning. - -This causes wrong dirty rate and false positive throttling. - -Signed-off-by: Keqian Zhu -Message-Id: <20200622032037.31112-1-zhukeqian1@huawei.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -Signed-off-by: BiaoXiang Ye ---- - include/exec/ram_addr.h | 5 +---- - migration/ram.c | 8 +++++--- - 2 files changed, 6 insertions(+), 7 deletions(-) - -diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h -index b7b2e60f..52344066 100644 ---- a/include/exec/ram_addr.h -+++ b/include/exec/ram_addr.h -@@ -485,8 +485,7 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, - static inline - uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, - ram_addr_t start, -- ram_addr_t length, -- uint64_t *real_dirty_pages) -+ ram_addr_t length) - { - ram_addr_t addr; - unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); -@@ -512,7 +511,6 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, - if (src[idx][offset]) { - unsigned long bits = atomic_xchg(&src[idx][offset], 0); - unsigned long new_dirty; -- *real_dirty_pages += ctpopl(bits); - new_dirty = ~dest[k]; - dest[k] |= bits; - new_dirty &= bits; -@@ -545,7 +543,6 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, - start + addr + offset, - TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { -- *real_dirty_pages += 1; - long k = (start + addr) >> TARGET_PAGE_BITS; - if (!test_and_set_bit(k, dest)) { - num_dirty++; -diff --git a/migration/ram.c b/migration/ram.c -index 840e3548..83cabec6 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1765,9 +1765,11 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, - static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, - ram_addr_t length) - { -- rs->migration_dirty_pages += -- cpu_physical_memory_sync_dirty_bitmap(rb, 0, length, -- &rs->num_dirty_pages_period); -+ uint64_t new_dirty_pages = -+ cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length); -+ -+ rs->migration_dirty_pages += new_dirty_pages; -+ rs->num_dirty_pages_period += new_dirty_pages; - } - - /** --- -2.27.0.dirty - diff --git a/migration-Create-migration_is_running.patch 
b/migration-Create-migration_is_running.patch deleted file mode 100644 index 86f0e6d3db2a2a51c25ebe9d6f0f4d6c48dc2de0..0000000000000000000000000000000000000000 --- a/migration-Create-migration_is_running.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 3d75adce1b9b465c45a9e841d285b3524e19cd7d Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 14:39:46 +0800 -Subject: [PATCH] migration: Create migration_is_running() - -This function returns true if we are in the middle of a migration. -It is like migration_is_setup_or_active() with CANCELLING and COLO. -Adapt all callers that are needed. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert ---- - migration/migration.c | 28 +++++++++++++++++++++++----- - migration/migration.h | 1 + - migration/savevm.c | 4 +--- - 3 files changed, 25 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 993d77b7d6..923a1d9d3f 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -822,6 +822,26 @@ bool migration_is_setup_or_active(int state) - } - } - -+bool migration_is_running(int state) -+{ -+ switch (state) { -+ case MIGRATION_STATUS_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_PAUSED: -+ case MIGRATION_STATUS_POSTCOPY_RECOVER: -+ case MIGRATION_STATUS_SETUP: -+ case MIGRATION_STATUS_PRE_SWITCHOVER: -+ case MIGRATION_STATUS_DEVICE: -+ case MIGRATION_STATUS_CANCELLING: -+ case MIGRATION_STATUS_COLO: -+ return true; -+ -+ default: -+ return false; -+ -+ } -+} -+ - static void populate_ram_info(MigrationInfo *info, MigrationState *s) - { - info->has_ram = true; -@@ -1074,7 +1094,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - MigrationCapabilityStatusList *cap; - bool cap_list[MIGRATION_CAPABILITY__MAX]; - -- if (migration_is_setup_or_active(s->state)) { -+ if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } -@@ -1588,7 +1608,7 @@ static 
void migrate_fd_cancel(MigrationState *s) - - do { - old_state = s->state; -- if (!migration_is_setup_or_active(old_state)) { -+ if (!migration_is_running(old_state)) { - break; - } - /* If the migration is paused, kick it out of the pause */ -@@ -1873,9 +1893,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - return true; - } - -- if (migration_is_setup_or_active(s->state) || -- s->state == MIGRATION_STATUS_CANCELLING || -- s->state == MIGRATION_STATUS_COLO) { -+ if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return false; - } -diff --git a/migration/migration.h b/migration/migration.h -index e5aaf2ef70..f2bd4ebe33 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -282,6 +282,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); - void migrate_fd_connect(MigrationState *s, Error *error_in); - - bool migration_is_setup_or_active(int state); -+bool migration_is_running(int state); - - void migrate_init(MigrationState *s); - bool migration_is_blocked(Error **errp); -diff --git a/migration/savevm.c b/migration/savevm.c -index 8163de7f21..f0974380e5 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1414,9 +1414,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - MigrationState *ms = migrate_get_current(); - MigrationStatus status; - -- if (migration_is_setup_or_active(ms->state) || -- ms->state == MIGRATION_STATUS_CANCELLING || -- ms->state == MIGRATION_STATUS_COLO) { -+ if (migration_is_running(ms->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return -EINVAL; - } --- -2.27.0 - diff --git a/migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/migration-Define-VMSTATE_INSTANCE_ID_ANY.patch deleted file mode 100644 index cd32b04997c14345aa7f488cd1a960a106d9aa15..0000000000000000000000000000000000000000 --- a/migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +++ /dev/null @@ -1,237 +0,0 @@ -From 21e049e2941b108df45c9089cbf7539caae538e6 Mon Sep 17 
00:00:00 2001 -From: Peter Xu -Date: Wed, 16 Oct 2019 10:29:30 +0800 -Subject: [PATCH 5/8] migration: Define VMSTATE_INSTANCE_ID_ANY - -Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to -auto-generate the vmstate instance ID. Previously it was hard coded -as -1 instead of this macro. It helps to change this default value in -the follow up patches. No functional change. - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - hw/arm/stellaris.c | 2 +- - hw/core/qdev.c | 4 +++- - hw/display/ads7846.c | 2 +- - hw/i2c/core.c | 2 +- - hw/input/stellaris_input.c | 3 ++- - hw/intc/apic_common.c | 2 +- - hw/misc/max111x.c | 3 ++- - hw/net/eepro100.c | 3 ++- - hw/pci/pci.c | 2 +- - hw/ppc/spapr.c | 2 +- - hw/timer/arm_timer.c | 2 +- - hw/tpm/tpm_emulator.c | 3 ++- - include/migration/vmstate.h | 2 ++ - migration/savevm.c | 8 ++++---- - 14 files changed, 24 insertions(+), 16 deletions(-) - -diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c -index 499035f..3432033 100644 ---- a/hw/arm/stellaris.c -+++ b/hw/arm/stellaris.c -@@ -705,7 +705,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, - memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); - memory_region_add_subregion(get_system_memory(), base, &s->iomem); - ssys_reset(s); -- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); - return 0; - } - -diff --git a/hw/core/qdev.c b/hw/core/qdev.c -index 94ebc0a..4b32f2f 100644 ---- a/hw/core/qdev.c -+++ b/hw/core/qdev.c -@@ -848,7 +848,9 @@ static void device_set_realized(Object *obj, bool value, Error **errp) - dev->canonical_path = object_get_canonical_path(OBJECT(dev)); - - if (qdev_get_vmsd(dev)) { -- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, -+ if (vmstate_register_with_alias_id(dev, -+ VMSTATE_INSTANCE_ID_ANY, -+ qdev_get_vmsd(dev), dev, - dev->instance_id_alias, - 
dev->alias_required_for_version, - &local_err) < 0) { -diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c -index 1a97e97..be1802e 100644 ---- a/hw/display/ads7846.c -+++ b/hw/display/ads7846.c -@@ -152,7 +152,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) - - ads7846_int_update(s); - -- vmstate_register(NULL, -1, &vmstate_ads7846, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); - } - - static void ads7846_class_init(ObjectClass *klass, void *data) -diff --git a/hw/i2c/core.c b/hw/i2c/core.c -index 20f36f1..186702b 100644 ---- a/hw/i2c/core.c -+++ b/hw/i2c/core.c -@@ -59,7 +59,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) - - bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); - QLIST_INIT(&bus->current_devs); -- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); - return bus; - } - -diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c -index 3a666d6..6c5b6d8 100644 ---- a/hw/input/stellaris_input.c -+++ b/hw/input/stellaris_input.c -@@ -86,5 +86,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) - } - s->num_buttons = n; - qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); -- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_stellaris_gamepad, s); - } -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index e764a2b..faea1af 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -329,7 +329,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - } - - if (s->legacy_instance_id) { -- instance_id = -1; -+ instance_id = VMSTATE_INSTANCE_ID_ANY; - } - vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, - s, -1, 0, NULL); -diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c -index d373ece..364cb01 100644 ---- a/hw/misc/max111x.c -+++ b/hw/misc/max111x.c 
-@@ -144,7 +144,8 @@ static int max111x_init(SSISlave *d, int inputs) - s->input[7] = 0x80; - s->com = 0; - -- vmstate_register(dev, -1, &vmstate_max111x, s); -+ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_max111x, s); - return 0; - } - -diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c -index 6607c91..03edd25 100644 ---- a/hw/net/eepro100.c -+++ b/hw/net/eepro100.c -@@ -1872,7 +1872,8 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) - - s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); - s->vmstate->name = qemu_get_queue(s->nic)->model; -- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); -+ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, -+ s->vmstate, s); - } - - static void eepro100_instance_init(Object *obj) -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index 8076a80..e74143c 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -118,7 +118,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) - bus->machine_done.notify = pcibus_machine_done; - qemu_add_machine_init_done_notifier(&bus->machine_done); - -- vmstate_register(NULL, -1, &vmstate_pcibus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); - } - - static void pcie_bus_realize(BusState *qbus, Error **errp) -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 12ed4b0..b0f37c3 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3069,7 +3069,7 @@ static void spapr_machine_init(MachineState *machine) - * interface, this is a legacy from the sPAPREnvironment structure - * which predated MachineState but had a similar function */ - vmstate_register(NULL, 0, &vmstate_spapr, spapr); -- register_savevm_live(NULL, "spapr/htab", -1, 1, -+ register_savevm_live(NULL, "spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_htab_handlers, spapr); - - qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), -diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c -index f0a7534..1ce4e01 100644 ---- 
a/hw/timer/arm_timer.c -+++ b/hw/timer/arm_timer.c -@@ -172,7 +172,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) - - bh = qemu_bh_new(arm_timer_tick, s); - s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); -- vmstate_register(NULL, -1, &vmstate_arm_timer, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); - return s; - } - -diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c -index 38bf5fd..836c489 100644 ---- a/hw/tpm/tpm_emulator.c -+++ b/hw/tpm/tpm_emulator.c -@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) - tpm_emu->cur_locty_number = ~0; - qemu_mutex_init(&tpm_emu->mutex); - -- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_tpm_emulator, obj); - } - - /* -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index c2bfa7a..92f531a 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1114,6 +1114,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, - - bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - -+#define VMSTATE_INSTANCE_ID_ANY -1 -+ - /* Returns: 0 on success, -1 on failure */ - int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - const VMStateDescription *vmsd, -diff --git a/migration/savevm.c b/migration/savevm.c -index 480c511..62552ab 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -722,7 +722,7 @@ int register_savevm_live(DeviceState *dev, - } - pstrcat(se->idstr, sizeof(se->idstr), idstr); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - } else { - se->instance_id = instance_id; -@@ -789,14 +789,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - - se->compat = g_new0(CompatEntry, 1); - pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); -- 
se->compat->instance_id = instance_id == -1 ? -+ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? - calculate_compat_instance_id(vmsd->name) : instance_id; -- instance_id = -1; -+ instance_id = VMSTATE_INSTANCE_ID_ANY; - } - } - pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - } else { - se->instance_id = instance_id; --- -1.8.3.1 - diff --git a/migration-Don-t-send-data-if-we-have-stopped.patch b/migration-Don-t-send-data-if-we-have-stopped.patch deleted file mode 100644 index 08d5d3bbbdd0b8a6f7d3dbc485cc3814a830e483..0000000000000000000000000000000000000000 --- a/migration-Don-t-send-data-if-we-have-stopped.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 855404b4766ddda851035587aa1b84768abbaf11 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 22 Jan 2020 11:36:12 +0100 -Subject: [PATCH] migration: Don't send data if we have stopped - -If we do a cancel, we got out without one error, but we can't do the -rest of the output as in a normal situation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index b74929542d..dc9831d7f3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3686,7 +3686,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- if (ret >= 0) { -+ if (ret >= 0 -+ && migration_is_setup_or_active(migrate_get_current()->state)) { - multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); --- -2.27.0 - diff --git a/migration-Ensure-vmstate_save-sets-errp.patch b/migration-Ensure-vmstate_save-sets-errp.patch new file mode 100644 index 0000000000000000000000000000000000000000..e35e2c28fb9bb3aef97372dc3a688a2317115fd5 --- /dev/null +++ b/migration-Ensure-vmstate_save-sets-errp.patch @@ -0,0 +1,85 @@ +From 72aa575da11b3a897eeaae926802c50dc8ff7a84 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 15 Oct 2024 19:04:37 +0200 +Subject: [PATCH] migration: Ensure vmstate_save() sets errp + +migration/savevm.c contains some calls to vmstate_save() that are +followed by migrate_set_error() if the integer return value indicates an +error. migrate_set_error() requires that the `Error *` object passed to +it is set. Therefore, vmstate_save() is assumed to always set *errp on +error. + +Right now, that assumption is not met: vmstate_save_state_v() (called +internally by vmstate_save()) will not set *errp if +vmstate_subsection_save() or vmsd->post_save() fail. Fix that by adding +an *errp parameter to vmstate_subsection_save(), and by generating a +generic error in case post_save() fails (as is already done for +pre_save()). + +Without this patch, qemu will crash after vmstate_subsection_save() or +post_save() have failed inside of a vmstate_save() call (unless +migrate_set_error() then happen to discard the new error because +s->error is already set). This happens e.g. 
when receiving the state +from a virtio-fs back-end (virtiofsd) fails. + +Signed-off-by: Hanna Czenczek +Link: https://lore.kernel.org/r/20241015170437.310358-1-hreitz@redhat.com +Signed-off-by: Peter Xu +(cherry picked from commit 37dfcba1a04989830c706f9cbc00450e5d3a7447) +Signed-off-by: zhujun2 +--- + migration/vmstate.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/migration/vmstate.c b/migration/vmstate.c +index b7723a4187..bd08e390c5 100644 +--- a/migration/vmstate.c ++++ b/migration/vmstate.c +@@ -22,7 +22,8 @@ + #include "trace.h" + + static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, +- void *opaque, JSONWriter *vmdesc); ++ void *opaque, JSONWriter *vmdesc, ++ Error **errp); + static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, + void *opaque); + +@@ -440,12 +441,13 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, + json_writer_end_array(vmdesc); + } + +- ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc); ++ ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc, errp); + + if (vmsd->post_save) { + int ps_ret = vmsd->post_save(opaque); +- if (!ret) { ++ if (!ret && ps_ret) { + ret = ps_ret; ++ error_setg(errp, "post-save failed: %s", vmsd->name); + } + } + return ret; +@@ -515,7 +517,8 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, + } + + static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, +- void *opaque, JSONWriter *vmdesc) ++ void *opaque, JSONWriter *vmdesc, ++ Error **errp) + { + const VMStateDescription **sub = vmsd->subsections; + bool vmdesc_has_subsections = false; +@@ -543,7 +546,7 @@ static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)vmsdsub->name, len); + qemu_put_be32(f, vmsdsub->version_id); +- ret = vmstate_save_state(f, vmsdsub, opaque, vmdesc); ++ ret = 
vmstate_save_state_with_err(f, vmsdsub, opaque, vmdesc, errp); + if (ret) { + return ret; + } +-- +2.41.0.windows.1 + diff --git a/migration-Extand-the-fdtable-in-the-incoming-phase-o.patch b/migration-Extand-the-fdtable-in-the-incoming-phase-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..a5082707c503c9c9083b376a09992f515f7817b0 --- /dev/null +++ b/migration-Extand-the-fdtable-in-the-incoming-phase-o.patch @@ -0,0 +1,71 @@ +From 0e3d3b9a3cd54340b2d9991918a172ed38670bcd Mon Sep 17 00:00:00 2001 +From: libai +Date: Wed, 2 Apr 2025 20:14:10 +0800 +Subject: [PATCH] migration:Extand the fdtable in the incoming phase of + migration + +Perform the fdtable extension in advance to avoid time consumption +caused by triggering the fdtable extension during the migration downtime. + +Signed-off-by: libai +--- + migration/migration.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index dce22c2da5..9a433e615b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -68,6 +68,8 @@ + #include "sysemu/dirtylimit.h" + #include "qemu/sockets.h" + ++#define DEFAULT_FD_MAX 4096 ++ + static NotifierList migration_state_notifiers = + NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); + +@@ -1712,6 +1714,31 @@ void migrate_del_blocker(Error **reasonp) + } + } + ++/* ++ * Kernel will expand the fatable allocated to the qemu process when ++ * the number of fds held by qemu process exceeds a power of 2 (starting from 64). ++ * Each expansion introduces tens of ms of latency due to RCU synchronization. ++ * The expansion is completed during qemu process initialization to avoid ++ * triggering this action during the migration downtime phase. 
++ */ ++static void qemu_pre_extend_fdtable(void) ++{ ++ int buffer[DEFAULT_FD_MAX] = {0}; ++ int i; ++ ++ /* expand fdtable */ ++ for (i = 0; i < DEFAULT_FD_MAX; i++) { ++ buffer[i] = qemu_dup(STDIN_FILENO); ++ } ++ ++ /* close tmp fd */ ++ for (i = 0; i < DEFAULT_FD_MAX; i++) { ++ if (buffer[i] > 0) { ++ (void)qemu_close(buffer[i]); ++ } ++ } ++} ++ + void qmp_migrate_incoming(const char *uri, bool has_channels, + MigrationChannelList *channels, Error **errp) + { +@@ -1731,6 +1758,8 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, + return; + } + ++ qemu_pre_extend_fdtable(); ++ + qemu_start_incoming_migration(uri, has_channels, channels, &local_err); + + if (local_err) { +-- +2.41.0.windows.1 + diff --git a/migration-Fix-file-migration-with-fdset.patch b/migration-Fix-file-migration-with-fdset.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8f9c95aeff5874945ff5ea392239713bf660b34 --- /dev/null +++ b/migration-Fix-file-migration-with-fdset.patch @@ -0,0 +1,65 @@ +From 6c76354fdfbebca55e080fea5ae6bfc8a3db2d91 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Mon, 17 Jun 2024 15:57:17 -0300 +Subject: [PATCH] migration: Fix file migration with fdset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When the "file:" migration support was added we missed the special +case in the qemu_open_old implementation that allows for a particular +file name format to be used to refer to a set of file descriptors that +have been previously provided to QEMU via the add-fd QMP command. + +When using this fdset feature, we should not truncate the migration +file because being given an fd means that the management layer is in +control of the file and will likely already have some data written to +it. This is further indicated by the presence of the 'offset' +argument, which indicates the start of the region where QEMU is +allowed to write. 
+ +Fix the issue by replacing the O_TRUNC flag on open by an ftruncate +call, which will take the offset into consideration. + +Fixes: 385f510df5 ("migration: file URI offset") +Suggested-by: Daniel P. Berrangé +Reviewed-by: Prasad Pandit +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Fabiano Rosas +(cherry picked from commit 6d3279655ac49b806265f08415165f471d33e032) +Signed-off-by: Michael Tokarev +Signed-off-by: zhujun2 +--- + migration/file.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/migration/file.c b/migration/file.c +index 5d4975f43e..fb3f743e54 100644 +--- a/migration/file.c ++++ b/migration/file.c +@@ -46,12 +46,19 @@ void file_start_outgoing_migration(MigrationState *s, + + trace_migration_file_outgoing(filename); + +- fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY | O_TRUNC, +- 0600, errp); ++ fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY, 0600, errp); + if (!fioc) { + return; + } + ++ if (ftruncate(fioc->fd, offset)) { ++ error_setg_errno(errp, errno, ++ "failed to truncate migration file to offset %" PRIx64, ++ offset); ++ object_unref(OBJECT(fioc)); ++ return; ++ } ++ + ioc = QIO_CHANNEL(fioc); + if (offset && qio_channel_io_seek(ioc, offset, SEEK_SET, errp) < 0) { + return; +-- +2.41.0.windows.1 + diff --git a/migration-Fix-logic-of-channels-and-transport-compat.patch b/migration-Fix-logic-of-channels-and-transport-compat.patch new file mode 100644 index 0000000000000000000000000000000000000000..d251b782f77a42034944c8fc133500651899c20a --- /dev/null +++ b/migration-Fix-logic-of-channels-and-transport-compat.patch @@ -0,0 +1,72 @@ +From d5a21de3aa2a13ab8bfb4d9d815ae60e04e08f94 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Thu, 25 Jan 2024 18:25:12 +0200 +Subject: [48/99] migration: Fix logic of channels and transport compatibility + check + +commit 3205bebd4fc6dd501fb8b10c93ddce9da18e09db upstream. 
+ +The commit in the fixes line mistakenly modified the channels and +transport compatibility check logic so it now checks multi-channel +support only for socket transport type. + +Thus, running multifd migration using a transport other than socket that +is incompatible with multi-channels (such as "exec") would lead to a +segmentation fault instead of an error message. +For example: + (qemu) migrate_set_capability multifd on + (qemu) migrate -d "exec:cat > /tmp/vm_state" + Segmentation fault (core dumped) + +Fix it by checking multi-channel compatibility for all transport types. + +Cc: qemu-stable +Fixes: d95533e1cdcc ("migration: modify migration_channels_and_uri_compatible() for new QAPI syntax") +Signed-off-by: Avihai Horon +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240125162528.7552-2-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f428839dd6..0e8255180d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -127,11 +127,17 @@ static bool migration_needs_multiple_sockets(void) + return migrate_multifd() || migrate_postcopy_preempt(); + } + +-static bool transport_supports_multi_channels(SocketAddress *saddr) ++static bool transport_supports_multi_channels(MigrationAddress *addr) + { +- return saddr->type == SOCKET_ADDRESS_TYPE_INET || +- saddr->type == SOCKET_ADDRESS_TYPE_UNIX || +- saddr->type == SOCKET_ADDRESS_TYPE_VSOCK; ++ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { ++ SocketAddress *saddr = &addr->u.socket; ++ ++ return saddr->type == SOCKET_ADDRESS_TYPE_INET || ++ saddr->type == SOCKET_ADDRESS_TYPE_UNIX || ++ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK; ++ } ++ ++ return false; + } + + static bool +@@ -139,8 +145,7 @@ migration_channels_and_transport_compatible(MigrationAddress *addr, + Error **errp) + { + if 
(migration_needs_multiple_sockets() && +- (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) && +- !transport_supports_multi_channels(&addr->u.socket)) { ++ !transport_supports_multi_channels(addr)) { + error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)"); + return false; + } +-- +2.33.0 + diff --git a/migration-Fix-migration_channel_read_peek-error-path.patch b/migration-Fix-migration_channel_read_peek-error-path.patch new file mode 100644 index 0000000000000000000000000000000000000000..5cb8552e34eb36afe2f00e8cdc6c957e9019b02c --- /dev/null +++ b/migration-Fix-migration_channel_read_peek-error-path.patch @@ -0,0 +1,52 @@ +From 3a81455a093f3b06fd76d4964d0073c78ddbcc49 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:14 +0200 +Subject: [05/99] migration: Fix migration_channel_read_peek() error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 4f8cf323e80c17f7d4b5604f1699591326df6262 upstream. + +migration_channel_read_peek() calls qio_channel_readv_full() and handles +both cases of return value == 0 and return value < 0 the same way, by +calling error_setg() with errp. However, if return value < 0, errp is +already set, so calling error_setg() with errp will lead to an assert. + +Fix it by handling these cases separately, calling error_setg() with +errp only in return value == 0 case. 
+ +Fixes: 6720c2b32725 ("migration: check magic value for deciding the mapping of channels") +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/20231231093016.14204-10-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/channel.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index ca3319a309..f9de064f3b 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -117,9 +117,12 @@ int migration_channel_read_peek(QIOChannel *ioc, + len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, + QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); + +- if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { +- error_setg(errp, +- "Failed to peek at channel"); ++ if (len < 0 && len != QIO_CHANNEL_ERR_BLOCK) { ++ return -1; ++ } ++ ++ if (len == 0) { ++ error_setg(errp, "Failed to peek at channel"); + return -1; + } + +-- +2.33.0 + diff --git a/migration-Introduce-qatzip-compression-method.patch b/migration-Introduce-qatzip-compression-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffe4ae32d53a3e3bd0a1385f9aa1d2710a2c2ad1 --- /dev/null +++ b/migration-Introduce-qatzip-compression-method.patch @@ -0,0 +1,500 @@ +From d5ad8ffdf67cb6a76d5b4bf7145488abaa53c2ae Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 30 Aug 2024 16:27:21 -0700 +Subject: [91/99] migration: Introduce 'qatzip' compression method + +commit 80484f945989988091c5cd729c3e8bde6c14907a upstream. + +Adds support for 'qatzip' as an option for the multifd compression +method parameter, and implements using QAT for 'qatzip' compression and +decompression. 
+ +Acked-by: Markus Armbruster +Reviewed-by: Fabiano Rosas +Reviewed-by: Prasad Pandit +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-5-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-qatzip.c | 394 +++++++++++++++++++++++++++++++ + migration/multifd.h | 5 +- + qapi/migration.json | 3 + + 5 files changed, 402 insertions(+), 3 deletions(-) + create mode 100644 migration/multifd-qatzip.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 650c42eaf8..9cc2e38aba 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -711,7 +711,7 @@ const PropertyInfo qdev_prop_fdc_drive_type = { + const PropertyInfo qdev_prop_multifd_compression = { + .name = "MultiFDCompression", + .description = "multifd_compression values, " +- "none/zlib/zstd/qpl/uadk", ++ "none/zlib/zstd/qpl/uadk/qatzip", + .enum_table = &MultiFDCompression_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/meson.build b/migration/meson.build +index 264d04657f..aba2581705 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -42,6 +42,7 @@ endif + system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + system_ss.add(when: qpl, if_true: files('multifd-qpl.c')) + system_ss.add(when: uadk, if_true: files('multifd-uadk.c')) ++system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c +new file mode 100644 +index 0000000000..3c787ed879 +--- /dev/null ++++ b/migration/multifd-qatzip.c +@@ -0,0 +1,394 @@ ++/* ++ * Multifd QATzip compression implementation ++ * ++ * Copyright (c) Bytedance ++ * ++ * Authors: ++ * Bryan 
Zhang ++ * Hao Xiang ++ * Yichen Wang ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "exec/ramblock.h" ++#include "qapi/error.h" ++#include "qemu/error-report.h" ++#include "qapi/qapi-types-migration.h" ++#include "options.h" ++#include "multifd.h" ++#include <qatzip.h> ++ ++typedef struct { ++ /* ++ * Unique session for use with QATzip API ++ */ ++ QzSession_T sess; ++ ++ /* ++ * For compression: Buffer for pages to compress ++ * For decompression: Buffer for data to decompress ++ */ ++ uint8_t *in_buf; ++ uint32_t in_len; ++ ++ /* ++ * For compression: Output buffer of compressed data ++ * For decompression: Output buffer of decompressed data ++ */ ++ uint8_t *out_buf; ++ uint32_t out_len; ++} QatzipData; ++ ++/** ++ * qatzip_send_setup: Set up QATzip session and private buffers. ++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) ++{ ++ QatzipData *q; ++ QzSessionParamsDeflate_T params; ++ const char *err_msg; ++ int ret; ++ ++ q = g_new0(QatzipData, 1); ++ p->compress_data = q; ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, 2); ++ ++ /* ++ * Initialize QAT device with software fallback by default. This allows ++ * QATzip to use CPU path when QAT hardware reaches maximum throughput. ++ */ ++ ret = qzInit(&q->sess, true); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzInit failed"; ++ goto err; ++ } ++ ++ ret = qzGetDefaultsDeflate(&params); ++ if (ret != QZ_OK) { ++ err_msg = "qzGetDefaultsDeflate failed"; ++ goto err; ++ } ++ ++ /* Make sure to use configured QATzip compression level. 
*/ ++ params.common_params.comp_lvl = migrate_multifd_qatzip_level(); ++ ret = qzSetupSessionDeflate(&q->sess, &params); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzSetupSessionDeflate failed"; ++ goto err; ++ } ++ ++ if (MULTIFD_PACKET_SIZE > UINT32_MAX) { ++ err_msg = "packet size too large for QAT"; ++ goto err; ++ } ++ ++ q->in_len = MULTIFD_PACKET_SIZE; ++ /* ++ * PINNED_MEM is an enum from qatzip headers, which means to use ++ * kzalloc_node() to allocate memory for QAT DMA purposes. When QAT device ++ * is not available or software fallback is used, the malloc flag needs to ++ * be set as COMMON_MEM. ++ */ ++ q->in_buf = qzMalloc(q->in_len, 0, PINNED_MEM); ++ if (!q->in_buf) { ++ q->in_buf = qzMalloc(q->in_len, 0, COMMON_MEM); ++ if (!q->in_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ q->out_len = qzMaxCompressedLength(MULTIFD_PACKET_SIZE, &q->sess); ++ q->out_buf = qzMalloc(q->out_len, 0, PINNED_MEM); ++ if (!q->out_buf) { ++ q->out_buf = qzMalloc(q->out_len, 0, COMMON_MEM); ++ if (!q->out_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ error_setg(errp, "multifd %u: [sender] %s", p->id, err_msg); ++ return -1; ++} ++ ++/** ++ * qatzip_send_cleanup: Tear down QATzip session and release private buffers. ++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return None ++ */ ++static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) ++{ ++ QatzipData *q = p->compress_data; ++ ++ if (q) { ++ if (q->in_buf) { ++ qzFree(q->in_buf); ++ } ++ if (q->out_buf) { ++ qzFree(q->out_buf); ++ } ++ (void)qzTeardownSession(&q->sess); ++ (void)qzClose(&q->sess); ++ g_free(q); ++ } ++ ++ g_free(p->iov); ++ p->iov = NULL; ++ p->compress_data = NULL; ++} ++ ++/** ++ * qatzip_send_prepare: Compress pages and update IO channel info. 
++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) ++{ ++ MultiFDPages_t *pages = p->pages; ++ QatzipData *q = p->compress_data; ++ int ret; ++ unsigned int in_len, out_len; ++ ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } ++ ++ /* ++ * Unlike other multifd compression implementations, we use a non-streaming ++ * API and place all the data into one buffer, rather than sending each ++ * page to the compression API at a time. Based on initial benchmarks, the ++ * non-streaming API outperforms the streaming API. Plus, the logic in QEMU ++ * is friendly to using the non-streaming API anyway. If either of these ++ * statements becomes no longer true, we can revisit adding a streaming ++ * implementation. ++ */ ++ for (int i = 0; i < pages->normal_num; i++) { ++ memcpy(q->in_buf + (i * p->page_size), ++ pages->block->host + pages->offset[i], ++ p->page_size); ++ } ++ ++ in_len = pages->normal_num * p->page_size; ++ if (in_len > q->in_len) { ++ error_setg(errp, "multifd %u: unexpectedly large input", p->id); ++ return -1; ++ } ++ out_len = q->out_len; ++ ++ ret = qzCompress(&q->sess, q->in_buf, &in_len, q->out_buf, &out_len, 1); ++ if (ret != QZ_OK) { ++ error_setg(errp, "multifd %u: QATzip returned %d instead of QZ_OK", ++ p->id, ret); ++ return -1; ++ } ++ if (in_len != pages->normal_num * p->page_size) { ++ error_setg(errp, "multifd %u: QATzip failed to compress all input", ++ p->id); ++ return -1; ++ } ++ ++ p->iov[p->iovs_num].iov_base = q->out_buf; ++ p->iov[p->iovs_num].iov_len = out_len; ++ p->iovs_num++; ++ p->next_packet_size = out_len; ++ ++out: ++ p->flags |= MULTIFD_FLAG_QATZIP; ++ multifd_send_fill_packet(p); ++ return 0; ++} ++ ++/** ++ * qatzip_recv_setup: Set up QATzip session and allocate private buffers. 
++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) ++{ ++ QatzipData *q; ++ QzSessionParamsDeflate_T params; ++ const char *err_msg; ++ int ret; ++ ++ q = g_new0(QatzipData, 1); ++ p->compress_data = q; ++ ++ /* ++ * Initialize QAT device with software fallback by default. This allows ++ * QATzip to use CPU path when QAT hardware reaches maximum throughput. ++ */ ++ ret = qzInit(&q->sess, true); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzInit failed"; ++ goto err; ++ } ++ ++ ret = qzGetDefaultsDeflate(&params); ++ if (ret != QZ_OK) { ++ err_msg = "qzGetDefaultsDeflate failed"; ++ goto err; ++ } ++ ++ ret = qzSetupSessionDeflate(&q->sess, &params); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzSetupSessionDeflate failed"; ++ goto err; ++ } ++ ++ /* ++ * Reserve extra spaces for the incoming packets. Current implementation ++ * doesn't send uncompressed pages in case the compression gets too big. ++ */ ++ q->in_len = MULTIFD_PACKET_SIZE * 2; ++ /* ++ * PINNED_MEM is an enum from qatzip headers, which means to use ++ * kzalloc_node() to allocate memory for QAT DMA purposes. When QAT device ++ * is not available or software fallback is used, the malloc flag needs to ++ * be set as COMMON_MEM. 
++ */ ++ q->in_buf = qzMalloc(q->in_len, 0, PINNED_MEM); ++ if (!q->in_buf) { ++ q->in_buf = qzMalloc(q->in_len, 0, COMMON_MEM); ++ if (!q->in_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ q->out_len = MULTIFD_PACKET_SIZE; ++ q->out_buf = qzMalloc(q->out_len, 0, PINNED_MEM); ++ if (!q->out_buf) { ++ q->out_buf = qzMalloc(q->out_len, 0, COMMON_MEM); ++ if (!q->out_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ error_setg(errp, "multifd %u: [receiver] %s", p->id, err_msg); ++ return -1; ++} ++ ++/** ++ * qatzip_recv_cleanup: Tear down QATzip session and release private buffers. ++ * ++ * @param p Multifd channel params ++ * @return None ++ */ ++static void qatzip_recv_cleanup(MultiFDRecvParams *p) ++{ ++ QatzipData *q = p->compress_data; ++ ++ if (q) { ++ if (q->in_buf) { ++ qzFree(q->in_buf); ++ } ++ if (q->out_buf) { ++ qzFree(q->out_buf); ++ } ++ (void)qzTeardownSession(&q->sess); ++ (void)qzClose(&q->sess); ++ g_free(q); ++ } ++ p->compress_data = NULL; ++} ++ ++ ++/** ++ * qatzip_recv: Decompress pages and copy them to the appropriate ++ * locations. 
++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_recv(MultiFDRecvParams *p, Error **errp) ++{ ++ QatzipData *q = p->compress_data; ++ int ret; ++ unsigned int in_len, out_len; ++ uint32_t in_size = p->next_packet_size; ++ uint32_t expected_size = p->normal_num * p->page_size; ++ uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ ++ if (in_size > q->in_len) { ++ error_setg(errp, "multifd %u: received unexpectedly large packet", ++ p->id); ++ return -1; ++ } ++ ++ if (flags != MULTIFD_FLAG_QATZIP) { ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", ++ p->id, flags, MULTIFD_FLAG_QATZIP); ++ return -1; ++ } ++ ++ multifd_recv_zero_page_process(p); ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ ++ ret = qio_channel_read_all(p->c, (void *)q->in_buf, in_size, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ in_len = in_size; ++ out_len = q->out_len; ++ ret = qzDecompress(&q->sess, q->in_buf, &in_len, q->out_buf, &out_len); ++ if (ret != QZ_OK) { ++ error_setg(errp, "multifd %u: qzDecompress failed", p->id); ++ return -1; ++ } ++ if (out_len != expected_size) { ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", ++ p->id, out_len, expected_size); ++ return -1; ++ } ++ ++ /* Copy each page to its appropriate location. 
*/ ++ for (int i = 0; i < p->normal_num; i++) { ++ memcpy(p->host + p->normal[i], ++ q->out_buf + p->page_size * i, ++ p->page_size); ++ } ++ return 0; ++} ++ ++static MultiFDMethods multifd_qatzip_ops = { ++ .send_setup = qatzip_send_setup, ++ .send_cleanup = qatzip_send_cleanup, ++ .send_prepare = qatzip_send_prepare, ++ .recv_setup = qatzip_recv_setup, ++ .recv_cleanup = qatzip_recv_cleanup, ++ .recv = qatzip_recv ++}; ++ ++static void multifd_qatzip_register(void) ++{ ++ multifd_register_ops(MULTIFD_COMPRESSION_QATZIP, &multifd_qatzip_ops); ++} ++ ++migration_init(multifd_qatzip_register); +diff --git a/migration/multifd.h b/migration/multifd.h +index ace4ba050d..57c1334788 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -29,14 +29,15 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) + +-/* We reserve 4 bits for compression methods */ +-#define MULTIFD_FLAG_COMPRESSION_MASK (0xf << 1) ++/* We reserve 5 bits for compression methods */ ++#define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1) + /* we need to be compatible. Before compression value was 0 */ + #define MULTIFD_FLAG_NOCOMP (0 << 1) + #define MULTIFD_FLAG_ZLIB (1 << 1) + #define MULTIFD_FLAG_ZSTD (2 << 1) + #define MULTIFD_FLAG_QPL (4 << 1) + #define MULTIFD_FLAG_UADK (8 << 1) ++#define MULTIFD_FLAG_QATZIP (16 << 1) + + /* This value needs to be a multiple of qemu_target_page_size() */ + #define MULTIFD_PACKET_SIZE (512 * 1024) +diff --git a/qapi/migration.json b/qapi/migration.json +index 255f5b50a6..37e1d4857e 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -625,6 +625,8 @@ + # + # @zstd: use zstd compression method. + # ++# @qatzip: use qatzip compression method. (Since 9.2) ++# + # @qpl: use qpl compression method. 
Query Processing Library(qpl) is + # based on the deflate compression algorithm and use the Intel + # In-Memory Analytics Accelerator(IAA) accelerated compression +@@ -637,6 +639,7 @@ + { 'enum': 'MultiFDCompression', + 'data': [ 'none', 'zlib', + { 'name': 'zstd', 'if': 'CONFIG_ZSTD' }, ++ { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'}, + { 'name': 'qpl', 'if': 'CONFIG_QPL' }, + { 'name': 'uadk', 'if': 'CONFIG_UADK' } ] } + +-- +2.33.0 + diff --git a/migration-Make-global-sem_sync-semaphore-by-channel.patch b/migration-Make-global-sem_sync-semaphore-by-channel.patch deleted file mode 100644 index d9dbab23e4f83d88595956668c61385618864fd7..0000000000000000000000000000000000000000 --- a/migration-Make-global-sem_sync-semaphore-by-channel.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 8c3794d709eefdae777477bef7ff3511d55bf418 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 14 Aug 2019 04:02:14 +0200 -Subject: [PATCH 05/10] migration: Make global sem_sync semaphore by channel - -This makes easy to debug things because when you want for all threads -to arrive at that semaphore, you know which one your are waiting for. - -Change-Id: I533af8cdc68f619b68eff8e4e573c4de371a3954 -Signed-off-by: Juan Quintela -Message-Id: <20190814020218.1868-3-quintela@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index c75716bb..51811c2d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -661,6 +661,8 @@ typedef struct { - uint64_t num_packets; - /* pages sent through this channel */ - uint64_t num_pages; -+ /* syncs main thread and channels */ -+ QemuSemaphore sem_sync; - } MultiFDSendParams; - - typedef struct { -@@ -896,8 +898,6 @@ struct { - MultiFDSendParams *params; - /* array of pages to sent */ - MultiFDPages_t *pages; -- /* syncs main thread and channels */ -- QemuSemaphore sem_sync; - /* global number of generated multifd packets */ - uint64_t packet_num; - /* send channels ready */ -@@ -1037,6 +1037,7 @@ void multifd_save_cleanup(void) - p->c = NULL; - qemu_mutex_destroy(&p->mutex); - qemu_sem_destroy(&p->sem); -+ qemu_sem_destroy(&p->sem_sync); - g_free(p->name); - p->name = NULL; - multifd_pages_clear(p->pages); -@@ -1046,7 +1047,6 @@ void multifd_save_cleanup(void) - p->packet = NULL; - } - qemu_sem_destroy(&multifd_send_state->channels_ready); -- qemu_sem_destroy(&multifd_send_state->sem_sync); - g_free(multifd_send_state->params); - multifd_send_state->params = NULL; - multifd_pages_clear(multifd_send_state->pages); -@@ -1096,7 +1096,7 @@ static void multifd_send_sync_main(RAMState *rs) - MultiFDSendParams *p = &multifd_send_state->params[i]; - - trace_multifd_send_sync_main_wait(p->id); -- qemu_sem_wait(&multifd_send_state->sem_sync); -+ qemu_sem_wait(&p->sem_sync); - } - trace_multifd_send_sync_main(multifd_send_state->packet_num); - } -@@ -1156,7 +1156,7 @@ static void *multifd_send_thread(void *opaque) - qemu_mutex_unlock(&p->mutex); - - if (flags & MULTIFD_FLAG_SYNC) { -- qemu_sem_post(&multifd_send_state->sem_sync); -+ qemu_sem_post(&p->sem_sync); - } - qemu_sem_post(&multifd_send_state->channels_ready); - } else if (p->quit) { -@@ -1179,7 +1179,7 @@ out: - */ - if (ret != 0) { - if 
(flags & MULTIFD_FLAG_SYNC) { -- qemu_sem_post(&multifd_send_state->sem_sync); -+ qemu_sem_post(&p->sem_sync); - } - qemu_sem_post(&multifd_send_state->channels_ready); - } -@@ -1225,7 +1225,6 @@ int multifd_save_setup(void) - multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); - multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); - multifd_send_state->pages = multifd_pages_init(page_count); -- qemu_sem_init(&multifd_send_state->sem_sync, 0); - qemu_sem_init(&multifd_send_state->channels_ready, 0); - - for (i = 0; i < thread_count; i++) { -@@ -1233,6 +1232,7 @@ int multifd_save_setup(void) - - qemu_mutex_init(&p->mutex); - qemu_sem_init(&p->sem, 0); -+ qemu_sem_init(&p->sem_sync, 0); - p->quit = false; - p->pending_job = 0; - p->id = i; --- -2.19.1 diff --git a/migration-Make-sure-that-we-don-t-call-write-in-case.patch b/migration-Make-sure-that-we-don-t-call-write-in-case.patch deleted file mode 100644 index 73e3fe41d0ee74e22d7e5434ca89b421bbce0708..0000000000000000000000000000000000000000 --- a/migration-Make-sure-that-we-don-t-call-write-in-case.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 2898f8669445d38d4a6a8986c1e6d94381a7e869 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:35 +0000 -Subject: [PATCH] migration: Make sure that we don't call write() in case of - error - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-3-quintela@redhat.com> -Patchwork-id: 94113 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -If we are exiting due to an error/finish/.... Just don't try to even -touch the channel with one IO operation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Juan Quintela -(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index d4ac696899..27585a4f3e 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1195,6 +1195,12 @@ struct { - uint64_t packet_num; - /* send channels ready */ - QemuSemaphore channels_ready; -+ /* -+ * Have we already run terminate threads. There is a race when it -+ * happens that we got one error while we are exiting. -+ * We will use atomic operations. Only valid values are 0 and 1. -+ */ -+ int exiting; - } *multifd_send_state; - - /* -@@ -1223,6 +1229,10 @@ static int multifd_send_pages(RAMState *rs) - MultiFDPages_t *pages = multifd_send_state->pages; - uint64_t transferred; - -+ if (atomic_read(&multifd_send_state->exiting)) { -+ return -1; -+ } -+ - qemu_sem_wait(&multifd_send_state->channels_ready); - /* - * next_channel can remain from a previous migration that was -@@ -1308,6 +1318,16 @@ static void multifd_send_terminate_threads(Error *err) - } - } - -+ /* -+ * We don't want to exit each threads twice. Depending on where -+ * we get the error, or if there are two independent errors in two -+ * threads at the same time, we can end calling this function -+ * twice. 
-+ */ -+ if (atomic_xchg(&multifd_send_state->exiting, 1)) { -+ return; -+ } -+ - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -@@ -1425,6 +1445,10 @@ static void *multifd_send_thread(void *opaque) - - while (true) { - qemu_sem_wait(&p->sem); -+ -+ if (atomic_read(&multifd_send_state->exiting)) { -+ break; -+ } - qemu_mutex_lock(&p->mutex); - - if (p->pending_job) { -@@ -1655,6 +1679,7 @@ int multifd_save_setup(void) - multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); - multifd_send_state->pages = multifd_pages_init(page_count); - qemu_sem_init(&multifd_send_state->channels_ready, 0); -+ atomic_set(&multifd_send_state->exiting, 0); - - for (i = 0; i < thread_count; i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; --- -2.27.0 - diff --git a/migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/migration-Maybe-VM-is-paused-when-migration-is-cance.patch deleted file mode 100644 index 6c918f3c17ca619dfb88e1856d3d26625419f465..0000000000000000000000000000000000000000 --- a/migration-Maybe-VM-is-paused-when-migration-is-cance.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 5e99e1329fa52dce8ab784a960e64a3e19b429aa Mon Sep 17 00:00:00 2001 -From: Zhimin Feng -Date: Tue, 14 Jan 2020 17:43:09 +0800 -Subject: [PATCH 07/10] migration: Maybe VM is paused when migration is - cancelled - -If the migration is cancelled when it is in the completion phase, -the migration state is set to MIGRATION_STATUS_CANCELLING. -The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause -function, so that VM always is paused. 
- -Change-Id: Ib2f2f42ee1edbb14da269ee19ba1fe16dd363822 -Reported-by: Euler Robot -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/migration.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index bea9b1d7..114c33a1 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2731,14 +2731,22 @@ static int migration_maybe_pause(MigrationState *s, - /* This block intentionally left blank */ - } - -- qemu_mutex_unlock_iothread(); -- migrate_set_state(&s->state, *current_active_state, -- MIGRATION_STATUS_PRE_SWITCHOVER); -- qemu_sem_wait(&s->pause_sem); -- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -- new_state); -- *current_active_state = new_state; -- qemu_mutex_lock_iothread(); -+ /* -+ * If the migration is cancelled when it is in the completion phase, -+ * the migration state is set to MIGRATION_STATUS_CANCELLING. -+ * So we don't need to wait a semaphore, otherwise we would always -+ * wait for the 'pause_sem' semaphore. -+ */ -+ if (s->state != MIGRATION_STATUS_CANCELLING) { -+ qemu_mutex_unlock_iothread(); -+ migrate_set_state(&s->state, *current_active_state, -+ MIGRATION_STATUS_PRE_SWITCHOVER); -+ qemu_sem_wait(&s->pause_sem); -+ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -+ new_state); -+ *current_active_state = new_state; -+ qemu_mutex_lock_iothread(); -+ } - - return s->state == new_state ? 
0 : -EINVAL; - } --- -2.19.1 diff --git a/migration-Properly-apply-migration-compression-level.patch b/migration-Properly-apply-migration-compression-level.patch new file mode 100644 index 0000000000000000000000000000000000000000..21ed06eaef78741b8fe6f930a05ea77df44e485c --- /dev/null +++ b/migration-Properly-apply-migration-compression-level.patch @@ -0,0 +1,53 @@ +From c17b6d51225501c92cfe6b086ea9217659d67bd1 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 1 Mar 2024 03:59:00 +0000 +Subject: [62/99] migration: Properly apply migration compression level + parameters + +commit b4014a2bf57ce08e2f6458cd82e9f968facf25c8 upstream. + +Some glue code was missing, so that using `qmp_migrate_set_parameters` +to set `multifd-zstd-level` or `multifd-zlib-level` did not work. This +commit adds the glue code to fix that. + +Signed-off-by: Bryan Zhang +Link: https://lore.kernel.org/r/20240301035901.4006936-2-bryan.zhang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/options.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/migration/options.c b/migration/options.c +index 71645c8721..52ddbac35f 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -1377,6 +1377,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_zlib_level) { ++ dest->multifd_zlib_level = params->multifd_zlib_level; ++ } ++ if (params->has_multifd_zstd_level) { ++ dest->multifd_zstd_level = params->multifd_zstd_level; ++ } + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1533,6 +1539,12 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_zlib_level) { ++ 
s->parameters.multifd_zlib_level = params->multifd_zlib_level; ++ } ++ if (params->has_multifd_zstd_level) { ++ s->parameters.multifd_zstd_level = params->multifd_zstd_level; ++ } + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +-- +2.33.0 + diff --git a/migration-Rate-limit-inside-host-pages.patch b/migration-Rate-limit-inside-host-pages.patch deleted file mode 100644 index 17eb46f82ce18a6e2e17583ea6a77879178bc9bc..0000000000000000000000000000000000000000 --- a/migration-Rate-limit-inside-host-pages.patch +++ /dev/null @@ -1,173 +0,0 @@ -From 3e8a587b055f0e3cabf91921fca0777fe7e349f5 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 17 Mar 2020 17:05:18 +0000 -Subject: [PATCH] migration: Rate limit inside host pages - -RH-Author: Laurent Vivier -Message-id: <20200317170518.9303-1-lvivier@redhat.com> -Patchwork-id: 94374 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages -Bugzilla: 1814336 -RH-Acked-by: Peter Xu -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -From: "Dr. David Alan Gilbert" - -When using hugepages, rate limiting is necessary within each huge -page, since a 1G huge page can take a significant time to send, so -you end up with bursty behaviour. - -Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") -Reported-by: Lin Ma -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) -Signed-off-by: Laurent Vivier - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 -BRANCH: rhel-av-8.2.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 -TESTED: Tested that the migration abort doesn't trigger an error message in - the kernel logs on P9 - -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/migration.c | 57 ++++++++++++++++++++++++------------------ - migration/migration.h | 1 + - migration/ram.c | 2 ++ - migration/trace-events | 4 +-- - 4 files changed, 37 insertions(+), 27 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index fd7d81d4b6..b0b9430822 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3260,6 +3260,37 @@ void migration_consume_urgent_request(void) - qemu_sem_wait(&migrate_get_current()->rate_limit_sem); - } - -+/* Returns true if the rate limiting was broken by an urgent request */ -+bool migration_rate_limit(void) -+{ -+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ MigrationState *s = migrate_get_current(); -+ -+ bool urgent = false; -+ migration_update_counters(s, now); -+ if (qemu_file_rate_limit(s->to_dst_file)) { -+ /* -+ * Wait for a delay to do rate limiting OR -+ * something urgent to post the semaphore. -+ */ -+ int ms = s->iteration_start_time + BUFFER_DELAY - now; -+ trace_migration_rate_limit_pre(ms); -+ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -+ /* -+ * We were woken by one or more urgent things but -+ * the timedwait will have consumed one of them. -+ * The service routine for the urgent wake will dec -+ * the semaphore itself for each item it consumes, -+ * so add this one we just eat back. -+ */ -+ qemu_sem_post(&s->rate_limit_sem); -+ urgent = true; -+ } -+ trace_migration_rate_limit_post(urgent); -+ } -+ return urgent; -+} -+ - /* - * Master migration thread on the source VM. - * It drives the migration and pumps the data down the outgoing channel. 
-@@ -3313,8 +3344,6 @@ static void *migration_thread(void *opaque) - trace_migration_thread_setup_complete(); - - while (migration_is_active(s)) { -- int64_t current_time; -- - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { - MigIterateState iter_state = migration_iteration_run(s); - if (iter_state == MIG_ITERATE_SKIP) { -@@ -3341,29 +3370,7 @@ static void *migration_thread(void *opaque) - update_iteration_initial_status(s); - } - -- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -- -- migration_update_counters(s, current_time); -- -- urgent = false; -- if (qemu_file_rate_limit(s->to_dst_file)) { -- /* Wait for a delay to do rate limiting OR -- * something urgent to post the semaphore. -- */ -- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; -- trace_migration_thread_ratelimit_pre(ms); -- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -- /* We were worken by one or more urgent things but -- * the timedwait will have consumed one of them. -- * The service routine for the urgent wake will dec -- * the semaphore itself for each item it consumes, -- * so add this one we just eat back. 
-- */ -- qemu_sem_post(&s->rate_limit_sem); -- urgent = true; -- } -- trace_migration_thread_ratelimit_post(urgent); -- } -+ urgent = migration_rate_limit(); - } - - trace_migration_thread_after_loop(); -diff --git a/migration/migration.h b/migration/migration.h -index 4aa72297fc..ff8a0bf12d 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -345,6 +345,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); - - void migration_make_urgent_request(void); - void migration_consume_urgent_request(void); -+bool migration_rate_limit(void); - - int migration_send_initial_packet(QIOChannel *c, uint8_t id, Error **errp); - int migration_recv_initial_packet(QIOChannel *c, Error **errp); -diff --git a/migration/ram.c b/migration/ram.c -index 27585a4f3e..d6657a8093 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3076,6 +3076,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - } - - pss->page++; -+ /* Allow rate limiting to happen in the middle of huge pages */ -+ migration_rate_limit(); - } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); - -diff --git a/migration/trace-events b/migration/trace-events -index c0640cd424..b4d85229d9 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -131,12 +131,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 - migration_completion_file_err(void) "" - migration_completion_postcopy_end(void) "" - migration_completion_postcopy_end_after_complete(void) "" -+migration_rate_limit_pre(int ms) "%d ms" -+migration_rate_limit_post(int urgent) "urgent: %d" - migration_return_path_end_before(void) "" - migration_return_path_end_after(int rp_error) "%d" - migration_thread_after_loop(void) "" - migration_thread_file_err(void) "" --migration_thread_ratelimit_pre(int ms) "%d ms" --migration_thread_ratelimit_post(int urgent) "urgent: %d" - migration_thread_setup_complete(void) 
"" - open_return_path_on_source(void) "" - open_return_path_on_source_continue(void) "" --- -2.27.0 - diff --git a/migration-Refactoring-multi-thread-compress-migratio.patch b/migration-Refactoring-multi-thread-compress-migratio.patch index d3ab4d0d97a8a1cf1c945a38b97b47a33277ab9b..7f2a5acbd929af6feaeb19c57d612730bf9f36f1 100644 --- a/migration-Refactoring-multi-thread-compress-migratio.patch +++ b/migration-Refactoring-multi-thread-compress-migratio.patch @@ -1,5 +1,5 @@ -From 524d8cee48006918cf181f2817e4ec3ce5a3bb12 Mon Sep 17 00:00:00 2001 -From: Zeyu Jin +From cf6f31249817380e91cbc4e55b189216645fac18 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng Date: Sat, 30 Jan 2021 15:21:17 +0800 Subject: [PATCH] migration: Refactoring multi-thread compress migration @@ -13,19 +13,53 @@ with qemu-file. Besides, the decompression code is located at ram.c only. Wrap the input into the param structure which already exists. This change also makes the function much more flexible for other compression methods. 
+Signed-off-by: Chuan Zheng Signed-off-by: Zeyu Jin Signed-off-by: Ying Fang --- - migration/qemu-file.c | 78 ++++++--------------------------------- - migration/qemu-file.h | 4 +- - migration/ram.c | 85 +++++++++++++++++++++++++++++++------------ - 3 files changed, 75 insertions(+), 92 deletions(-) + migration/meson.build | 4 +- + migration/migration-hmp-cmds.c | 1 - + migration/qemu-file.c | 61 +++++------------------- + migration/qemu-file.h | 4 +- + migration/ram-compress.c | 87 ++++++++++++++++++++++++---------- + 5 files changed, 77 insertions(+), 80 deletions(-) +diff --git a/migration/meson.build b/migration/meson.build +index 92b1cc4297..d9b46ef0df 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,7 +22,6 @@ system_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', +- 'ram-compress.c', + 'options.c', + 'postcopy-ram.c', + 'savevm.c', +@@ -43,4 +42,5 @@ system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +- 'target.c')) ++ 'target.c', ++ 'ram-compress.c')) +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 261ec1e35c..1fa6a5f478 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -22,7 +22,6 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qdict.h" +-#include "qapi/qapi-visit-migration.h" + #include "qapi/string-input-visitor.h" + #include "qapi/string-output-visitor.h" + #include "qemu/cutils.h" diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index be0d6c8ca8..3bba694ed4 100644 +index 94231ff295..bd1dbc3db1 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c -@@ -695,72 +695,6 @@ uint64_t qemu_get_be64(QEMUFile *f) +@@ -669,55 +669,6 @@ uint64_t qemu_get_be64(QEMUFile *f) return v; } @@ -56,11 +90,8 @@ index be0d6c8ca8..3bba694ed4 100644 -/* Compress size bytes of data start at p and store 
the compressed - * data to the buffer of f. - * -- * When f is not writable, return -1 if f has no space to save the -- * compressed data. -- * When f is wirtable and it has no space to save the compressed data, -- * do fflush first, if f still has no space to save the compressed -- * data, return -1. +- * Since the file is dummy file with empty_ops, return -1 if f has no space to +- * save the compressed data. - */ -ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, - const uint8_t *p, size_t size) @@ -68,14 +99,7 @@ index be0d6c8ca8..3bba694ed4 100644 - ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t); - - if (blen < compressBound(size)) { -- if (!qemu_file_is_writable(f)) { -- return -1; -- } -- qemu_fflush(f); -- blen = IO_BUF_SIZE - sizeof(int32_t); -- if (blen < compressBound(size)) { -- return -1; -- } +- return -1; - } - - blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t), @@ -85,22 +109,15 @@ index be0d6c8ca8..3bba694ed4 100644 - } - - qemu_put_be32(f, blen); -- if (f->ops->writev_buffer) { -- add_to_iovec(f, f->buf + f->buf_index, blen, false); -- } -- f->buf_index += blen; -- if (f->buf_index == IO_BUF_SIZE) { -- qemu_fflush(f); -- } +- add_buf_to_iovec(f, blen); - return blen + sizeof(int32_t); -} -- + /* Put the data in the buffer of f_src to the buffer of f_des, and * then reset the buf_index of f_src to 0. 
- */ -@@ -820,3 +754,15 @@ void qemu_file_set_blocking(QEMUFile *f, bool block) - f->ops->set_blocking(f->opaque, block); - } +@@ -834,3 +785,15 @@ int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size) + + return 0; } + +ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr) @@ -115,66 +132,78 @@ index be0d6c8ca8..3bba694ed4 100644 + add_buf_to_iovec(f, v); +} diff --git a/migration/qemu-file.h b/migration/qemu-file.h -index 5de9fa2e96..6570e53e13 100644 +index 8aec9fabf7..8afa95732b 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h -@@ -134,8 +134,6 @@ bool qemu_file_is_writable(QEMUFile *f); +@@ -54,8 +54,8 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, - size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); - size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); + size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); + size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); -ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, - const uint8_t *p, size_t size); ++ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr); ++void qemu_put_compress_end(QEMUFile *f, unsigned int v); int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + bool qemu_file_buffer_empty(QEMUFile *file); - /* -@@ -162,6 +160,8 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t flags); - void ram_control_after_iterate(QEMUFile *f, uint64_t flags); - void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index fa4388f6a6..2be344acbc 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -28,7 +28,6 @@ -+ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr); -+void qemu_put_compress_end(QEMUFile *f, unsigned int v); - /* Whenever this is found in the data stream, the 
flags - * will be passed to ram_control_load_hook in the incoming-migration - * side. This lets before_ram_iterate/after_ram_iterate add -diff --git a/migration/ram.c b/migration/ram.c -index 92ce1a53e7..f78a681ca2 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -449,26 +449,22 @@ static QemuThread *decompress_threads; + #include "qemu/osdep.h" + #include "qemu/cutils.h" +- + #include "ram-compress.h" + + #include "qemu/error-report.h" +@@ -40,6 +39,7 @@ + #include "exec/ramblock.h" + #include "ram.h" + #include "migration-stats.h" ++#include "exec/ram_addr.h" + + static struct { + int64_t pages; +@@ -83,28 +83,22 @@ static QemuThread *decompress_threads; static QemuMutex decomp_done_lock; static QemuCond decomp_done_cond; --static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, -- ram_addr_t offset, uint8_t *source_buf); -+static bool do_compress_ram_page(CompressParam *param, RAMBlock *block); +-static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, +- RAMBlock *block, ram_addr_t offset, +- uint8_t *source_buf); ++static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block); static void *do_data_compress(void *opaque) { CompressParam *param = opaque; RAMBlock *block; - ram_addr_t offset; - bool zero_page; + CompressResult result; qemu_mutex_lock(&param->mutex); while (!param->quit) { - if (param->block) { + if (param->trigger) { block = param->block; - offset = param->offset; - param->block = NULL; + param->trigger = false; qemu_mutex_unlock(&param->mutex); -- zero_page = do_compress_ram_page(param->file, &param->stream, -- block, offset, param->originbuf); -+ zero_page = do_compress_ram_page(param, block); - +- result = do_compress_ram_page(param->file, &param->stream, +- block, offset, param->originbuf); +- ++ result = do_compress_ram_page(param, block); qemu_mutex_lock(&comp_done_lock); param->done = true; -@@ -2212,28 +2208,73 @@ static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, - return
1; + param->result = result; +@@ -204,15 +198,57 @@ exit: + return -1; } --static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, -- ram_addr_t offset, uint8_t *source_buf) +-static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, +- RAMBlock *block, ram_addr_t offset, +- uint8_t *source_buf) +/* + * Compress size bytes of data start at p and store the compressed + * data to the buffer of f. @@ -219,37 +248,35 @@ index 92ce1a53e7..f78a681ca2 100644 + return blen + sizeof(int32_t); +} + -+static bool do_compress_ram_page(CompressParam *param, RAMBlock *block) ++static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block) { - RAMState *rs = ram_state; -+ ram_addr_t offset = param->offset; - uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); - bool zero_page = false; +- uint8_t *p = block->host + offset; ++ uint8_t *p = block->host + (param->offset & TARGET_PAGE_MASK); + size_t page_size = qemu_target_page_size(); int ret; -- if (save_zero_page_to_file(rs, f, block, offset)) { -+ if (save_zero_page_to_file(rs, param->file, block, offset)) { - zero_page = true; - goto exit; - } +- assert(qemu_file_buffer_empty(f)); ++ assert(qemu_file_buffer_empty(param->file)); -- save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); -+ save_page_header(rs, param->file, block, -+ offset | RAM_SAVE_FLAG_COMPRESS_PAGE); - - /* - * copy it to a internal buffer to avoid it being modified by VM + if (buffer_is_zero(p, page_size)) { + return RES_ZEROPAGE; +@@ -223,12 +259,12 @@ static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, * so that we can catch up the error during compression and * decompression */ -- memcpy(source_buf, p, TARGET_PAGE_SIZE); -- ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE); -+ memcpy(param->originbuf, p, TARGET_PAGE_SIZE); -+ ret = qemu_put_compression_data(param, TARGET_PAGE_SIZE); +- memcpy(source_buf, p, page_size); +- ret = 
qemu_put_compression_data(f, stream, source_buf, page_size); ++ memcpy(param->originbuf, p, page_size); ++ ret = qemu_put_compression_data(param, page_size); if (ret < 0) { qemu_file_set_error(migrate_get_current()->to_dst_file, ret); error_report("compressed data failed!"); -@@ -3926,19 +3967,20 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) +- qemu_fflush(f); ++ qemu_fflush(param->file); + return RES_NONE; + } + return RES_COMPRESS; +@@ -322,19 +358,20 @@ bool compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, /* return the size after decompression, or negative value on error */ static int @@ -275,11 +302,9 @@ index 92ce1a53e7..f78a681ca2 100644 stream->next_out = dest; err = inflate(stream, Z_NO_FLUSH); -@@ -3952,22 +3994,17 @@ qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, - static void *do_data_decompress(void *opaque) - { +@@ -350,20 +387,18 @@ static void *do_data_decompress(void *opaque) DecompressParam *param = opaque; -- unsigned long pagesize; + unsigned long pagesize; uint8_t *des; - int len, ret; + int ret; @@ -292,11 +317,11 @@ index 92ce1a53e7..f78a681ca2 100644 param->des = 0; qemu_mutex_unlock(&param->mutex); -- pagesize = TARGET_PAGE_SIZE; -- + pagesize = qemu_target_page_size(); + - ret = qemu_uncompress_data(&param->stream, des, pagesize, - param->compbuf, len); -+ ret = qemu_uncompress_data(param, des, TARGET_PAGE_SIZE); ++ ret = qemu_uncompress_data(param, des, pagesize); if (ret < 0 && migrate_get_current()->decompress_error_check) { error_report("decompress data failed"); qemu_file_set_error(decomp_file, ret); diff --git a/migration-Report-error-in-incoming-migration.patch b/migration-Report-error-in-incoming-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..a12cced44a2fd30f01d64de8cdc920925180a908 --- /dev/null +++ b/migration-Report-error-in-incoming-migration.patch @@ -0,0 +1,40 @@ +From 8235f51444f1147a36733474278476d7de83d545 Mon Sep 17 00:00:00
2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:41 -0300 +Subject: [10/99] migration: Report error in incoming migration + +commit e3b8ad5c13714cca5e3fc1445472171fbcd469bc upstream. + +We're not currently reporting the errors set with migrate_set_error() +when incoming migration fails. + +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-5-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 5829565f9c..2c5258d0b0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -698,6 +698,13 @@ process_incoming_migration_co(void *opaque) + } + + if (ret < 0) { ++ MigrationState *s = migrate_get_current(); ++ ++ if (migrate_has_error(s)) { ++ WITH_QEMU_LOCK_GUARD(&s->error_mutex) { ++ error_report_err(s->error); ++ } ++ } + error_report("load of migration failed: %s", strerror(-ret)); + goto fail; + } +-- +2.33.0 + diff --git a/migration-Skip-only-empty-block-devicesi.patch b/migration-Skip-only-empty-block-devicesi.patch new file mode 100644 index 0000000000000000000000000000000000000000..6787e2920d09b68bf6eb42dc58517a26e0bbae1d --- /dev/null +++ b/migration-Skip-only-empty-block-devicesi.patch @@ -0,0 +1,86 @@ +From 4506b31c0fff0b7a69ec4c7e264715ed70df75a8 Mon Sep 17 00:00:00 2001 +From: gaojiazhen +Date: Mon, 25 Mar 2024 22:13:43 +0800 +Subject: [PATCH] migration: Skip only empty block devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 2e128776dc56f502c2ee41750afe83938f389528 + +The block .save_setup() handler calls a helper routine +init_blk_migration() which builds a list of block devices to take into +account for migration. When one device is found to be empty (sectors +== 0), the loop exits and all the remaining devices are ignored.
This +is a regression introduced when bdrv_iterate() was removed. + +Change that by skipping only empty devices. + +Cc: Markus Armbruster +Cc: qemu-stable +Suggested-by: Kevin Wolf +Fixes: fea68bb ("block: Eliminate bdrv_iterate(), use bdrv_next()") +Signed-off-by: Cédric Le Goater +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Link: https://lore.kernel.org/r/20240312120431.550054-1-clg@redhat.com +[peterx: fix "Suggested-by:"] +Signed-off-by: Peter Xu +Signed-off-by: Gao Jiazhen +--- + migration/block.c | 5 ++++- + tests/qemu-iotests/198.out | 2 -- + tests/qemu-iotests/206.out | 1 - + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index a15f9bddcb..710ef6f490 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -409,7 +409,10 @@ static int init_blk_migration(QEMUFile *f) + } + + sectors = bdrv_nb_sectors(bs); +- if (sectors <= 0) { ++ if (sectors == 0) { ++ continue; ++ } ++ if (sectors < 0) { + ret = sectors; + bdrv_next_cleanup(&it); + goto out; +diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out +index 62fb73fa3e..805494916f 100644 +--- a/tests/qemu-iotests/198.out ++++ b/tests/qemu-iotests/198.out +@@ -39,7 +39,6 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +@@ -85,7 +84,6 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out +index 979f00f9bf..7e95694777 100644 +--- a/tests/qemu-iotests/206.out ++++ b/tests/qemu-iotests/206.out +@@ -114,7 +114,6 @@ Format specific information: + refcount bits: 16 + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha1 + cipher 
alg: aes-128 + uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +-- +2.27.0 + diff --git a/migration-add-qemu_file_update_transfer-interface.patch b/migration-add-qemu_file_update_transfer-interface.patch deleted file mode 100644 index 4222fd0adb202051cd57a3f0cab01e5ad52f8248..0000000000000000000000000000000000000000 --- a/migration-add-qemu_file_update_transfer-interface.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 7572495245a437da717e6829a9ce852cc3f229c9 Mon Sep 17 00:00:00 2001 -From: Zheng Chuan -Date: Mon, 20 Apr 2020 15:13:47 +0800 -Subject: [PATCH 02/10] migration: add qemu_file_update_transfer interface - -Add qemu_file_update_transfer for just update bytes_xfer for speed -limitation. This will be used for further migration feature such as -multifd migration. - -Change-Id: I969aa15305c961254b6fb9805b0ed2d65826cc5d -Signed-off-by: Ivan Ren -Reviewed-by: Wei Yang -Reviewed-by: Juan Quintela -Message-Id: <1564464816-21804-2-git-send-email-ivanren@tencent.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/qemu-file.c | 5 +++++ - migration/qemu-file.h | 1 + - 2 files changed, 6 insertions(+) - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 04315855..18f48052 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -615,6 +615,11 @@ void qemu_file_reset_rate_limit(QEMUFile *f) - f->bytes_xfer = 0; - } - -+void qemu_file_update_transfer(QEMUFile *f, int64_t len) -+{ -+ f->bytes_xfer += len; -+} -+ - void qemu_put_be16(QEMUFile *f, unsigned int v) - { - qemu_put_byte(f, v >> 8); -diff --git a/migration/qemu-file.h b/migration/qemu-file.h -index 13baf896..5de9fa2e 100644 ---- a/migration/qemu-file.h -+++ b/migration/qemu-file.h -@@ -147,6 +147,7 @@ int qemu_peek_byte(QEMUFile *f, int offset); - void qemu_file_skip(QEMUFile *f, int size); - void qemu_update_position(QEMUFile *f, size_t size); - void qemu_file_reset_rate_limit(QEMUFile *f); -+void qemu_file_update_transfer(QEMUFile *f, int64_t len); - void 
qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); - int64_t qemu_file_get_rate_limit(QEMUFile *f); - void qemu_file_set_error(QEMUFile *f, int ret); --- -2.19.1 diff --git a/migration-add-speed-limit-for-multifd-migration.patch b/migration-add-speed-limit-for-multifd-migration.patch deleted file mode 100644 index 690d9c9cf095cafdb2ff18025d70ee57a2527de2..0000000000000000000000000000000000000000 --- a/migration-add-speed-limit-for-multifd-migration.patch +++ /dev/null @@ -1,127 +0,0 @@ -From bc5780480db9e38699df0b4697e60a9f36258dc4 Mon Sep 17 00:00:00 2001 -From: Ivan Ren -Date: Tue, 30 Jul 2019 13:33:35 +0800 -Subject: [PATCH 03/10] migration: add speed limit for multifd migration - -Limit the speed of multifd migration through common speed limitation -qemu file. - -Change-Id: Id2abfc7ea85679bd53130a43043cc70179a52e87 -Signed-off-by: Ivan Ren -Message-Id: <1564464816-21804-3-git-send-email-ivanren@tencent.com> -Reviewed-by: Wei Yang -Reviewed-by: Juan Quintela -Signed-off-by: Dr. David Alan Gilbert ---- - migration/ram.c | 22 ++++++++++++---------- - 1 file changed, 12 insertions(+), 10 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 889148dd..88ddd2bb 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -922,7 +922,7 @@ struct { - * false. 
- */ - --static int multifd_send_pages(void) -+static int multifd_send_pages(RAMState *rs) - { - int i; - static int next_channel; -@@ -954,6 +954,7 @@ static int multifd_send_pages(void) - multifd_send_state->pages = p->pages; - p->pages = pages; - transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; -+ qemu_file_update_transfer(rs->f, transferred); - ram_counters.multifd_bytes += transferred; - ram_counters.transferred += transferred;; - qemu_mutex_unlock(&p->mutex); -@@ -962,7 +963,7 @@ static int multifd_send_pages(void) - return 1; - } - --static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) -+static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) - { - MultiFDPages_t *pages = multifd_send_state->pages; - -@@ -981,12 +982,12 @@ static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) - } - } - -- if (multifd_send_pages() < 0) { -+ if (multifd_send_pages(rs) < 0) { - return -1; - } - - if (pages->block != block) { -- return multifd_queue_page(block, offset); -+ return multifd_queue_page(rs, block, offset); - } - - return 1; -@@ -1054,7 +1055,7 @@ void multifd_save_cleanup(void) - multifd_send_state = NULL; - } - --static void multifd_send_sync_main(void) -+static void multifd_send_sync_main(RAMState *rs) - { - int i; - -@@ -1062,7 +1063,7 @@ static void multifd_send_sync_main(void) - return; - } - if (multifd_send_state->pages->used) { -- if (multifd_send_pages() < 0) { -+ if (multifd_send_pages(rs) < 0) { - error_report("%s: multifd_send_pages fail", __func__); - return; - } -@@ -1083,6 +1084,7 @@ static void multifd_send_sync_main(void) - p->packet_num = multifd_send_state->packet_num++; - p->flags |= MULTIFD_FLAG_SYNC; - p->pending_job++; -+ qemu_file_update_transfer(rs->f, p->packet_len); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - } -@@ -2079,7 +2081,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) - static int 
ram_save_multifd_page(RAMState *rs, RAMBlock *block, - ram_addr_t offset) - { -- if (multifd_queue_page(block, offset) < 0) { -+ if (multifd_queue_page(rs, block, offset) < 0) { - return -1; - } - ram_counters.normal++; -@@ -3482,7 +3484,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - ram_control_before_iterate(f, RAM_CONTROL_SETUP); - ram_control_after_iterate(f, RAM_CONTROL_SETUP); - -- multifd_send_sync_main(); -+ multifd_send_sync_main(*rsp); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -3570,7 +3572,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- multifd_send_sync_main(); -+ multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - ram_counters.transferred += 8; -@@ -3629,7 +3631,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - - rcu_read_unlock(); - -- multifd_send_sync_main(); -+ multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - --- -2.19.1 diff --git a/migration-add-support-to-migrate-shared-regions-list.patch b/migration-add-support-to-migrate-shared-regions-list.patch new file mode 100644 index 0000000000000000000000000000000000000000..f17b7c560fc7512d9ddf15d8d26d1e60d3d9f27e --- /dev/null +++ b/migration-add-support-to-migrate-shared-regions-list.patch @@ -0,0 +1,120 @@ +From 0f85e3a486c2d0130cb3be322900aa839d77d4bd Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 16:31:36 +0000 +Subject: [PATCH] migration: add support to migrate shared regions list + +cherry-picked from https://github.com/AMDESE/qemu/commit/9236f522e48b6. + +When memory encryption is enabled, the hypervisor maintains a shared +regions list which is referred by hypervisor during migration to check +if page is private or shared. This list is built during the VM bootup and +must be migrated to the target host so that hypervisor on target host can +use it for future migration. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Ashish Kalra +Signed-off-by: Ashish Kalra +[ Fix conflicts. ] +Signed-off-by: hanliyang +--- + include/exec/confidential-guest-support.h | 2 +- + target/i386/sev.c | 45 +++++++++++++++++++++++ + target/i386/sev.h | 2 + + 3 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index 343f686fc2..dd4887f65f 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -73,7 +73,7 @@ struct ConfidentialGuestMemoryEncryptionOps { + bool (*is_gfn_in_unshared_region)(unsigned long gfn); + + /* Write the shared regions list */ +- int (*save_outgoing_shared_regions_list)(QEMUFile *f); ++ int (*save_outgoing_shared_regions_list)(QEMUFile *f, uint64_t *bytes_sent); + + /* Load the shared regions list */ + int (*load_incoming_shared_regions_list)(QEMUFile *f); +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 8525a7351f..92aedf0503 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -176,10 +176,15 @@ static const char *const sev_fw_errlist[] = { + + #define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */ + ++#define SHARED_REGION_LIST_CONT 0x1 ++#define SHARED_REGION_LIST_END 0x2 ++ + static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_setup = sev_save_setup, + .save_outgoing_page = sev_save_outgoing_page, + .load_incoming_page = sev_load_incoming_page, ++ .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list, ++ .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, + }; + + static int +@@ -1777,6 +1782,46 @@ int sev_add_shared_regions_list(unsigned long start, unsigned long end) + return 1; + } + ++int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent) ++{ ++ SevGuestState *s = sev_guest; ++ struct shared_region *pos; ++ ++ QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { 
++ qemu_put_be32(f, SHARED_REGION_LIST_CONT); ++ qemu_put_be32(f, pos->gfn_start); ++ qemu_put_be32(f, pos->gfn_end); ++ *bytes_sent += 12; ++ } ++ ++ qemu_put_be32(f, SHARED_REGION_LIST_END); ++ *bytes_sent += 4; ++ return 0; ++} ++ ++int sev_load_incoming_shared_regions_list(QEMUFile *f) ++{ ++ SevGuestState *s = sev_guest; ++ struct shared_region *shrd_region; ++ int status; ++ ++ status = qemu_get_be32(f); ++ while (status == SHARED_REGION_LIST_CONT) { ++ ++ shrd_region = g_malloc0(sizeof(*shrd_region)); ++ if (!shrd_region) { ++ return 0; ++ } ++ shrd_region->gfn_start = qemu_get_be32(f); ++ shrd_region->gfn_end = qemu_get_be32(f); ++ ++ QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list); ++ ++ status = qemu_get_be32(f); ++ } ++ return 0; ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +diff --git a/target/i386/sev.h b/target/i386/sev.h +index acf69d4e6f..5b4231c859 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -64,6 +64,8 @@ void sev_es_set_reset_vector(CPUState *cpu); + int sev_remove_shared_regions_list(unsigned long gfn_start, + unsigned long gfn_end); + int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end); ++int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent); ++int sev_load_incoming_shared_regions_list(QEMUFile *f); + + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); + +-- +2.41.0.windows.1 + diff --git a/migration-always-initialise-ram_counters-for-a-new-m.patch b/migration-always-initialise-ram_counters-for-a-new-m.patch deleted file mode 100644 index ccd0db9ab87204ca41e17c1120618748eb4383fe..0000000000000000000000000000000000000000 --- a/migration-always-initialise-ram_counters-for-a-new-m.patch +++ /dev/null @@ -1,125 +0,0 @@ -From af2aa4f553565ae6b2248204c154748f38ec4746 Mon Sep 17 00:00:00 2001 -From: Ivan Ren -Date: Fri, 2 Aug 2019 18:18:41 +0800 
-Subject: [PATCH 01/10] migration: always initialise ram_counters for a new - migration - -This patch fix a multifd migration bug in migration speed calculation, this -problem can be reproduced as follows: -1. start a vm and give a heavy memory write stress to prevent the vm be - successfully migrated to destination -2. begin a migration with multifd -3. migrate for a long time [actually, this can be measured by transferred bytes] -4. migrate cancel -5. begin a new migration with multifd, the migration will directly run into - migration_completion phase - -Reason as follows: - -Migration update bandwidth and s->threshold_size in function -migration_update_counters after BUFFER_DELAY time: - - current_bytes = migration_total_bytes(s); - transferred = current_bytes - s->iteration_initial_bytes; - time_spent = current_time - s->iteration_start_time; - bandwidth = (double)transferred / time_spent; - s->threshold_size = bandwidth * s->parameters.downtime_limit; - -In multifd migration, migration_total_bytes function return -qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes. -s->iteration_initial_bytes will be initialized to 0 at every new migration, -but ram_counters is a global variable, and history migration data will be -accumulated. So if the ram_counters.multifd_bytes is big enough, it may lead -pending_size >= s->threshold_size become false in migration_iteration_run -after the first migration_update_counters. - -Change-Id: Ib153d8676a5b82650bfb1156060e09f0d29f3ac6 -Signed-off-by: Ivan Ren -Reviewed-by: Juan Quintela -Reviewed-by: Wei Yang -Suggested-by: Wei Yang -Message-Id: <1564741121-1840-1-git-send-email-ivanren@tencent.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/migration.c | 25 +++++++++++++++++++------ - migration/savevm.c | 1 + - 2 files changed, 20 insertions(+), 6 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8a607fe1..bea9b1d7 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1908,6 +1908,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - } - - migrate_init(s); -+ /* -+ * set ram_counters memory to zero for a -+ * new migration -+ */ -+ memset(&ram_counters, 0, sizeof(ram_counters)); - - return true; - } -@@ -3025,6 +3030,17 @@ static void migration_calculate_complete(MigrationState *s) - } - } - -+static void update_iteration_initial_status(MigrationState *s) -+{ -+ /* -+ * Update these three fields at the same time to avoid mismatch info lead -+ * wrong speed calculation. -+ */ -+ s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ s->iteration_initial_bytes = migration_total_bytes(s); -+ s->iteration_initial_pages = ram_get_total_transferred_pages(); -+} -+ - static void migration_update_counters(MigrationState *s, - int64_t current_time) - { -@@ -3060,9 +3076,7 @@ static void migration_update_counters(MigrationState *s, - - qemu_file_reset_rate_limit(s->to_dst_file); - -- s->iteration_start_time = current_time; -- s->iteration_initial_bytes = current_bytes; -- s->iteration_initial_pages = ram_get_total_transferred_pages(); -+ update_iteration_initial_status(s); - - trace_migrate_transferred(transferred, time_spent, - bandwidth, s->threshold_size); -@@ -3186,7 +3200,7 @@ static void *migration_thread(void *opaque) - rcu_register_thread(); - - object_ref(OBJECT(s)); -- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ update_iteration_initial_status(s); - - qemu_savevm_state_header(s->to_dst_file); - -@@ -3251,8 +3265,7 @@ static void *migration_thread(void *opaque) - * the local variables. 
This is important to avoid - * breaking transferred_bytes and bandwidth calculation - */ -- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -- s->iteration_initial_bytes = 0; -+ update_iteration_initial_status(s); - } - - current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -diff --git a/migration/savevm.c b/migration/savevm.c -index 79ed44d4..480c511b 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1424,6 +1424,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - } - - migrate_init(ms); -+ memset(&ram_counters, 0, sizeof(ram_counters)); - ms->to_dst_file = f; - - qemu_mutex_unlock_iothread(); --- -2.19.1 diff --git a/migration-colo-Fix-bdrv_graph_rdlock_main_loop-Asser.patch b/migration-colo-Fix-bdrv_graph_rdlock_main_loop-Asser.patch new file mode 100644 index 0000000000000000000000000000000000000000..ae3db8e3cdb7d2db24981590092504f1bfc1a89c --- /dev/null +++ b/migration-colo-Fix-bdrv_graph_rdlock_main_loop-Asser.patch @@ -0,0 +1,87 @@ +From 015fc431353ae348e7e9cef2036b674a4e33eb1c Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 15:04:16 +0800 +Subject: [PATCH] =?UTF-8?q?migration/colo:=20Fix=20bdrv=5Fgraph=5Frdlock?= + =?UTF-8?q?=5Fmain=5Floop:=20Assertion=20`!qemu=5Fin=5F=E2=80=A6?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry picked from commit 2cc637f1ea08d2a1b19fc5b1a30bc609f948de93 + +…coroutine()' failed. + +bdrv_activate_all() should not be called from the coroutine context, move +it to the QEMU thread colo_process_incoming_thread() with the bql_lock +protected. 
+ +The backtrace is as follows: + #4 0x0000561af7948362 in bdrv_graph_rdlock_main_loop () at ../block/graph-lock.c:260 + #5 0x0000561af7907a68 in graph_lockable_auto_lock_mainloop (x=0x7fd29810be7b) at /patch/to/qemu/include/block/graph-lock.h:259 + #6 0x0000561af79167d1 in bdrv_activate_all (errp=0x7fd29810bed0) at ../block.c:6906 + #7 0x0000561af762b4af in colo_incoming_co () at ../migration/colo.c:935 + #8 0x0000561af7607e57 in process_incoming_migration_co (opaque=0x0) at ../migration/migration.c:793 + #9 0x0000561af7adbeeb in coroutine_trampoline (i0=-106876144, i1=22042) at ../util/coroutine-ucontext.c:175 + #10 0x00007fd2a5cf21c0 in () at /lib64/libc.so.6 + +Cc: qemu-stable@nongnu.org +Cc: Fabiano Rosas +Closes: https://gitlab.com/qemu-project/qemu/-/issues/2277 +Fixes: 2b3912f ("block: Mark bdrv_first_blk() and bdrv_is_root_node() GRAPH_RDLOCK") +Signed-off-by: Li Zhijian +Reviewed-by: Zhang Chen +Tested-by: Zhang Chen +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240417025634.1014582-1-lizhijian@fujitsu.com +Signed-off-by: Peter Xu +Signed-off-by: Gao Jiazhen +--- + migration/colo.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/migration/colo.c b/migration/colo.c +index 4447e34914..8f301b7e57 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -830,6 +830,16 @@ static void *colo_process_incoming_thread(void *opaque) + return NULL; + } + ++ /* Make sure all file formats throw away their mutable metadata */ ++ qemu_mutex_lock_iothread(); ++ bdrv_activate_all(&local_err); ++ if (local_err) { ++ qemu_mutex_unlock_iothread(); ++ error_report_err(local_err); ++ return NULL; ++ } ++ qemu_mutex_unlock_iothread(); ++ + failover_init_state(); + + mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); +@@ -917,7 +927,6 @@ out: + int coroutine_fn colo_incoming_co(void) + { + MigrationIncomingState *mis = migration_incoming_get_current(); +- Error *local_err = NULL; + QemuThread th; + + 
assert(qemu_mutex_iothread_locked()); +@@ -926,13 +935,6 @@ int coroutine_fn colo_incoming_co(void) + return 0; + } + +- /* Make sure all file formats throw away their mutable metadata */ +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_report_err(local_err); +- return -EINVAL; +- } +- + qemu_thread_create(&th, "COLO incoming", colo_process_incoming_thread, + mis, QEMU_THREAD_JOINABLE); + +-- +2.41.0.windows.1 + diff --git a/migration-colo-fix-use-after-free-of-local_err.patch b/migration-colo-fix-use-after-free-of-local_err.patch deleted file mode 100644 index c03ceb5120bc3069ac123cc9c2702653c4d2da17..0000000000000000000000000000000000000000 --- a/migration-colo-fix-use-after-free-of-local_err.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 663e9b5f25d22834260a0686f77a27c957cd7b2f Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 24 Mar 2020 18:36:28 +0300 -Subject: [PATCH 07/14] migration/colo: fix use after free of local_err - -local_err is used again in secondary_vm_do_failover() after -replication_stop_all(), so we must zero it. Otherwise try to set -non-NULL local_err will crash. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200324153630.11882-5-vsementsov@virtuozzo.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -Signed-off-by: Peng Liang ---- - migration/colo.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/migration/colo.c b/migration/colo.c -index 9f84b1fa3c0f..761b3544d472 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -89,6 +89,7 @@ static void secondary_vm_do_failover(void) - replication_stop_all(true, &local_err); - if (local_err) { - error_report_err(local_err); -+ local_err = NULL; - } - - /* Notify all filters of all NIC to do checkpoint */ --- -2.26.2 - diff --git a/migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch b/migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch deleted file mode 100644 index 8028a29dd7141ce2fc2e139559c1a54661b31109..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 17b0582ebba622afbd8f454bbee8141ed2785f13 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:21:58 +0800 -Subject: [PATCH] migration/dirtyrate: Add RamblockDirtyInfo to store sampled - page info - -Add RamblockDirtyInfo to store sampled page info of each ramblock. - -Signed-off-by: Chuan Zheng -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: David Edmondson -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-4-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/dirtyrate.h | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h -index 84ab9409ac..8707df852d 100644 ---- a/migration/dirtyrate.h -+++ b/migration/dirtyrate.h -@@ -19,10 +19,28 @@ - */ - #define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 - -+/* -+ * Record ramblock idstr -+ */ -+#define RAMBLOCK_INFO_MAX_LEN 256 -+ - struct DirtyRateConfig { - uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ - int64_t sample_period_seconds; /* time duration between two sampling */ - }; - -+/* -+ * Store dirtypage info for each ramblock. 
-+ */ -+struct RamblockDirtyInfo { -+ char idstr[RAMBLOCK_INFO_MAX_LEN]; /* idstr for each ramblock */ -+ uint8_t *ramblock_addr; /* base address of ramblock we measure */ -+ uint64_t ramblock_pages; /* ramblock size in TARGET_PAGE_SIZE */ -+ uint64_t *sample_page_vfn; /* relative offset address for sampled page */ -+ uint64_t sample_pages_count; /* count of sampled pages */ -+ uint64_t sample_dirty_count; /* count of dirty pages we measure */ -+ uint32_t *hash_result; /* array of hash result for sampled pages */ -+}; -+ - void *get_dirtyrate_thread(void *arg); - #endif --- -2.27.0 - diff --git a/migration-dirtyrate-Add-dirtyrate-statistics-series-.patch b/migration-dirtyrate-Add-dirtyrate-statistics-series-.patch deleted file mode 100644 index 1d56f8029ca9b35243e87984538c93113cd4513a..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Add-dirtyrate-statistics-series-.patch +++ /dev/null @@ -1,93 +0,0 @@ -From d1340703e127c02e9a586143039507ba10d73cfb Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:21:59 +0800 -Subject: [PATCH] migration/dirtyrate: Add dirtyrate statistics series - functions - -Add dirtyrate statistics functions to record/update dirtyrate info. - -Signed-off-by: Chuan Zheng -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-5-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/dirtyrate.c | 32 ++++++++++++++++++++++++++++++++ - migration/dirtyrate.h | 12 ++++++++++++ - 2 files changed, 44 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 44a60bf10d..cbb323d6ec 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -23,6 +23,7 @@ - #include "dirtyrate.h" - - static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; -+static struct DirtyRateStat DirtyStat; - - static int dirtyrate_set_state(int *state, int old_state, int new_state) - { -@@ -34,6 +35,37 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) - } - } - -+static void reset_dirtyrate_stat(void) -+{ -+ DirtyStat.total_dirty_samples = 0; -+ DirtyStat.total_sample_count = 0; -+ DirtyStat.total_block_mem_MB = 0; -+ DirtyStat.dirty_rate = -1; -+ DirtyStat.start_time = 0; -+ DirtyStat.calc_time = 0; -+} -+ -+static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) -+{ -+ DirtyStat.total_dirty_samples += info->sample_dirty_count; -+ DirtyStat.total_sample_count += info->sample_pages_count; -+ /* size of total pages in MB */ -+ DirtyStat.total_block_mem_MB += (info->ramblock_pages * -+ TARGET_PAGE_SIZE) >> 20; -+} -+ -+static void update_dirtyrate(uint64_t msec) -+{ -+ uint64_t dirtyrate; -+ uint64_t total_dirty_samples = DirtyStat.total_dirty_samples; -+ uint64_t total_sample_count = DirtyStat.total_sample_count; -+ uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB; -+ -+ dirtyrate = total_dirty_samples * total_block_mem_MB * -+ 1000 / (total_sample_count * msec); -+ -+ DirtyStat.dirty_rate = dirtyrate; -+} - - static void calculate_dirtyrate(struct DirtyRateConfig config) - { -diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h -index 8707df852d..312debca6f 100644 ---- a/migration/dirtyrate.h -+++ b/migration/dirtyrate.h -@@ -42,5 +42,17 @@ struct RamblockDirtyInfo { - uint32_t *hash_result; /* array of hash result for sampled pages */ - }; - -+/* -+ * 
Store calculation statistics for each measure. -+ */ -+struct DirtyRateStat { -+ uint64_t total_dirty_samples; /* total dirty sampled page */ -+ uint64_t total_sample_count; /* total sampled pages */ -+ uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ -+ int64_t dirty_rate; /* dirty rate in MB/s */ -+ int64_t start_time; /* calculation start time in units of second */ -+ int64_t calc_time; /* time duration of two sampling in units of second */ -+}; -+ - void *get_dirtyrate_thread(void *arg); - #endif --- -2.27.0 - diff --git a/migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch b/migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch deleted file mode 100644 index 79d825c8a34d1229876b03e1ca64e464d7e0d91c..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 8a36332d38c0c0ba6b7d8c096367a4ec7c94e522 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:07 +0800 -Subject: [PATCH] migration/dirtyrate: Add trace_calls to make it easier to - debug - -Add trace_calls to make it easier to debug - -Signed-off-by: Chuan Zheng -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: David Edmondson -Message-Id: <1600237327-33618-13-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/dirtyrate.c | 9 +++++++++ - migration/trace-events | 8 ++++++++ - 2 files changed, 17 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 9d9155f8ab..80936a4ca6 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -22,6 +22,7 @@ - #include "qapi/qapi-commands-migration.h" - #include "migration.h" - #include "ram.h" -+#include "trace.h" - #include "dirtyrate.h" - - static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; -@@ -54,6 +55,7 @@ static bool is_sample_period_valid(int64_t sec) - static int dirtyrate_set_state(int *state, int old_state, int new_state) - { - assert(new_state < DIRTY_RATE_STATUS__MAX); -+ trace_dirtyrate_set_state(DirtyRateStatus_str(new_state)); - if (atomic_cmpxchg(state, old_state, new_state) == old_state) { - return 0; - } else { -@@ -76,6 +78,8 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) - info->start_time = DirtyStat.start_time; - info->calc_time = DirtyStat.calc_time; - -+ trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState)); -+ - return info; - } - -@@ -123,6 +127,7 @@ static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, - crc = crc32(0, (info->ramblock_addr + - vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); - -+ trace_get_ramblock_vfn_hash(info->idstr, vfn, crc); - return crc; - } - -@@ -201,6 +206,8 @@ static bool skip_sample_ramblock(RAMBlock *block) - * Sample only blocks larger than MIN_RAMBLOCK_SIZE. 
- */ - if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { -+ trace_skip_sample_ramblock(block->idstr, -+ qemu_ram_get_used_length(block)); - return true; - } - -@@ -260,6 +267,7 @@ static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) - for (i = 0; i < info->sample_pages_count; i++) { - crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); - if (crc != info->hash_result[i]) { -+ trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]); - info->sample_dirty_count++; - } - } -@@ -285,6 +293,7 @@ find_block_matched(RAMBlock *block, int count, - if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || - infos[i].ramblock_pages != - (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { -+ trace_find_page_matched(block->idstr); - return NULL; - } - -diff --git a/migration/trace-events b/migration/trace-events -index d8e54c367a..69620c43c2 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -296,3 +296,11 @@ dirty_bitmap_load_bits_zeroes(void) "" - dirty_bitmap_load_header(uint32_t flags) "flags 0x%x" - dirty_bitmap_load_enter(void) "" - dirty_bitmap_load_success(void) "" -+ -+# dirtyrate.c -+dirtyrate_set_state(const char *new_state) "new state %s" -+query_dirty_rate_info(const char *new_state) "current state %s" -+get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock name: %s, vfn: %"PRIu64 ", crc: %" PRIu32 -+calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32 -+skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64 -+find_page_matched(const char *idstr) "ramblock %s addr or size changed" --- -2.27.0 - diff --git a/migration-dirtyrate-Compare-page-hash-results-for-re.patch b/migration-dirtyrate-Compare-page-hash-results-for-re.patch deleted file mode 100644 index 
b9277d5faa65a35cc1d044023683ea37e452fc65..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Compare-page-hash-results-for-re.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 949612c5bbc5414970aed7d7ec9390a058ee2246 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:02 +0800 -Subject: [PATCH] migration/dirtyrate: Compare page hash results for recorded - sampled page - -Compare page hash results for recorded sampled page. - -Signed-off-by: Chuan Zheng -Signed-off-by: YanYing Zhuang -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-8-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/dirtyrate.c | 63 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 63 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index f93601f8ab..0412f825dc 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -177,6 +177,69 @@ out: - return ret; - } - -+static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) -+{ -+ uint32_t crc; -+ int i; -+ -+ for (i = 0; i < info->sample_pages_count; i++) { -+ crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); -+ if (crc != info->hash_result[i]) { -+ info->sample_dirty_count++; -+ } -+ } -+} -+ -+static struct RamblockDirtyInfo * -+find_block_matched(RAMBlock *block, int count, -+ struct RamblockDirtyInfo *infos) -+{ -+ int i; -+ struct RamblockDirtyInfo *matched; -+ -+ for (i = 0; i < count; i++) { -+ if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) { -+ break; -+ } -+ } -+ -+ if (i == count) { -+ return NULL; -+ } -+ -+ if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || -+ infos[i].ramblock_pages != -+ (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { -+ return NULL; -+ } -+ -+ matched = &infos[i]; -+ -+ return matched; -+} -+ -+static bool compare_page_hash_info(struct RamblockDirtyInfo *info, -+ int block_count) -+{ -+ struct 
RamblockDirtyInfo *block_dinfo = NULL; -+ RAMBlock *block = NULL; -+ -+ RAMBLOCK_FOREACH_MIGRATABLE(block) { -+ block_dinfo = find_block_matched(block, block_count, info); -+ if (block_dinfo == NULL) { -+ continue; -+ } -+ calc_page_dirty_rate(block_dinfo); -+ update_dirtyrate_stat(block_dinfo); -+ } -+ -+ if (DirtyStat.total_sample_count == 0) { -+ return false; -+ } -+ -+ return true; -+} -+ - static void calculate_dirtyrate(struct DirtyRateConfig config) - { - /* todo */ --- -2.27.0 - diff --git a/migration-dirtyrate-Fix-segmentation-fault.patch b/migration-dirtyrate-Fix-segmentation-fault.patch new file mode 100644 index 0000000000000000000000000000000000000000..b41a921dd343c553494519e0aaf187e5c089d005 --- /dev/null +++ b/migration-dirtyrate-Fix-segmentation-fault.patch @@ -0,0 +1,36 @@ +From 44b6911233ea62a6a57afd90b259064fac3855ea Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 18 Jun 2024 09:50:38 +0800 +Subject: [PATCH] migration/dirtyrate: Fix segmentation fault + +cheery-pick from e65152d5483b2c847ec7a947ed52650152cfdcc0 + +Since the kvm_dirty_ring_enabled function accesses a null kvm_state +pointer when the KVM acceleration parameter is not specified, running +calc_dirty_rate with the -r or -b option causes a segmentation fault. + +Signed-off-by: Masato Imai +Message-ID: <20240507025010.1968881-1-mii@sfc.wide.ad.jp> +[Assert kvm_state when kvm_dirty_ring_enabled was called to fix it. - Hyman] +Signed-off-by: Hyman Huang +Signed-off-by: qihao_yewu +--- + accel/kvm/kvm-all.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index b791aad1d6..ade7841ca3 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2343,7 +2343,7 @@ bool kvm_vcpu_id_is_valid(int vcpu_id) + + bool kvm_dirty_ring_enabled(void) + { +- return kvm_state->kvm_dirty_ring_size ? 
true : false; ++ return kvm_state && kvm_state->kvm_dirty_ring_size; + } + + static void query_stats_cb(StatsResultList **result, StatsTarget target, +-- +2.41.0.windows.1 + diff --git a/migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch b/migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch deleted file mode 100644 index 1fcb2c07c2bfd83d91ee582ec79c39d759c8335b..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 18102266fb18c4bfcdd4760e7111ca03a7520588 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:05 +0800 -Subject: [PATCH] migration/dirtyrate: Implement calculate_dirtyrate() function - -Implement calculate_dirtyrate() function. - -Signed-off-by: Chuan Zheng -Signed-off-by: YanYing Zhuang -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-11-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/dirtyrate.c | 45 +++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 43 insertions(+), 2 deletions(-) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 485d6467c9..c7a389a527 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -162,6 +162,21 @@ static void get_ramblock_dirty_info(RAMBlock *block, - strcpy(info->idstr, qemu_ram_get_idstr(block)); - } - -+static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count) -+{ -+ int i; -+ -+ if (!infos) { -+ return; -+ } -+ -+ for (i = 0; i < count; i++) { -+ g_free(infos[i].sample_page_vfn); -+ g_free(infos[i].hash_result); -+ } -+ g_free(infos); -+} -+ - static bool skip_sample_ramblock(RAMBlock *block) - { - /* -@@ -287,8 +302,34 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, - - static void calculate_dirtyrate(struct DirtyRateConfig config) - { -- /* todo */ -- return; -+ struct RamblockDirtyInfo *block_dinfo = NULL; -+ int 
block_count = 0; -+ int64_t msec = 0; -+ int64_t initial_time; -+ -+ rcu_register_thread(); -+ reset_dirtyrate_stat(); -+ rcu_read_lock(); -+ initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { -+ goto out; -+ } -+ rcu_read_unlock(); -+ -+ msec = config.sample_period_seconds * 1000; -+ msec = set_sample_page_period(msec, initial_time); -+ -+ rcu_read_lock(); -+ if (!compare_page_hash_info(block_dinfo, block_count)) { -+ goto out; -+ } -+ -+ update_dirtyrate(msec); -+ -+out: -+ rcu_read_unlock(); -+ free_ramblock_dirty_info(block_dinfo, block_count); -+ rcu_unregister_thread(); - } - - void *get_dirtyrate_thread(void *arg) --- -2.27.0 - diff --git a/migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch b/migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch deleted file mode 100644 index 04893d36e579d9c4b78dfa9d1bd488d3a842cddb..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 1f5f7156988cee6e678eff253df0e79788c950d7 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:06 +0800 -Subject: [PATCH] migration/dirtyrate: Implement - qmp_cal_dirty_rate()/qmp_get_dirty_rate() function - -Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function which could be called - -Signed-off-by: Chuan Zheng -Message-Id: <1600237327-33618-12-git-send-email-zhengchuan@huawei.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert - atomic function fixup - Wording fixup in migration.json based on Eric's review ---- - migration/dirtyrate.c | 62 +++++++++++++++++++++++++++++++++++++++++++ - qapi/migration.json | 50 ++++++++++++++++++++++++++++++++++ - 2 files changed, 112 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index c7a389a527..9d9155f8ab 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -61,6 +61,24 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) - } - } - -+static struct DirtyRateInfo *query_dirty_rate_info(void) -+{ -+ int64_t dirty_rate = DirtyStat.dirty_rate; -+ struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); -+ -+ if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { -+ info->dirty_rate = dirty_rate; -+ } else { -+ info->dirty_rate = -1; -+ } -+ -+ info->status = CalculatingState; -+ info->start_time = DirtyStat.start_time; -+ info->calc_time = DirtyStat.calc_time; -+ -+ return info; -+} -+ - static void reset_dirtyrate_stat(void) - { - DirtyStat.total_dirty_samples = 0; -@@ -318,6 +336,8 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) - - msec = config.sample_period_seconds * 1000; - msec = set_sample_page_period(msec, initial_time); -+ DirtyStat.start_time = initial_time / 1000; -+ DirtyStat.calc_time = msec / 1000; - - rcu_read_lock(); - if (!compare_page_hash_info(block_dinfo, block_count)) { -@@ -353,3 +373,45 @@ void *get_dirtyrate_thread(void *arg) - } - return NULL; - } -+ -+void qmp_calc_dirty_rate(int64_t calc_time, Error **errp) -+{ -+ static struct DirtyRateConfig config; -+ QemuThread thread; -+ int ret; -+ -+ /* -+ * If the dirty rate is already being measured, don't attempt to start. 
-+ */ -+ if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) { -+ error_setg(errp, "the dirty rate is already being measured."); -+ return; -+ } -+ -+ if (!is_sample_period_valid(calc_time)) { -+ error_setg(errp, "calc-time is out of range[%d, %d].", -+ MIN_FETCH_DIRTYRATE_TIME_SEC, -+ MAX_FETCH_DIRTYRATE_TIME_SEC); -+ return; -+ } -+ -+ /* -+ * Init calculation state as unstarted. -+ */ -+ ret = dirtyrate_set_state(&CalculatingState, CalculatingState, -+ DIRTY_RATE_STATUS_UNSTARTED); -+ if (ret == -1) { -+ error_setg(errp, "init dirty rate calculation state failed."); -+ return; -+ } -+ -+ config.sample_period_seconds = calc_time; -+ config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES; -+ qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread, -+ (void *)&config, QEMU_THREAD_DETACHED); -+} -+ -+struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp) -+{ -+ return query_dirty_rate_info(); -+} -diff --git a/qapi/migration.json b/qapi/migration.json -index fdddde0af7..76f5b42493 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -1462,3 +1462,53 @@ - ## - { 'enum': 'DirtyRateStatus', - 'data': [ 'unstarted', 'measuring', 'measured'] } -+ -+## -+# @DirtyRateInfo: -+# -+# Information about current dirty page rate of vm. -+# -+# @dirty-rate: @dirtyrate describing the dirty page rate of vm -+# in units of MB/s. -+# If this field returns '-1', it means querying has not -+# yet started or completed. 
-+# -+# @status: status containing dirtyrate query status includes -+# 'unstarted' or 'measuring' or 'measured' -+# -+# @start-time: start time in units of second for calculation -+# -+# @calc-time: time in units of second for sample dirty pages -+# -+# Since: 5.2 -+# -+## -+{ 'struct': 'DirtyRateInfo', -+ 'data': {'dirty-rate': 'int64', -+ 'status': 'DirtyRateStatus', -+ 'start-time': 'int64', -+ 'calc-time': 'int64'} } -+ -+## -+# @calc-dirty-rate: -+# -+# start calculating dirty page rate for vm -+# -+# @calc-time: time in units of second for sample dirty pages -+# -+# Since: 5.2 -+# -+# Example: -+# {"command": "calc-dirty-rate", "data": {"calc-time": 1} } -+# -+## -+{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} } -+ -+## -+# @query-dirty-rate: -+# -+# query dirty page rate in units of MB/s for vm -+# -+# Since: 5.2 -+## -+{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } --- -2.27.0 - diff --git a/migration-dirtyrate-Implement-set_sample_page_period.patch b/migration-dirtyrate-Implement-set_sample_page_period.patch deleted file mode 100644 index fdb9c22431a0d74a4055f01839a42beaa2fa1f51..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Implement-set_sample_page_period.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 905082a502e0600d40e784df2443ae99948cf52d Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:04 +0800 -Subject: [PATCH] migration/dirtyrate: Implement set_sample_page_period() and - is_sample_period_valid() - -Implement is_sample_period_valid() to check if the sample period is vaild and -do set_sample_page_period() to sleep specific time between sample actions. - -Signed-off-by: Chuan Zheng -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: David Edmondson -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-10-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/dirtyrate.c | 24 ++++++++++++++++++++++++ - migration/dirtyrate.h | 6 ++++++ - 2 files changed, 30 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 97bb883850..485d6467c9 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -27,6 +27,30 @@ - static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; - static struct DirtyRateStat DirtyStat; - -+static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) -+{ -+ int64_t current_time; -+ -+ current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ if ((current_time - initial_time) >= msec) { -+ msec = current_time - initial_time; -+ } else { -+ g_usleep((msec + initial_time - current_time) * 1000); -+ } -+ -+ return msec; -+} -+ -+static bool is_sample_period_valid(int64_t sec) -+{ -+ if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC || -+ sec > MAX_FETCH_DIRTYRATE_TIME_SEC) { -+ return false; -+ } -+ -+ return true; -+} -+ - static int dirtyrate_set_state(int *state, int old_state, int new_state) - { - assert(new_state < DIRTY_RATE_STATUS__MAX); -diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h -index be5b8ec2b1..6ec429534d 100644 ---- a/migration/dirtyrate.h -+++ b/migration/dirtyrate.h -@@ -29,6 +29,12 @@ - */ - #define MIN_RAMBLOCK_SIZE 128 - -+/* -+ * Take 1s as minimum time for calculation duration -+ */ -+#define MIN_FETCH_DIRTYRATE_TIME_SEC 1 -+#define MAX_FETCH_DIRTYRATE_TIME_SEC 60 -+ - struct DirtyRateConfig { - uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ - int64_t sample_period_seconds; /* time duration between two sampling */ --- -2.27.0 - diff --git a/migration-dirtyrate-Record-hash-results-for-each-sam.patch b/migration-dirtyrate-Record-hash-results-for-each-sam.patch deleted file mode 100644 index 5a5a8a9476155c56c740941143b899e80b5a2472..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-Record-hash-results-for-each-sam.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 
751dbc44b4ac0e0c0bce2f53d2ee79a6e6318188 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:01 +0800 -Subject: [PATCH] migration/dirtyrate: Record hash results for each sampled - page - -Record hash results for each sampled page, crc32 is taken to calculate -hash results for each sampled length in TARGET_PAGE_SIZE. - -Signed-off-by: Chuan Zheng -Signed-off-by: YanYing Zhuang -Reviewed-by: David Edmondson -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-7-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/dirtyrate.c | 109 ++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 109 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 1ccc71077d..f93601f8ab 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -10,6 +10,7 @@ - * See the COPYING file in the top-level directory. - */ - -+#include - #include "qemu/osdep.h" - #include "qapi/error.h" - #include "cpu.h" -@@ -68,6 +69,114 @@ static void update_dirtyrate(uint64_t msec) - DirtyStat.dirty_rate = dirtyrate; - } - -+/* -+ * get hash result for the sampled memory with length of TARGET_PAGE_SIZE -+ * in ramblock, which starts from ramblock base address. 
-+ */ -+static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, -+ uint64_t vfn) -+{ -+ uint32_t crc; -+ -+ crc = crc32(0, (info->ramblock_addr + -+ vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); -+ -+ return crc; -+} -+ -+static bool save_ramblock_hash(struct RamblockDirtyInfo *info) -+{ -+ unsigned int sample_pages_count; -+ int i; -+ GRand *rand; -+ -+ sample_pages_count = info->sample_pages_count; -+ -+ /* ramblock size less than one page, return success to skip this ramblock */ -+ if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) { -+ return true; -+ } -+ -+ info->hash_result = g_try_malloc0_n(sample_pages_count, -+ sizeof(uint32_t)); -+ if (!info->hash_result) { -+ return false; -+ } -+ -+ info->sample_page_vfn = g_try_malloc0_n(sample_pages_count, -+ sizeof(uint64_t)); -+ if (!info->sample_page_vfn) { -+ g_free(info->hash_result); -+ return false; -+ } -+ -+ rand = g_rand_new(); -+ for (i = 0; i < sample_pages_count; i++) { -+ info->sample_page_vfn[i] = g_rand_int_range(rand, 0, -+ info->ramblock_pages - 1); -+ info->hash_result[i] = get_ramblock_vfn_hash(info, -+ info->sample_page_vfn[i]); -+ } -+ g_rand_free(rand); -+ -+ return true; -+} -+ -+static void get_ramblock_dirty_info(RAMBlock *block, -+ struct RamblockDirtyInfo *info, -+ struct DirtyRateConfig *config) -+{ -+ uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes; -+ -+ /* Right shift 30 bits to calc ramblock size in GB */ -+ info->sample_pages_count = (qemu_ram_get_used_length(block) * -+ sample_pages_per_gigabytes) >> 30; -+ /* Right shift TARGET_PAGE_BITS to calc page count */ -+ info->ramblock_pages = qemu_ram_get_used_length(block) >> -+ TARGET_PAGE_BITS; -+ info->ramblock_addr = qemu_ram_get_host_addr(block); -+ strcpy(info->idstr, qemu_ram_get_idstr(block)); -+} -+ -+static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, -+ struct DirtyRateConfig config, -+ int *block_count) -+{ -+ struct RamblockDirtyInfo *info = NULL; 
-+ struct RamblockDirtyInfo *dinfo = NULL; -+ RAMBlock *block = NULL; -+ int total_count = 0; -+ int index = 0; -+ bool ret = false; -+ -+ RAMBLOCK_FOREACH_MIGRATABLE(block) { -+ total_count++; -+ } -+ -+ dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo)); -+ if (dinfo == NULL) { -+ goto out; -+ } -+ -+ RAMBLOCK_FOREACH_MIGRATABLE(block) { -+ if (index >= total_count) { -+ break; -+ } -+ info = &dinfo[index]; -+ get_ramblock_dirty_info(block, info, &config); -+ if (!save_ramblock_hash(info)) { -+ goto out; -+ } -+ index++; -+ } -+ ret = true; -+ -+out: -+ *block_count = index; -+ *block_dinfo = dinfo; -+ return ret; -+} -+ - static void calculate_dirtyrate(struct DirtyRateConfig config) - { - /* todo */ --- -2.27.0 - diff --git a/migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch b/migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch deleted file mode 100644 index e0ebb2a70e3771da65a340b081094e63318d42fe..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 466b3eee340f022e53478e706e8d4dc02136b1e1 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:21:57 +0800 -Subject: [PATCH] migration/dirtyrate: add DirtyRateStatus to denote - calculation status - -add DirtyRateStatus to denote calculating status. - -Signed-off-by: Chuan Zheng -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-3-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert - atomic name fixup ---- - migration/dirtyrate.c | 26 ++++++++++++++++++++++++++ - qapi/migration.json | 17 +++++++++++++++++ - 2 files changed, 43 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 29ef663acb..44a60bf10d 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -22,6 +22,19 @@ - #include "migration.h" - #include "dirtyrate.h" - -+static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; -+ -+static int dirtyrate_set_state(int *state, int old_state, int new_state) -+{ -+ assert(new_state < DIRTY_RATE_STATUS__MAX); -+ if (atomic_cmpxchg(state, old_state, new_state) == old_state) { -+ return 0; -+ } else { -+ return -1; -+ } -+} -+ -+ - static void calculate_dirtyrate(struct DirtyRateConfig config) - { - /* todo */ -@@ -31,8 +44,21 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) - void *get_dirtyrate_thread(void *arg) - { - struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; -+ int ret; -+ -+ ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, -+ DIRTY_RATE_STATUS_MEASURING); -+ if (ret == -1) { -+ error_report("change dirtyrate state failed."); -+ return NULL; -+ } - - calculate_dirtyrate(config); - -+ ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, -+ DIRTY_RATE_STATUS_MEASURED); -+ if (ret == -1) { -+ error_report("change dirtyrate state failed."); -+ } - return NULL; - } -diff --git a/qapi/migration.json b/qapi/migration.json -index 9cfbaf8c6c..fdddde0af7 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -1445,3 +1445,20 @@ - # Since: 3.0 - ## - { 'command': 'migrate-pause', 'allow-oob': true } -+ -+## -+# @DirtyRateStatus: -+# -+# An enumeration of dirtyrate status. -+# -+# @unstarted: the dirtyrate thread has not been started. -+# -+# @measuring: the dirtyrate thread is measuring. -+# -+# @measured: the dirtyrate thread has measured and results are available. 
-+# -+# Since: 5.2 -+# -+## -+{ 'enum': 'DirtyRateStatus', -+ 'data': [ 'unstarted', 'measuring', 'measured'] } --- -2.27.0 - diff --git a/migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch b/migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch deleted file mode 100644 index 16660d7dd46f4ec3712f9714de37c41a3c3554f4..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 1cee10fe37193c6b5ed4e765a2a6d1e6c1411922 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:00 +0800 -Subject: [PATCH] migration/dirtyrate: move RAMBLOCK_FOREACH_MIGRATABLE into - ram.h - -RAMBLOCK_FOREACH_MIGRATABLE is need in dirtyrate measure, -move the existing definition up into migration/ram.h - -Signed-off-by: Chuan Zheng -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: David Edmondson -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-6-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/dirtyrate.c | 1 + - migration/ram.c | 11 +---------- - migration/ram.h | 10 ++++++++++ - 3 files changed, 12 insertions(+), 10 deletions(-) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index cbb323d6ec..1ccc71077d 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -20,6 +20,7 @@ - #include "qemu/rcu_queue.h" - #include "qapi/qapi-commands-migration.h" - #include "migration.h" -+#include "ram.h" - #include "dirtyrate.h" - - static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; -diff --git a/migration/ram.c b/migration/ram.c -index 848059d9fb..1a33c7b3e2 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -159,21 +159,12 @@ out: - return ret; - } - --static bool ramblock_is_ignored(RAMBlock *block) -+bool ramblock_is_ignored(RAMBlock *block) - { - return !qemu_ram_is_migratable(block) || - (migrate_ignore_shared() && qemu_ram_is_shared(block)); - } - --/* Should be holding either ram_list.mutex, or the RCU lock. */ --#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ -- INTERNAL_RAMBLOCK_FOREACH(block) \ -- if (ramblock_is_ignored(block)) {} else -- --#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ -- INTERNAL_RAMBLOCK_FOREACH(block) \ -- if (!qemu_ram_is_migratable(block)) {} else -- - #undef RAMBLOCK_FOREACH - - int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) -diff --git a/migration/ram.h b/migration/ram.h -index a788ff0e8e..565ec86b1f 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -37,6 +37,16 @@ extern MigrationStats ram_counters; - extern XBZRLECacheStats xbzrle_counters; - extern CompressionStats compression_counters; - -+bool ramblock_is_ignored(RAMBlock *block); -+/* Should be holding either ram_list.mutex, or the RCU lock. 
*/ -+#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ -+ INTERNAL_RAMBLOCK_FOREACH(block) \ -+ if (ramblock_is_ignored(block)) {} else -+ -+#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ -+ INTERNAL_RAMBLOCK_FOREACH(block) \ -+ if (!qemu_ram_is_migratable(block)) {} else -+ - int xbzrle_cache_resize(int64_t new_size, Error **errp); - uint64_t ram_bytes_remaining(void); - uint64_t ram_bytes_total(void); --- -2.27.0 - diff --git a/migration-dirtyrate-present-dirty-rate-only-when-que.patch b/migration-dirtyrate-present-dirty-rate-only-when-que.patch deleted file mode 100644 index d6d5dd423913c89888c29e530b18009bf1f63f50..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-present-dirty-rate-only-when-que.patch +++ /dev/null @@ -1,69 +0,0 @@ -From ba399ad806d195f31d0b76fa55363a4147459a5b Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Tue, 29 Sep 2020 11:42:18 +0800 -Subject: [PATCH] migration/dirtyrate: present dirty rate only when querying - the rate has completed - -Make dirty_rate field optional, present dirty rate only when querying -the rate has completed. -The qmp results is shown as follow: -@unstarted: -{"return":{"status":"unstarted","start-time":0,"calc-time":0},"id":"libvirt-12"} -@measuring: -{"return":{"status":"measuring","start-time":102931,"calc-time":1},"id":"libvirt-85"} -@measured: -{"return":{"status":"measured","dirty-rate":4,"start-time":150146,"calc-time":1},"id":"libvirt-15"} - -Signed-off-by: Chuan Zheng -Reviewed-by: David Edmondson -Message-Id: <1601350938-128320-3-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/dirtyrate.c | 3 +-- - qapi/migration.json | 8 +++----- - 2 files changed, 4 insertions(+), 7 deletions(-) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index f1c007d569..00c8085456 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -69,9 +69,8 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) - struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); - - if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { -+ info->has_dirty_rate = true; - info->dirty_rate = dirty_rate; -- } else { -- info->dirty_rate = -1; - } - - info->status = CalculatingState; -diff --git a/qapi/migration.json b/qapi/migration.json -index 76f5b42493..6844ddfab3 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -1468,10 +1468,8 @@ - # - # Information about current dirty page rate of vm. - # --# @dirty-rate: @dirtyrate describing the dirty page rate of vm --# in units of MB/s. --# If this field returns '-1', it means querying has not --# yet started or completed. -+# @dirty-rate: an estimate of the dirty page rate of the VM in units of -+# MB/s, present only when estimating the rate has completed. 
- # - # @status: status containing dirtyrate query status includes - # 'unstarted' or 'measuring' or 'measured' -@@ -1484,7 +1482,7 @@ - # - ## - { 'struct': 'DirtyRateInfo', -- 'data': {'dirty-rate': 'int64', -+ 'data': {'*dirty-rate': 'int64', - 'status': 'DirtyRateStatus', - 'start-time': 'int64', - 'calc-time': 'int64'} } --- -2.27.0 - diff --git a/migration-dirtyrate-record-start_time-and-calc_time-.patch b/migration-dirtyrate-record-start_time-and-calc_time-.patch deleted file mode 100644 index a4a4fed2c90eb99565d7712a6565c2284331b29c..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-record-start_time-and-calc_time-.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 5de3e40a6c1a4afcc2612ac109326956e7cded63 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Tue, 29 Sep 2020 11:42:17 +0800 -Subject: [PATCH] migration/dirtyrate: record start_time and calc_time while at - the measuring state - -Querying could include both the start-time and the calc-time while at the measuring -state, allowing a caller to determine when they should expect to come back looking -for a result. - -Signed-off-by: Chuan Zheng -Message-Id: <1601350938-128320-2-git-send-email-zhengchuan@huawei.com> -Reviewed-by: David Edmondson -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/dirtyrate.c | 13 +++++++++---- - 1 file changed, 9 insertions(+), 4 deletions(-) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 80936a4ca6..f1c007d569 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -83,14 +83,14 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) - return info; - } - --static void reset_dirtyrate_stat(void) -+static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time) - { - DirtyStat.total_dirty_samples = 0; - DirtyStat.total_sample_count = 0; - DirtyStat.total_block_mem_MB = 0; - DirtyStat.dirty_rate = -1; -- DirtyStat.start_time = 0; -- DirtyStat.calc_time = 0; -+ DirtyStat.start_time = start_time; -+ DirtyStat.calc_time = calc_time; - } - - static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) -@@ -335,7 +335,6 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) - int64_t initial_time; - - rcu_register_thread(); -- reset_dirtyrate_stat(); - rcu_read_lock(); - initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { -@@ -365,6 +364,8 @@ void *get_dirtyrate_thread(void *arg) - { - struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; - int ret; -+ int64_t start_time; -+ int64_t calc_time; - - ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, - DIRTY_RATE_STATUS_MEASURING); -@@ -373,6 +374,10 @@ void *get_dirtyrate_thread(void *arg) - return NULL; - } - -+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; -+ calc_time = config.sample_period_seconds; -+ init_dirtyrate_stat(start_time, calc_time); -+ - calculate_dirtyrate(config); - - ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, --- -2.27.0 - diff --git a/migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch b/migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch deleted file mode 100644 index 
f4a2b4ff12bf88e0f37211df631867d0ee6f6a6d..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 18dbd0efc14aa190b2f4c364fa614b0994af5af0 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:21:56 +0800 -Subject: [PATCH] migration/dirtyrate: setup up query-dirtyrate framwork - -Add get_dirtyrate_thread() functions to setup query-dirtyrate -framework. - -Signed-off-by: Chuan Zheng -Signed-off-by: YanYing Zhuang -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: David Edmondson -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-2-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - Makefile.target | 1 + - migration/dirtyrate.c | 38 ++++++++++++++++++++++++++++++++++++++ - migration/dirtyrate.h | 28 ++++++++++++++++++++++++++++ - 3 files changed, 67 insertions(+) - create mode 100644 migration/dirtyrate.c - create mode 100644 migration/dirtyrate.h - -diff --git a/Makefile.target b/Makefile.target -index 933b27453a..5ea840964c 100644 ---- a/Makefile.target -+++ b/Makefile.target -@@ -161,6 +161,7 @@ obj-y += qapi/ - obj-y += memory.o - obj-y += memory_mapping.o - obj-y += migration/ram.o -+obj-y += migration/dirtyrate.o - LIBS := $(libs_softmmu) $(LIBS) - - # Hardware support -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -new file mode 100644 -index 0000000000..29ef663acb ---- /dev/null -+++ b/migration/dirtyrate.c -@@ -0,0 +1,38 @@ -+/* -+ * Dirtyrate implement code -+ * -+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD. -+ * -+ * Authors: -+ * Chuan Zheng -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. 
-+ */ -+ -+#include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "cpu.h" -+#include "qemu/config-file.h" -+#include "exec/memory.h" -+#include "exec/ram_addr.h" -+#include "exec/target_page.h" -+#include "qemu/rcu_queue.h" -+#include "qapi/qapi-commands-migration.h" -+#include "migration.h" -+#include "dirtyrate.h" -+ -+static void calculate_dirtyrate(struct DirtyRateConfig config) -+{ -+ /* todo */ -+ return; -+} -+ -+void *get_dirtyrate_thread(void *arg) -+{ -+ struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; -+ -+ calculate_dirtyrate(config); -+ -+ return NULL; -+} -diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h -new file mode 100644 -index 0000000000..84ab9409ac ---- /dev/null -+++ b/migration/dirtyrate.h -@@ -0,0 +1,28 @@ -+/* -+ * Dirtyrate common functions -+ * -+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD. -+ * -+ * Authors: -+ * Chuan Zheng -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#ifndef QEMU_MIGRATION_DIRTYRATE_H -+#define QEMU_MIGRATION_DIRTYRATE_H -+ -+/* -+ * Sample 512 pages per GB as default. -+ * TODO: Make it configurable. 
-+ */ -+#define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 -+ -+struct DirtyRateConfig { -+ uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ -+ int64_t sample_period_seconds; /* time duration between two sampling */ -+}; -+ -+void *get_dirtyrate_thread(void *arg); -+#endif --- -2.27.0 - diff --git a/migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch b/migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch deleted file mode 100644 index 3bdb51b5d936d3a51bc9321815f98b742070b3ab..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 91eed005e1af25f49ab38732cd3c9ea8071331b0 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Fri, 30 Oct 2020 11:58:01 +0800 -Subject: [PATCH] migration/dirtyrate: simplify includes in dirtyrate.c - -Remove redundant blank line which is left by Commit 662770af7c6e8c, -also take this opportunity to remove redundant includes in dirtyrate.c. - -Signed-off-by: Chuan Zheng -Message-Id: <1604030281-112946-1-git-send-email-zhengchuan@huawei.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert ---- - migration/dirtyrate.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 00c8085456..9a6d0e2cc6 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -10,17 +10,16 @@ - * See the COPYING file in the top-level directory. 
- */ - --#include - #include "qemu/osdep.h" -+#include - #include "qapi/error.h" - #include "cpu.h" --#include "qemu/config-file.h" - #include "exec/memory.h" - #include "exec/ram_addr.h" - #include "exec/target_page.h" - #include "qemu/rcu_queue.h" -+#include "qemu/error-report.h" - #include "qapi/qapi-commands-migration.h" --#include "migration.h" - #include "ram.h" - #include "trace.h" - #include "dirtyrate.h" --- -2.27.0 - diff --git a/migration-dirtyrate-skip-sampling-ramblock-with-size.patch b/migration-dirtyrate-skip-sampling-ramblock-with-size.patch deleted file mode 100644 index 0e649e3cdef1a3283d1ed0fde909902d5f3274a7..0000000000000000000000000000000000000000 --- a/migration-dirtyrate-skip-sampling-ramblock-with-size.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 0fcff073292e78e08ee24eb784783156b2974f4a Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Wed, 16 Sep 2020 14:22:03 +0800 -Subject: [PATCH] migration/dirtyrate: skip sampling ramblock with size below - MIN_RAMBLOCK_SIZE - -In order to sample real RAM, skip ramblock with size below MIN_RAMBLOCK_SIZE -which is set as 128M. - -Signed-off-by: Chuan Zheng -Reviewed-by: David Edmondson -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Li Qiang -Message-Id: <1600237327-33618-9-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/dirtyrate.c | 21 +++++++++++++++++++++ - migration/dirtyrate.h | 5 +++++ - 2 files changed, 26 insertions(+) - -diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c -index 0412f825dc..97bb883850 100644 ---- a/migration/dirtyrate.c -+++ b/migration/dirtyrate.c -@@ -138,6 +138,18 @@ static void get_ramblock_dirty_info(RAMBlock *block, - strcpy(info->idstr, qemu_ram_get_idstr(block)); - } - -+static bool skip_sample_ramblock(RAMBlock *block) -+{ -+ /* -+ * Sample only blocks larger than MIN_RAMBLOCK_SIZE. 
-+ */ -+ if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { -+ return true; -+ } -+ -+ return false; -+} -+ - static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, - struct DirtyRateConfig config, - int *block_count) -@@ -150,6 +162,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, - bool ret = false; - - RAMBLOCK_FOREACH_MIGRATABLE(block) { -+ if (skip_sample_ramblock(block)) { -+ continue; -+ } - total_count++; - } - -@@ -159,6 +174,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, - } - - RAMBLOCK_FOREACH_MIGRATABLE(block) { -+ if (skip_sample_ramblock(block)) { -+ continue; -+ } - if (index >= total_count) { - break; - } -@@ -225,6 +243,9 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, - RAMBlock *block = NULL; - - RAMBLOCK_FOREACH_MIGRATABLE(block) { -+ if (skip_sample_ramblock(block)) { -+ continue; -+ } - block_dinfo = find_block_matched(block, block_count, info); - if (block_dinfo == NULL) { - continue; -diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h -index 312debca6f..be5b8ec2b1 100644 ---- a/migration/dirtyrate.h -+++ b/migration/dirtyrate.h -@@ -24,6 +24,11 @@ - */ - #define RAMBLOCK_INFO_MAX_LEN 256 - -+/* -+ * Minimum RAMBlock size to sample, in megabytes. 
-+ */ -+#define MIN_RAMBLOCK_SIZE 128 -+ - struct DirtyRateConfig { - uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ - int64_t sample_period_seconds; /* time duration between two sampling */ --- -2.27.0 - diff --git a/migration-fix-COLO-broken-caused-by-a-previous-commi.patch b/migration-fix-COLO-broken-caused-by-a-previous-commi.patch deleted file mode 100644 index 3ac65d9c79a43e8233c80d633dc02835e9f8344c..0000000000000000000000000000000000000000 --- a/migration-fix-COLO-broken-caused-by-a-previous-commi.patch +++ /dev/null @@ -1,39 +0,0 @@ -From c635692b4e75db3f9547f6d4ed9d73d1cdb34989 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 14:43:45 +0800 -Subject: [PATCH] migration: fix COLO broken caused by a previous commit - -This commit "migration: Create migration_is_running()" broke -COLO. Becuase there is a process broken by this commit. - -colo_process_checkpoint - ->colo_do_checkpoint_transaction - ->migrate_set_block_enabled - ->qmp_migrate_set_capabilities - -It can be fixed by make COLO process as an exception, -Maybe we need a better way to fix it. 
- -Cc: Juan Quintela -Signed-off-by: zhanghailiang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/migration.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 923a1d9d3f..0e396f22b4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -833,7 +833,6 @@ bool migration_is_running(int state) - case MIGRATION_STATUS_PRE_SWITCHOVER: - case MIGRATION_STATUS_DEVICE: - case MIGRATION_STATUS_CANCELLING: -- case MIGRATION_STATUS_COLO: - return true; - - default: --- -2.27.0 - diff --git a/migration-fix-cleanup_bh-leak-on-resume.patch b/migration-fix-cleanup_bh-leak-on-resume.patch deleted file mode 100644 index 6b75ed01b8faa4c3d5b9d1e17e6d3d205daa2396..0000000000000000000000000000000000000000 --- a/migration-fix-cleanup_bh-leak-on-resume.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 1d7c227bbb24665cea03f96a984ad6be223ac40c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 25 Mar 2020 19:47:21 +0100 -Subject: [PATCH 2/5] migration: fix cleanup_bh leak on resume -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since commit 8c6b0356b53977bcfdea5299db07884915425b0c ("util/async: -make bh_aio_poll() O(1)"), migration-test reveals a leak: - -QTEST_QEMU_BINARY=x86_64-softmmu/qemu-system-x86_64 -tests/qtest/migration-test -p /x86_64/migration/postcopy/recovery -tests/qtest/libqtest.c:140: kill_qemu() tried to terminate QEMU -process but encountered exit status 1 (expected 0) - -================================================================= -==2082571==ERROR: LeakSanitizer: detected memory leaks - -Direct leak of 40 byte(s) in 1 object(s) allocated from: - #0 0x7f25971dfc58 in __interceptor_malloc (/lib64/libasan.so.5+0x10dc58) - #1 0x7f2596d08358 in g_malloc (/lib64/libglib-2.0.so.0+0x57358) - #2 0x560970d006f8 in qemu_bh_new /home/elmarco/src/qemu/util/main-loop.c:532 - #3 0x5609704afa02 in 
migrate_fd_connect -/home/elmarco/src/qemu/migration/migration.c:3407 - #4 0x5609704b6b6f in migration_channel_connect -/home/elmarco/src/qemu/migration/channel.c:92 - #5 0x5609704b2bfb in socket_outgoing_migration -/home/elmarco/src/qemu/migration/socket.c:108 - #6 0x560970b9bd6c in qio_task_complete /home/elmarco/src/qemu/io/task.c:196 - #7 0x560970b9aa97 in qio_task_thread_result -/home/elmarco/src/qemu/io/task.c:111 - #8 0x7f2596cfee3a (/lib64/libglib-2.0.so.0+0x4de3a) - -Signed-off-by: Marc-André Lureau -Message-Id: <20200325184723.2029630-2-marcandre.lureau@redhat.com> -Reviewed-by: Juan Quintela -Signed-off-by: Paolo Bonzini -Signed-off-by: Zhenyu Ye ---- - migration/migration.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8f2fc2b4..7949f2a4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3313,7 +3313,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; - - s->expected_downtime = s->parameters.downtime_limit; -- s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); -+ if (resume) { -+ assert(s->cleanup_bh); -+ } else { -+ assert(!s->cleanup_bh); -+ s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); -+ } - if (error_in) { - migrate_fd_error(s, error_in); - migrate_fd_cleanup(s); --- -2.22.0.windows.1 - diff --git a/migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch b/migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch deleted file mode 100644 index 46775ae5ee200005e5d56f10ccd2c02e75685c7a..0000000000000000000000000000000000000000 --- a/migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch +++ /dev/null @@ -1,78 +0,0 @@ -From d65b5b20f4ada9e6c5af37b0fb59fa4709c4bdc9 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Fri, 5 Mar 2021 16:06:52 +0800 -Subject: [PATCH] migration: fix memory leak in qmp_migrate_set_parameters - -"tmp.tls_hostname" and 
"tmp.tls_creds" allocated by migrate_params_test_apply() -is forgot to free at the end of qmp_migrate_set_parameters(). Fix that. - -The leak stack: -Direct leak of 2 byte(s) in 2 object(s) allocated from: - #0 0xffffb597c20b in __interceptor_malloc (/usr/lib64/libasan.so.4+0xd320b) - #1 0xffffb52dcb1b in g_malloc (/usr/lib64/libglib-2.0.so.0+0x58b1b) - #2 0xffffb52f8143 in g_strdup (/usr/lib64/libglib-2.0.so.0+0x74143) - #3 0xaaaac52447fb in migrate_params_test_apply (/usr/src/debug/qemu-4.1.0/migration/migration.c:1377) - #4 0xaaaac52fdca7 in qmp_migrate_set_parameters (/usr/src/debug/qemu-4.1.0/qapi/qapi-commands-migration.c:192) - #5 0xaaaac551d543 in qmp_dispatch (/usr/src/debug/qemu-4.1.0/qapi/qmp-dispatch.c:165) - #6 0xaaaac52a0a8f in qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:125) - #7 0xaaaac52a1c7f in monitor_qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:214) - #8 0xaaaac55cb0cf in aio_bh_call (/usr/src/debug/qemu-4.1.0/util/async.c:117) - #9 0xaaaac55d4543 in aio_bh_poll (/usr/src/debug/qemu-4.1.0/util/aio-posix.c:459) - #10 0xaaaac55cae0f in aio_dispatch (/usr/src/debug/qemu-4.1.0/util/async.c:268) - #11 0xffffb52d6a7b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a7b) - #12 0xaaaac55d1e3b(/usr/bin/qemu-kvm-4.1.0+0x1622e3b) - #13 0xaaaac4e314bb(/usr/bin/qemu-kvm-4.1.0+0xe824bb) - #14 0xaaaac47f45ef(/usr/bin/qemu-kvm-4.1.0+0x8455ef) - #15 0xffffb4bfef3f in __libc_start_main (/usr/lib64/libc.so.6+0x23f3f) - #16 0xaaaac47ffacb(/usr/bin/qemu-kvm-4.1.0+0x850acb) - -Direct leak of 2 byte(s) in 2 object(s) allocated from: - #0 0xffffb597c20b in __interceptor_malloc (/usr/lib64/libasan.so.4+0xd320b) - #1 0xffffb52dcb1b in g_malloc (/usr/lib64/libglib-2.0.so.0+0x58b1b) - #2 0xffffb52f8143 in g_strdup (/usr/lib64/libglib-2.0.so.0+0x74143) - #3 0xaaaac5244893 in migrate_params_test_apply (/usr/src/debug/qemu-4.1.0/migration/migration.c:1382) - #4 0xaaaac52fdca7 in qmp_migrate_set_parameters 
(/usr/src/debug/qemu-4.1.0/qapi/qapi-commands-migration.c:192) - #5 0xaaaac551d543 in qmp_dispatch (/usr/src/debug/qemu-4.1.0/qapi/qmp-dispatch.c) - #6 0xaaaac52a0a8f in qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:125) - #7 0xaaaac52a1c7f in monitor_qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:214) - #8 0xaaaac55cb0cf in aio_bh_call (/usr/src/debug/qemu-4.1.0/util/async.c:117) - #9 0xaaaac55d4543 in aio_bh_poll (/usr/src/debug/qemu-4.1.0/util/aio-posix.c:459) - #10 0xaaaac55cae0f in in aio_dispatch (/usr/src/debug/qemu-4.1.0/util/async.c:268) - #11 0xffffb52d6a7b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a7b) - #12 0xaaaac55d1e3b(/usr/bin/qemu-kvm-4.1.0+0x1622e3b) - #13 0xaaaac4e314bb(/usr/bin/qemu-kvm-4.1.0+0xe824bb) - #14 0xaaaac47f45ef (/usr/bin/qemu-kvm-4.1.0+0x8455ef) - #15 0xffffb4bfef3f in __libc_start_main (/usr/lib64/libc.so.6+0x23f3f) - #16 0xaaaac47ffacb(/usr/bin/qemu-kvm-4.1.0+0x850acb) - -Signed-off-by: Chuan Zheng -Reviewed-by: KeQian Zhu -Reviewed-by: HaiLiang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/migration.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 17a5c16c79..9b40380d7c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1291,12 +1291,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, - - if (params->has_tls_creds) { - assert(params->tls_creds->type == QTYPE_QSTRING); -- dest->tls_creds = g_strdup(params->tls_creds->u.s); -+ dest->tls_creds = params->tls_creds->u.s; - } - - if (params->has_tls_hostname) { - assert(params->tls_hostname->type == QTYPE_QSTRING); -- dest->tls_hostname = g_strdup(params->tls_hostname->u.s); -+ dest->tls_hostname = params->tls_hostname->u.s; - } - - if (params->has_max_bandwidth) { --- -2.27.0 - diff --git a/migration-fix-multifd_send_pages-next-channel.patch b/migration-fix-multifd_send_pages-next-channel.patch deleted 
file mode 100644 index 4bb113c644c4175386636e02a5d7188e8c2e408c..0000000000000000000000000000000000000000 --- a/migration-fix-multifd_send_pages-next-channel.patch +++ /dev/null @@ -1,50 +0,0 @@ -From c11a23b92334ae86eddfdc2b155d404293891985 Mon Sep 17 00:00:00 2001 -From: alexchen -Date: Tue, 8 Sep 2020 11:18:50 +0000 -Subject: [PATCH 08/11] migration: fix multifd_send_pages() next channel - -multifd_send_pages() loops around the available channels, -the next channel to use between two calls to multifd_send_pages() is stored -inside a local static variable, next_channel. - -It works well, except if the number of channels decreases between two calls -to multifd_send_pages(). In this case, the loop can try to access the -data of a channel that doesn't exist anymore. - -The problem can be triggered if we start a migration with a given number of -channels and then we cancel the migration to restart it with a lower number. -This ends generally with an error like: -qemu-system-ppc64: .../util/qemu-thread-posix.c:77: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed. - -This patch fixes the error by capping next_channel with the current number -of channels before using it. - -Signed-off-by: Laurent Vivier -Message-Id: <20200617113154.593233-1-lvivier@redhat.com> -Reviewed-by: Juan Quintela -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: BiaoXiang Ye ---- - migration/ram.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 83cabec6..ac033f22 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -931,6 +931,12 @@ static int multifd_send_pages(RAMState *rs) - uint64_t transferred; - - qemu_sem_wait(&multifd_send_state->channels_ready); -+ /* -+ * next_channel can remain from a previous migration that was -+ * using more channels, so ensure it doesn't overflow if the -+ * limit is lower now. 
-+ */ -+ next_channel %= migrate_multifd_channels(); - for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { - p = &multifd_send_state->params[i]; - --- -2.27.0.dirty - diff --git a/migration-fix-possible-int-overflow.patch b/migration-fix-possible-int-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..71b7875395c9a61be28a4f7e4d27076290d06e1d --- /dev/null +++ b/migration-fix-possible-int-overflow.patch @@ -0,0 +1,35 @@ +From 254c67a88ab54fdfe1eb55d7efaf4386a9597cd0 Mon Sep 17 00:00:00 2001 +From: tangzhongrui +Date: Sat, 16 Nov 2024 17:38:50 +0800 +Subject: [PATCH] migration: fix-possible-int-overflow + +stat64_add() takes uint64_t as 2nd argument, but both +"p->next_packet_size" and "p->packet_len" are uint32_t. +Thus, their sum may overflow uint32_t. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Signed-off-by: Dmitry Frolov +Link: https://lore.kernel.org/r/20241113140509.325732-2-frolov@swemel.ru +Signed-off-by: Peter Xu +Signed-off-by: Zhongrui Tang +--- + migration/multifd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 7d373a245e..f3bf6888c0 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -735,7 +735,7 @@ static void *multifd_send_thread(void *opaque) + } + + stat64_add(&mig_stats.multifd_bytes, +- p->next_packet_size + p->packet_len); ++ (uint64_t)p->next_packet_size + p->packet_len); + p->next_packet_size = 0; + qemu_mutex_lock(&p->mutex); + p->pending_job--; +-- +2.41.0.windows.1 + diff --git a/migration-memory-Optimize-unnecessary-memory-region-.patch b/migration-memory-Optimize-unnecessary-memory-region-.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ff8292479387ade70ea142a0a1fb6d556a98f70 --- /dev/null +++ b/migration-memory-Optimize-unnecessary-memory-region-.patch @@ -0,0 +1,177 @@ +From 0cc093ba0d25536162685a0bd45b80f97d91cf15 Mon Sep 17 00:00:00 2001 +From:
libai +Date: Wed, 9 Apr 2025 11:06:52 +0800 +Subject: [PATCH] migration/memory:Optimize unnecessary memory region updates + during live migration + +During the startup phase of the destination VM for live migration, +there is no need to update the memory region in real time. +Instead, just force commit once before each device load state. + +Signed-off-by: libai +--- + include/exec/memory.h | 5 +++++ + migration/savevm.c | 7 ++++++ + migration/vmstate.c | 8 +++++++ + system/memory.c | 45 ++++++++++++++++++++++----------------- + tests/unit/test-vmstate.c | 6 ++++++ + 5 files changed, 51 insertions(+), 20 deletions(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index c14dc69d27..924bdbd481 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2567,6 +2567,11 @@ void memory_region_transaction_begin(void); + */ + void memory_region_transaction_commit(void); + ++/** ++ * memory_region_commit: Force commit memory region immediately. ++ */ ++void memory_region_commit(void); ++ + /** + * memory_listener_register: register callbacks to be called when memory + * sections are mapped or unmapped into an address +diff --git a/migration/savevm.c b/migration/savevm.c +index cc65da605e..030a4bf7d2 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2857,6 +2857,10 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) + uint8_t section_type; + int ret = 0; + ++ if (qemu_mutex_iothread_locked()) { ++ memory_region_transaction_begin(); ++ } ++ + retry: + while (true) { + section_type = qemu_get_byte(f); +@@ -2900,6 +2904,9 @@ retry: + } + + out: ++ if (qemu_mutex_iothread_locked()) { ++ memory_region_transaction_commit(); ++ } + if (ret < 0) { + qemu_file_set_error(f, ret); + +diff --git a/migration/vmstate.c b/migration/vmstate.c +index bd08e390c5..e621d8ddb7 100644 +--- a/migration/vmstate.c ++++ b/migration/vmstate.c +@@ -20,6 +20,7 @@ + #include "qemu/bitops.h" + #include "qemu/error-report.h" + #include "trace.h" 
++#include "exec/memory.h" + + static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, + void *opaque, JSONWriter *vmdesc, +@@ -184,6 +185,13 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, + return ret; + } + if (vmsd->post_load) { ++ /** ++ * We call memory_transaction_begin in qemu_loadvm_state_main, ++ * so address space will not be updated during vm state loading. ++ * But some dev need to use address space here, force commit ++ * memory region transaction before call post_load. ++ */ ++ memory_region_commit(); + ret = vmsd->post_load(opaque, version_id); + } + trace_vmstate_load_state_end(vmsd->name, "end", ret); +diff --git a/system/memory.c b/system/memory.c +index 9db07fd832..fd76eb7048 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -1117,34 +1117,39 @@ void memory_region_transaction_begin(void) + ++memory_region_transaction_depth; + } + +-void memory_region_transaction_commit(void) ++void memory_region_commit(void) + { + AddressSpace *as; + ++ if (memory_region_update_pending) { ++ flatviews_reset(); ++ ++ MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); ++ ++ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { ++ address_space_set_flatview(as); ++ address_space_update_ioeventfds(as); ++ } ++ memory_region_update_pending = false; ++ ioeventfd_update_pending = false; ++ MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); ++ } else if (ioeventfd_update_pending) { ++ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { ++ address_space_update_ioeventfds(as); ++ } ++ ioeventfd_update_pending = false; ++ } ++} ++ ++void memory_region_transaction_commit(void) ++{ + assert(memory_region_transaction_depth); + assert(qemu_mutex_iothread_locked()); + + --memory_region_transaction_depth; + if (!memory_region_transaction_depth) { +- if (memory_region_update_pending) { +- flatviews_reset(); +- +- MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); +- +- QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { +- 
address_space_set_flatview(as); +- address_space_update_ioeventfds(as); +- } +- memory_region_update_pending = false; +- ioeventfd_update_pending = false; +- MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); +- } else if (ioeventfd_update_pending) { +- QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { +- address_space_update_ioeventfds(as); +- } +- ioeventfd_update_pending = false; +- } +- } ++ memory_region_commit(); ++ } + } + + static void memory_region_destructor_none(MemoryRegion *mr) +diff --git a/tests/unit/test-vmstate.c b/tests/unit/test-vmstate.c +index 0b7d5ecd68..22c586eee0 100644 +--- a/tests/unit/test-vmstate.c ++++ b/tests/unit/test-vmstate.c +@@ -31,6 +31,7 @@ + #include "../migration/savevm.h" + #include "qemu/module.h" + #include "io/channel-file.h" ++#include "exec/memory.h" + + static int temp_fd; + +@@ -1479,6 +1480,11 @@ static void test_tmp_struct(void) + g_assert_cmpint(obj.f, ==, 8); /* From the child->parent */ + } + ++/* stub for ut */ ++void memory_region_commit(void) ++{ ++} ++ + int main(int argc, char **argv) + { + g_autofree char *temp_file = g_strdup_printf("%s/vmst.test.XXXXXX", +-- +2.41.0.windows.1 + diff --git a/migration-multifd-Add-UADK-based-compression-and-dec.patch b/migration-multifd-Add-UADK-based-compression-and-dec.patch new file mode 100644 index 0000000000000000000000000000000000000000..1ee0de4d3e4dcfa510e94e12b285db9fa079d46d --- /dev/null +++ b/migration-multifd-Add-UADK-based-compression-and-dec.patch @@ -0,0 +1,187 @@ +From 7b83023e2ecc2debc243cd34032cbf143538f26c Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:08 +0100 +Subject: [84/99] migration/multifd: Add UADK based compression and + decompression + +commit 3c49191a0d011d941b347fda8fdadd88c988e753 upstream. + +Uses UADK wd_do_comp_sync() API to (de)compress a normal page using +hardware accelerator. 
+ +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 132 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 130 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index 535411a405..70bba92eaa 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include "qemu/module.h" + #include "qapi/error.h" ++#include "exec/ramblock.h" + #include "migration.h" + #include "multifd.h" + #include "options.h" +@@ -142,6 +143,15 @@ static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) + p->compress_data = NULL; + } + ++static inline void prepare_next_iov(MultiFDSendParams *p, void *base, ++ uint32_t len) ++{ ++ p->iov[p->iovs_num].iov_base = (uint8_t *)base; ++ p->iov[p->iovs_num].iov_len = len; ++ p->next_packet_size += len; ++ p->iovs_num++; ++} ++ + /** + * multifd_uadk_send_prepare: prepare data to be able to send + * +@@ -155,7 +165,56 @@ static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp) + { +- return -1; ++ struct wd_data *uadk_data = p->compress_data; ++ uint32_t hdr_size; ++ uint8_t *buf = uadk_data->buf; ++ int ret = 0; ++ ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } ++ ++ hdr_size = p->pages->normal_num * sizeof(uint32_t); ++ /* prepare the header that stores the lengths of all compressed data */ ++ prepare_next_iov(p, uadk_data->buf_hdr, hdr_size); ++ ++ for (int i = 0; i < p->pages->normal_num; i++) { ++ struct wd_comp_req creq = { ++ .op_type = WD_DIR_COMPRESS, ++ .src = p->pages->block->host + p->pages->offset[i], ++ .src_len = p->page_size, ++ .dst = buf, ++ /* Set dst_len to double the src in case compressed out >= page_size */ ++ .dst_len = p->page_size * 2, ++ }; ++ ++ ret = 
wd_do_comp_sync(uadk_data->handle, &creq); ++ if (ret || creq.status) { ++ error_setg(errp, "multifd %u: failed compression, ret %d status %d", ++ p->id, ret, creq.status); ++ return -1; ++ } ++ if (creq.dst_len < p->page_size) { ++ uadk_data->buf_hdr[i] = cpu_to_be32(creq.dst_len); ++ prepare_next_iov(p, buf, creq.dst_len); ++ buf += creq.dst_len; ++ } else { ++ /* ++ * Send raw data if compressed out >= page_size. We might be better ++ * off sending raw data if output is slightly less than page_size ++ * as well because at the receive end we can skip the decompression. ++ * But it is tricky to find the right number here. ++ */ ++ uadk_data->buf_hdr[i] = cpu_to_be32(p->page_size); ++ prepare_next_iov(p, p->pages->block->host + p->pages->offset[i], ++ p->page_size); ++ buf += p->page_size; ++ } ++ } ++out: ++ p->flags |= MULTIFD_FLAG_UADK; ++ multifd_send_fill_packet(p); ++ return 0; + } + + /** +@@ -208,7 +267,76 @@ static void multifd_uadk_recv_cleanup(MultiFDRecvParams *p) + */ + static int multifd_uadk_recv(MultiFDRecvParams *p, Error **errp) + { +- return -1; ++ struct wd_data *uadk_data = p->compress_data; ++ uint32_t in_size = p->next_packet_size; ++ uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ uint32_t hdr_len = p->normal_num * sizeof(uint32_t); ++ uint32_t data_len = 0; ++ uint8_t *buf = uadk_data->buf; ++ int ret = 0; ++ ++ if (flags != MULTIFD_FLAG_UADK) { ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", ++ p->id, flags, MULTIFD_FLAG_ZLIB); ++ return -1; ++ } ++ ++ multifd_recv_zero_page_process(p); ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ ++ /* read compressed data lengths */ ++ assert(hdr_len < in_size); ++ ret = qio_channel_read_all(p->c, (void *) uadk_data->buf_hdr, ++ hdr_len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ uadk_data->buf_hdr[i] = be32_to_cpu(uadk_data->buf_hdr[i]); ++ data_len += uadk_data->buf_hdr[i]; ++ 
assert(uadk_data->buf_hdr[i] <= p->page_size); ++ } ++ ++ /* read compressed data */ ++ assert(in_size == hdr_len + data_len); ++ ret = qio_channel_read_all(p->c, (void *)buf, data_len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ struct wd_comp_req creq = { ++ .op_type = WD_DIR_DECOMPRESS, ++ .src = buf, ++ .src_len = uadk_data->buf_hdr[i], ++ .dst = p->host + p->normal[i], ++ .dst_len = p->page_size, ++ }; ++ ++ if (uadk_data->buf_hdr[i] == p->page_size) { ++ memcpy(p->host + p->normal[i], buf, p->page_size); ++ buf += p->page_size; ++ continue; ++ } ++ ++ ret = wd_do_comp_sync(uadk_data->handle, &creq); ++ if (ret || creq.status) { ++ error_setg(errp, "multifd %u: failed decompression, ret %d status %d", ++ p->id, ret, creq.status); ++ return -1; ++ } ++ if (creq.dst_len != p->page_size) { ++ error_setg(errp, "multifd %u: decompressed length error", p->id); ++ return -1; ++ } ++ buf += uadk_data->buf_hdr[i]; ++ } ++ ++ return 0; + } + + static MultiFDMethods multifd_uadk_ops = { +-- +2.33.0 + diff --git a/migration-multifd-Add-UADK-initialization.patch b/migration-multifd-Add-UADK-initialization.patch new file mode 100644 index 0000000000000000000000000000000000000000..14bdca891fb81dca2133d48d85b18ee26da3f10a --- /dev/null +++ b/migration-multifd-Add-UADK-initialization.patch @@ -0,0 +1,244 @@ +From f6ef2126594a919c5f921dfedf79631167efbc40 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:07 +0100 +Subject: [83/99] migration/multifd: Add UADK initialization + +commit 819dd20636d51d5dc9d42aa28edb3dd9c1b8b863 upstream. + +Initialize UADK session and allocate buffers required. The actual +compression/decompression will only be done in a subsequent patch. 
+ +Signed-off-by: Shameer Kolothum +Reviewed-by: Fabiano Rosas +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 209 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 208 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index c2bb07535b..535411a405 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -12,9 +12,216 @@ + + #include "qemu/osdep.h" + #include "qemu/module.h" ++#include "qapi/error.h" ++#include "migration.h" ++#include "multifd.h" ++#include "options.h" ++#include "uadk/wd_comp.h" ++#include "uadk/wd_sched.h" ++ ++struct wd_data { ++ handle_t handle; ++ uint8_t *buf; ++ uint32_t *buf_hdr; ++}; ++ ++static bool uadk_hw_init(void) ++{ ++ char alg[] = "zlib"; ++ int ret; ++ ++ ret = wd_comp_init2(alg, SCHED_POLICY_RR, TASK_HW); ++ if (ret && ret != -WD_EEXIST) { ++ return false; ++ } else { ++ return true; ++ } ++} ++ ++static struct wd_data *multifd_uadk_init_sess(uint32_t count, ++ uint32_t page_size, ++ bool compress, Error **errp) ++{ ++ struct wd_comp_sess_setup ss = {0}; ++ struct sched_params param = {0}; ++ uint32_t size = count * page_size; ++ struct wd_data *wd; ++ ++ if (!uadk_hw_init()) { ++ error_setg(errp, "multifd: UADK hardware not available"); ++ return NULL; ++ } ++ ++ wd = g_new0(struct wd_data, 1); ++ ss.alg_type = WD_ZLIB; ++ if (compress) { ++ ss.op_type = WD_DIR_COMPRESS; ++ /* Add an additional page for handling output > input */ ++ size += page_size; ++ } else { ++ ss.op_type = WD_DIR_DECOMPRESS; ++ } ++ ++ /* We use default level 1 compression and 4K window size */ ++ param.type = ss.op_type; ++ ss.sched_param = &param; ++ ++ wd->handle = wd_comp_alloc_sess(&ss); ++ if (!wd->handle) { ++ error_setg(errp, "multifd: failed wd_comp_alloc_sess"); ++ goto out; ++ } ++ ++ wd->buf = g_try_malloc(size); ++ if (!wd->buf) { ++ error_setg(errp, "multifd: out of mem for uadk buf"); ++ goto
out_free_sess; ++ } ++ wd->buf_hdr = g_new0(uint32_t, count); ++ return wd; ++ ++out_free_sess: ++ wd_comp_free_sess(wd->handle); ++out: ++ wd_comp_uninit2(); ++ g_free(wd); ++ return NULL; ++} ++ ++static void multifd_uadk_uninit_sess(struct wd_data *wd) ++{ ++ wd_comp_free_sess(wd->handle); ++ wd_comp_uninit2(); ++ g_free(wd->buf); ++ g_free(wd->buf_hdr); ++ g_free(wd); ++} ++ ++/** ++ * multifd_uadk_send_setup: setup send side ++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_send_setup(MultiFDSendParams *p, Error **errp) ++{ ++ struct wd_data *wd; ++ ++ wd = multifd_uadk_init_sess(p->page_count, p->page_size, true, errp); ++ if (!wd) { ++ return -1; ++ } ++ ++ p->compress_data = wd; ++ assert(p->iov == NULL); ++ /* ++ * Each page will be compressed independently and sent using an IOV. The ++ * additional two IOVs are used to store packet header and compressed data ++ * length ++ */ ++ ++ p->iov = g_new0(struct iovec, p->page_count + 2); ++ return 0; ++} ++ ++/** ++ * multifd_uadk_send_cleanup: cleanup send side ++ * ++ * Close the channel and return memory. ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) ++{ ++ struct wd_data *wd = p->compress_data; ++ ++ multifd_uadk_uninit_sess(wd); ++ p->compress_data = NULL; ++} ++ ++/** ++ * multifd_uadk_send_prepare: prepare data to be able to send ++ * ++ * Create a compressed buffer with all the pages that we are going to ++ * send. ++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp) ++{ ++ return -1; ++} ++ ++/** ++ * multifd_uadk_recv_setup: setup receive side ++ * ++ * Create the compressed channel and buffer. 
++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_recv_setup(MultiFDRecvParams *p, Error **errp) ++{ ++ struct wd_data *wd; ++ ++ wd = multifd_uadk_init_sess(p->page_count, p->page_size, false, errp); ++ if (!wd) { ++ return -1; ++ } ++ p->compress_data = wd; ++ return 0; ++} ++ ++/** ++ * multifd_uadk_recv_cleanup: cleanup receive side ++ * ++ * Close the channel and return memory. ++ * ++ * @p: Params for the channel that we are using ++ */ ++static void multifd_uadk_recv_cleanup(MultiFDRecvParams *p) ++{ ++ struct wd_data *wd = p->compress_data; ++ ++ multifd_uadk_uninit_sess(wd); ++ p->compress_data = NULL; ++} ++ ++/** ++ * multifd_uadk_recv: read the data from the channel into actual pages ++ * ++ * Read the compressed buffer, and uncompress it into the actual ++ * pages. ++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_recv(MultiFDRecvParams *p, Error **errp) ++{ ++ return -1; ++} ++ ++static MultiFDMethods multifd_uadk_ops = { ++ .send_setup = multifd_uadk_send_setup, ++ .send_cleanup = multifd_uadk_send_cleanup, ++ .send_prepare = multifd_uadk_send_prepare, ++ .recv_setup = multifd_uadk_recv_setup, ++ .recv_cleanup = multifd_uadk_recv_cleanup, ++ .recv = multifd_uadk_recv, ++}; + + static void multifd_uadk_register(void) + { +- /* noop for now */ ++ multifd_register_ops(MULTIFD_COMPRESSION_UADK, &multifd_uadk_ops); + } + migration_init(multifd_uadk_register); +-- +2.33.0 + diff --git a/migration-multifd-Add-a-synchronization-point-for-ch.patch b/migration-multifd-Add-a-synchronization-point-for-ch.patch new file mode 100644 index 0000000000000000000000000000000000000000..738ae801ec22cd62b544744799cdd8f14dbc1fca --- /dev/null +++ b/migration-multifd-Add-a-synchronization-point-for-ch.patch @@ -0,0 +1,127 @@ +From 
5236178dc96f2e9b24aa95bc01d700428a95d023 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:18 -0300 +Subject: [54/99] migration/multifd: Add a synchronization point for channel + creation + +commit 93fa9dc2e0522c54b813dee0898a5feb98b624c9 upstream. + +It is possible that one of the multifd channels fails to be created at +multifd_new_send_channel_async() while the rest of the channel +creation tasks are still in flight. + +This could lead to multifd_save_cleanup() executing the +qemu_thread_join() loop too early and not waiting for the threads +which haven't been created yet, leading to the freeing of resources +that the newly created threads will try to access and crash. + +Add a synchronization point after which there will be no attempts at +thread creation and therefore calling multifd_save_cleanup() past that +point will ensure it properly waits for the threads. + +A note about performance: Prior to this patch, if a channel took too +long to be established, other channels could finish connecting first +and already start taking load. Now we're bounded by the +slowest-connecting channel. + +Reported-by: Avihai Horon +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-7-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 32 ++++++++++++++++++++++++++------ + 1 file changed, 26 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 85d1e7c347..bd240649f7 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -63,6 +63,11 @@ struct { + * Make it easy for now. + */ + uintptr_t packet_num; ++ /* ++ * Synchronization point past which no more channels will be ++ * created. ++ */ ++ QemuSemaphore channels_created; + /* send channels ready */ + QemuSemaphore channels_ready; + /* +@@ -623,10 +628,6 @@ static void multifd_send_terminate_threads(void) + + /* + * Finally recycle all the threads. 
+- * +- * TODO: p->running is still buggy, e.g. we can reach here without the +- * corresponding multifd_new_send_channel_async() get invoked yet, +- * then a new thread can even be created after this function returns. + */ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -671,6 +672,7 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + + static void multifd_send_cleanup_state(void) + { ++ qemu_sem_destroy(&multifd_send_state->channels_created); + qemu_sem_destroy(&multifd_send_state->channels_ready); + g_free(multifd_send_state->params); + multifd_send_state->params = NULL; +@@ -958,18 +960,26 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + + if (migrate_channel_requires_tls_upgrade(ioc)) { + ret = multifd_tls_channel_connect(p, ioc, &local_err); ++ if (ret) { ++ return; ++ } + } else { + ret = multifd_channel_connect(p, ioc, &local_err); + } + ++out: ++ /* ++ * Here we're not interested whether creation succeeded, only that ++ * it happened at all. 
++ */ ++ qemu_sem_post(&multifd_send_state->channels_created); ++ + if (ret) { + return; + } + +-out: + trace_multifd_new_send_channel_async_error(p->id, local_err); + multifd_send_set_error(local_err); +- multifd_send_kick_main(p); + if (!p->c) { + /* + * If no channel has been created, drop the initial +@@ -1002,6 +1012,7 @@ bool multifd_send_setup(void) + multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->pages = multifd_pages_init(page_count); ++ qemu_sem_init(&multifd_send_state->channels_created, 0); + qemu_sem_init(&multifd_send_state->channels_ready, 0); + qatomic_set(&multifd_send_state->exiting, 0); + multifd_send_state->ops = multifd_ops[migrate_multifd_compression()]; +@@ -1027,6 +1038,15 @@ bool multifd_send_setup(void) + multifd_new_send_channel_create(p); + } + ++ /* ++ * Wait until channel creation has started for all channels. The ++ * creation can still fail, but no more channels will be created ++ * past this point. ++ */ ++ for (i = 0; i < thread_count; i++) { ++ qemu_sem_wait(&multifd_send_state->channels_created); ++ } ++ + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +-- +2.33.0 + diff --git a/migration-multifd-Add-new-migration-option-zero-page.patch b/migration-multifd-Add-new-migration-option-zero-page.patch new file mode 100644 index 0000000000000000000000000000000000000000..b732c7a72bf87e1b02ef9ac65f5715bda4cbaad3 --- /dev/null +++ b/migration-multifd-Add-new-migration-option-zero-page.patch @@ -0,0 +1,289 @@ +From 6bb380a1f7c37b5dda17f95519ec118990f332a8 Mon Sep 17 00:00:00 2001 +From: Hao Xiang +Date: Mon, 11 Mar 2024 18:00:11 +0000 +Subject: [68/99] migration/multifd: Add new migration option + zero-page-detection. + +commit 5fdbb1dfccfd59661c95cae760b8e276c5b8e65c upstream. + +This new parameter controls where the zero page checking is running. +1. 
If this parameter is set to 'legacy', zero page checking is +done in the migration main thread. +2. If this parameter is set to 'none', zero page checking is disabled. + +Signed-off-by: Hao Xiang +Reviewed-by: Peter Xu +Acked-by: Markus Armbruster +Link: https://lore.kernel.org/r/20240311180015.3359271-4-hao.xiang@linux.dev +Signed-off-by: Peter Xu + + Conflicts: + hw/core/qdev-properties-system.c + include/hw/qdev-properties-system.h + migration/options.c + qapi/migration.json +[jz: resolve simple context conflicts] +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 10 ++++++++++ + include/hw/qdev-properties-system.h | 4 ++++ + migration/migration-hmp-cmds.c | 9 +++++++++ + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + migration/ram.c | 4 ++++ + qapi/migration.json | 28 +++++++++++++++++++++++++++- + 7 files changed, 76 insertions(+), 1 deletion(-) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index c581d46f2e..cad1e04150 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -732,6 +732,16 @@ const PropertyInfo qdev_prop_mig_mode = { + .set_default_value = qdev_propinfo_set_default_value_enum, + }; + ++const PropertyInfo qdev_prop_zero_page_detection = { ++ .name = "ZeroPageDetection", ++ .description = "zero_page_detection values, " ++ "none,legacy", ++ .enum_table = &ZeroPageDetection_lookup, ++ .get = qdev_propinfo_get_enum, ++ .set = qdev_propinfo_set_enum, ++ .set_default_value = qdev_propinfo_set_default_value_enum, ++}; ++ + /* --- Reserved Region --- */ + + /* +diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h +index 7cf27e51b9..63dcf69978 100644 +--- a/include/hw/qdev-properties-system.h ++++ b/include/hw/qdev-properties-system.h +@@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr; + extern const PropertyInfo qdev_prop_reserved_region; + extern const PropertyInfo 
qdev_prop_multifd_compression; + extern const PropertyInfo qdev_prop_mig_mode; ++extern const PropertyInfo qdev_prop_zero_page_detection; + extern const PropertyInfo qdev_prop_losttickpolicy; + extern const PropertyInfo qdev_prop_blockdev_on_error; + extern const PropertyInfo qdev_prop_blockdev_retry_interval; +@@ -48,6 +49,9 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; + #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \ + MigMode) ++#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \ ++ ZeroPageDetection) + #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ + LostTickPolicy) +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 9857e2c97f..91e51eb7af 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -348,6 +348,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION), + MultiFDCompression_str(params->multifd_compression)); ++ assert(params->has_zero_page_detection); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION), ++ qapi_enum_lookup(&ZeroPageDetection_lookup, ++ params->zero_page_detection)); + monitor_printf(mon, "%s: %" PRIu64 " bytes\n", + MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE), + params->xbzrle_cache_size); +@@ -668,6 +673,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; ++ case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION: ++ p->has_zero_page_detection = true; ++ visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err); ++ break; + case 
MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/migration/options.c b/migration/options.c +index 52ddbac35f..e752163114 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -183,6 +183,9 @@ Property migration_properties[] = { + DEFINE_PROP_MIG_MODE("mode", MigrationState, + parameters.mode, + MIG_MODE_NORMAL), ++ DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState, ++ parameters.zero_page_detection, ++ ZERO_PAGE_DETECTION_LEGACY), + DEFINE_PROP_STRING("sev-pdh", MigrationState, parameters.sev_pdh), + DEFINE_PROP_STRING("sev-plat-cert", MigrationState, parameters.sev_plat_cert), + DEFINE_PROP_STRING("sev-amd-cert", MigrationState, parameters.sev_amd_cert), +@@ -927,6 +930,13 @@ uint64_t migrate_xbzrle_cache_size(void) + return s->parameters.xbzrle_cache_size; + } + ++ZeroPageDetection migrate_zero_page_detection(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->parameters.zero_page_detection; ++} ++ + /* parameter setters */ + + void migrate_set_block_incremental(bool value) +@@ -1042,6 +1052,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; + params->has_mode = true; + params->mode = s->parameters.mode; ++ params->has_zero_page_detection = true; ++ params->zero_page_detection = s->parameters.zero_page_detection; + params->has_hdbss_buffer_size = true; + params->hdbss_buffer_size = s->parameters.hdbss_buffer_size; + +@@ -1081,6 +1093,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_x_vcpu_dirty_limit_period = true; + params->has_vcpu_dirty_limit = true; + params->has_mode = true; ++ params->has_zero_page_detection = true; + params->has_hdbss_buffer_size = true; + + params->sev_pdh = g_strdup(""); +@@ -1422,6 +1435,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + dest->mode = params->mode; + } 
+ ++ if (params->has_zero_page_detection) { ++ dest->zero_page_detection = params->zero_page_detection; ++ } ++ + if (params->sev_pdh) { + assert(params->sev_pdh->type == QTYPE_QSTRING); + dest->sev_pdh = params->sev_pdh->u.s; +@@ -1593,6 +1610,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + s->parameters.mode = params->mode; + } + ++ if (params->has_zero_page_detection) { ++ s->parameters.zero_page_detection = params->zero_page_detection; ++ } ++ + if (params->sev_pdh) { + g_free(s->parameters.sev_pdh); + assert(params->sev_pdh->type == QTYPE_QSTRING); +diff --git a/migration/options.h b/migration/options.h +index 987fc81a18..dbd52d7acd 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -95,6 +95,7 @@ const char *migrate_tls_authz(void); + const char *migrate_tls_creds(void); + const char *migrate_tls_hostname(void); + uint64_t migrate_xbzrle_cache_size(void); ++ZeroPageDetection migrate_zero_page_detection(void); + + /* parameters setters */ + +diff --git a/migration/ram.c b/migration/ram.c +index 9630b654c2..7d0f1120df 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1141,6 +1141,10 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, + QEMUFile *file = pss->pss_channel; + int len = 0; + ++ if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) { ++ return 0; ++ } ++ + if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) { + return 0; + } +diff --git a/qapi/migration.json b/qapi/migration.json +index f672da5c0d..ff247a50ce 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -653,6 +653,18 @@ + { 'enum': 'MigMode', + 'data': [ 'normal', 'cpr-reboot' ] } + ++## ++# @ZeroPageDetection: ++# ++# @none: Do not perform zero page checking. ++# ++# @legacy: Perform zero page checking in main migration thread. 
++# ++# Since: 9.0 ++## ++{ 'enum': 'ZeroPageDetection', ++ 'data': [ 'none', 'legacy' ] } ++ + ## + # @BitmapMigrationBitmapAliasTransform: + # +@@ -891,6 +903,10 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # ++# @zero-page-detection: Whether and how to detect zero pages. ++# See description in @ZeroPageDetection. Default is 'legacy'. ++# (since 9.0) ++# + # @sev-pdh: The target host platform diffie-hellman key encoded in base64, or + # pdh filename for hygon + # (Since 4.2) +@@ -940,6 +956,7 @@ + { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, + 'vcpu-dirty-limit', + 'mode', ++ 'zero-page-detection', + 'sev-pdh', 'sev-plat-cert', 'sev-amd-cert', 'hdbss-buffer-size'] } + + ## +@@ -1098,6 +1115,10 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # ++# @zero-page-detection: Whether and how to detect zero pages. ++# See description in @ZeroPageDetection. Default is 'legacy'. ++# (since 9.0) ++# + # @sev-pdh: The target host platform diffie-hellman key encoded in base64, or + # pdh filename for hygon + # (Since 4.2) +@@ -1169,12 +1190,12 @@ + 'features': [ 'unstable' ] }, + '*vcpu-dirty-limit': 'uint64', + '*mode': 'MigMode', ++ '*zero-page-detection': 'ZeroPageDetection', + '*sev-pdh': 'StrOrNull', + '*sev-plat-cert': 'StrOrNull', + '*sev-amd-cert' : 'StrOrNull', + '*hdbss-buffer-size': 'uint8'} } + +- + ## + # @migrate-set-parameters: + # +@@ -1351,6 +1372,10 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # ++# @zero-page-detection: Whether and how to detect zero pages. ++# See description in @ZeroPageDetection. Default is 'legacy'. 
++# (since 9.0) ++# + # @sev-pdh: The target host platform diffie-hellman key encoded in base64, or + # pdh filename for hygon + # (Since 4.2) +@@ -1418,6 +1443,7 @@ + 'features': [ 'unstable' ] }, + '*vcpu-dirty-limit': 'uint64', + '*mode': 'MigMode', ++ '*zero-page-detection': 'ZeroPageDetection', + '*sev-pdh': 'str', + '*sev-plat-cert': 'str', + '*sev-amd-cert' : 'str', +-- +2.33.0 + diff --git a/migration-multifd-Allow-multifd-without-packets.patch b/migration-multifd-Allow-multifd-without-packets.patch new file mode 100644 index 0000000000000000000000000000000000000000..ee9cf1827e5288d50c628291bdfffaeda5d798c2 --- /dev/null +++ b/migration-multifd-Allow-multifd-without-packets.patch @@ -0,0 +1,363 @@ +From 48942069691dced68ba3ad74014ce0fb8850df46 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:30:08 -0300 +Subject: [67/99] migration/multifd: Allow multifd without packets + +commit 06833d83f8978139395da0f1d6a9fad81b9dd024 upstream. + +For the upcoming support to the new 'mapped-ram' migration stream +format, we cannot use multifd packets because each write into the +ramblock section in the migration file is expected to contain only the +guest pages. They are written at their respective offsets relative to +the ramblock section header. + +There is no space for the packet information and the expected gains +from the new approach come partly from being able to write the pages +sequentially without extraneous data in between. + +The new format also simply doesn't need the packets and all necessary +information can be taken from the standard migration headers with some +(future) changes to multifd code. + +Use the presence of the mapped-ram capability to decide whether to +send packets. + +This only moves code under multifd_use_packets(), it has no effect for +now as mapped-ram cannot yet be enabled with multifd. 
+ +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-15-farosas@suse.de +Signed-off-by: Peter Xu +[jz: make multifd_use_packet to always return true, since mapped-ram + is not backported] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 175 +++++++++++++++++++++++++++++--------------- + 1 file changed, 114 insertions(+), 61 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d5039af833..cac5f2743c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -93,6 +93,11 @@ struct { + MultiFDMethods *ops; + } *multifd_recv_state; + ++static bool multifd_use_packets(void) ++{ ++ return true; ++} ++ + /* Multifd without compression */ + + /** +@@ -123,6 +128,19 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + return; + } + ++static void multifd_send_prepare_iovs(MultiFDSendParams *p) ++{ ++ MultiFDPages_t *pages = p->pages; ++ ++ for (int i = 0; i < pages->num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ p->iov[p->iovs_num].iov_len = p->page_size; ++ p->iovs_num++; ++ } ++ ++ p->next_packet_size = pages->num * p->page_size; ++} ++ + /** + * nocomp_send_prepare: prepare date to be able to send + * +@@ -137,9 +155,13 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { + bool use_zero_copy_send = migrate_zero_copy_send(); +- MultiFDPages_t *pages = p->pages; + int ret; + ++ if (!multifd_use_packets()) { ++ multifd_send_prepare_iovs(p); ++ return 0; ++ } ++ + if (!use_zero_copy_send) { + /* + * Only !zerocopy needs the header in IOV; zerocopy will +@@ -148,13 +170,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + multifd_send_prepare_header(p); + } + +- for (int i = 0; i < pages->num; i++) { +- p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; +- p->iov[p->iovs_num].iov_len = 
p->page_size; +- p->iovs_num++; +- } +- +- p->next_packet_size = pages->num * p->page_size; ++ multifd_send_prepare_iovs(p); + p->flags |= MULTIFD_FLAG_NOCOMP; + + multifd_send_fill_packet(p); +@@ -209,7 +225,13 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + */ + static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + { +- uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ uint32_t flags; ++ ++ if (!multifd_use_packets()) { ++ return 0; ++ } ++ ++ flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + + if (flags != MULTIFD_FLAG_NOCOMP) { + error_setg(errp, "multifd %u: flags received %x flags expected %x", +@@ -796,6 +818,7 @@ static void *multifd_send_thread(void *opaque) + MigrationThread *thread = NULL; + Error *local_err = NULL; + int ret = 0; ++ bool use_packets = multifd_use_packets(); + + thread = migration_threads_add(p->name, qemu_get_thread_id()); + +@@ -805,9 +828,11 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); + +- if (multifd_send_initial_packet(p, &local_err) < 0) { +- ret = -1; +- goto out; ++ if (use_packets) { ++ if (multifd_send_initial_packet(p, &local_err) < 0) { ++ ret = -1; ++ goto out; ++ } + } + + while (true) { +@@ -858,16 +883,20 @@ static void *multifd_send_thread(void *opaque) + * it doesn't require explicit memory barriers. 
+ */ + assert(qatomic_read(&p->pending_sync)); +- p->flags = MULTIFD_FLAG_SYNC; +- multifd_send_fill_packet(p); +- ret = qio_channel_write_all(p->c, (void *)p->packet, +- p->packet_len, &local_err); +- if (ret != 0) { +- break; ++ ++ if (use_packets) { ++ p->flags = MULTIFD_FLAG_SYNC; ++ multifd_send_fill_packet(p); ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ break; ++ } ++ /* p->next_packet_size will always be zero for a SYNC packet */ ++ stat64_add(&mig_stats.multifd_bytes, p->packet_len); ++ p->flags = 0; + } +- /* p->next_packet_size will always be zero for a SYNC packet */ +- stat64_add(&mig_stats.multifd_bytes, p->packet_len); +- p->flags = 0; ++ + qatomic_set(&p->pending_sync, false); + qemu_sem_post(&p->sem_sync); + } +@@ -1022,6 +1051,7 @@ bool multifd_send_setup(void) + Error *local_err = NULL; + int thread_count, ret = 0; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); ++ bool use_packets = multifd_use_packets(); + uint8_t i; + + if (!migrate_multifd()) { +@@ -1044,14 +1074,20 @@ bool multifd_send_setup(void) + qemu_sem_init(&p->sem_sync, 0); + p->id = i; + p->pages = multifd_pages_init(page_count); +- p->packet_len = sizeof(MultiFDPacket_t) +- + sizeof(uint64_t) * page_count; +- p->packet = g_malloc0(p->packet_len); +- p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); +- p->packet->version = cpu_to_be32(MULTIFD_VERSION); ++ ++ if (use_packets) { ++ p->packet_len = sizeof(MultiFDPacket_t) ++ + sizeof(uint64_t) * page_count; ++ p->packet = g_malloc0(p->packet_len); ++ p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); ++ p->packet->version = cpu_to_be32(MULTIFD_VERSION); ++ ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, page_count + 1); ++ } else { ++ p->iov = g_new0(struct iovec, page_count); ++ } + p->name = g_strdup_printf("multifdsend_%d", i); +- /* We need one extra place for the packet header */ +- p->iov = g_new0(struct iovec, 
page_count + 1); + p->page_size = qemu_target_page_size(); + p->page_count = page_count; + p->write_flags = 0; +@@ -1114,7 +1150,9 @@ static void multifd_recv_terminate_threads(Error *err) + * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, + * however try to wakeup it without harm in cleanup phase. + */ +- qemu_sem_post(&p->sem_sync); ++ if (multifd_use_packets()) { ++ qemu_sem_post(&p->sem_sync); ++ } + + /* + * We could arrive here for two reasons: +@@ -1189,7 +1227,7 @@ void multifd_recv_sync_main(void) + int thread_count = migrate_multifd_channels(); + int i; + +- if (!migrate_multifd()) { ++ if (!migrate_multifd() || !multifd_use_packets()) { + return; + } + +@@ -1224,13 +1262,14 @@ static void *multifd_recv_thread(void *opaque) + { + MultiFDRecvParams *p = opaque; + Error *local_err = NULL; ++ bool use_packets = multifd_use_packets(); + int ret; + + trace_multifd_recv_thread_start(p->id); + rcu_register_thread(); + + while (true) { +- uint32_t flags; ++ uint32_t flags = 0; + bool has_data = false; + p->normal_num = 0; + +@@ -1238,25 +1277,27 @@ static void *multifd_recv_thread(void *opaque) + break; + } + +- ret = qio_channel_read_all_eof(p->c, (void *)p->packet, +- p->packet_len, &local_err); +- if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ +- break; +- } ++ if (use_packets) { ++ ret = qio_channel_read_all_eof(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ ++ break; ++ } + +- qemu_mutex_lock(&p->mutex); +- ret = multifd_recv_unfill_packet(p, &local_err); +- if (ret) { ++ qemu_mutex_lock(&p->mutex); ++ ret = multifd_recv_unfill_packet(p, &local_err); ++ if (ret) { ++ qemu_mutex_unlock(&p->mutex); ++ break; ++ } ++ ++ flags = p->flags; ++ /* recv methods don't know how to handle the SYNC flag */ ++ p->flags &= ~MULTIFD_FLAG_SYNC; ++ has_data = !!p->normal_num; + qemu_mutex_unlock(&p->mutex); +- break; + } + +- flags = p->flags; +- /* recv methods don't know how to handle the 
SYNC flag */ +- p->flags &= ~MULTIFD_FLAG_SYNC; +- has_data = !!p->normal_num; +- qemu_mutex_unlock(&p->mutex); +- + if (has_data) { + ret = multifd_recv_state->ops->recv(p, &local_err); + if (ret != 0) { +@@ -1264,9 +1305,11 @@ static void *multifd_recv_thread(void *opaque) + } + } + +- if (flags & MULTIFD_FLAG_SYNC) { +- qemu_sem_post(&multifd_recv_state->sem_sync); +- qemu_sem_wait(&p->sem_sync); ++ if (use_packets) { ++ if (flags & MULTIFD_FLAG_SYNC) { ++ qemu_sem_post(&multifd_recv_state->sem_sync); ++ qemu_sem_wait(&p->sem_sync); ++ } + } + } + +@@ -1285,6 +1328,7 @@ int multifd_recv_setup(Error **errp) + { + int thread_count; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); ++ bool use_packets = multifd_use_packets(); + uint8_t i; + + /* +@@ -1309,9 +1353,12 @@ int multifd_recv_setup(Error **errp) + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem_sync, 0); + p->id = i; +- p->packet_len = sizeof(MultiFDPacket_t) +- + sizeof(uint64_t) * page_count; +- p->packet = g_malloc0(p->packet_len); ++ ++ if (use_packets) { ++ p->packet_len = sizeof(MultiFDPacket_t) ++ + sizeof(uint64_t) * page_count; ++ p->packet = g_malloc0(p->packet_len); ++ } + p->name = g_strdup_printf("multifdrecv_%d", i); + p->iov = g_new0(struct iovec, page_count); + p->normal = g_new0(ram_addr_t, page_count); +@@ -1355,18 +1402,24 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; ++ bool use_packets = multifd_use_packets(); + int id; + +- id = multifd_recv_initial_packet(ioc, &local_err); +- if (id < 0) { +- multifd_recv_terminate_threads(local_err); +- error_propagate_prepend(errp, local_err, +- "failed to receive packet" +- " via multifd channel %d: ", +- qatomic_read(&multifd_recv_state->count)); +- return; ++ if (use_packets) { ++ id = multifd_recv_initial_packet(ioc, &local_err); ++ if (id < 0) { ++ multifd_recv_terminate_threads(local_err); ++ error_propagate_prepend(errp, local_err, ++ "failed 
to receive packet" ++ " via multifd channel %d: ", ++ qatomic_read(&multifd_recv_state->count)); ++ return; ++ } ++ trace_multifd_recv_new_channel(id); ++ } else { ++ /* next patch gives this a meaningful value */ ++ id = 0; + } +- trace_multifd_recv_new_channel(id); + + p = &multifd_recv_state->params[id]; + if (p->c != NULL) { +-- +2.33.0 + diff --git a/migration-multifd-Change-multifd_pages_init-argument.patch b/migration-multifd-Change-multifd_pages_init-argument.patch new file mode 100644 index 0000000000000000000000000000000000000000..71fbb958c95080e2bd47cb23a9acd9c28519e154 --- /dev/null +++ b/migration-multifd-Change-multifd_pages_init-argument.patch @@ -0,0 +1,42 @@ +From 61e0a1ad97ca72ea4396d142bdfd7481b9380d6c Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:40 -0300 +Subject: [09/99] migration/multifd: Change multifd_pages_init argument + +commit 6074f81625800743e4c374aecf7dd30774aaf6e0 upstream. + +The 'size' argument is actually the number of pages that fit in a +multifd packet. Change it to uint32_t and rename. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-4-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3e5aaaa1d4..ef7d4520c4 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -237,12 +237,12 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + return msg.id; + } + +-static MultiFDPages_t *multifd_pages_init(size_t size) ++static MultiFDPages_t *multifd_pages_init(uint32_t n) + { + MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1); + +- pages->allocated = size; +- pages->offset = g_new0(ram_addr_t, size); ++ pages->allocated = n; ++ pages->offset = g_new0(ram_addr_t, n); + + return pages; + } +-- +2.33.0 + diff --git a/migration-multifd-Change-retval-of-multifd_queue_pag.patch b/migration-multifd-Change-retval-of-multifd_queue_pag.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a8b37c33ece445eab1c74f91268e89f1eb7fd87 --- /dev/null +++ b/migration-multifd-Change-retval-of-multifd_queue_pag.patch @@ -0,0 +1,88 @@ +From d95c440bb62e6eb30b3777e10d94fbc72b7f65a4 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:50 +0800 +Subject: [40/99] migration/multifd: Change retval of multifd_queue_page() + +commit d6556d174a6b9fc443f2320193f18e71eb67052a upstream. + +Using int is an overkill when there're only two options. Change it to a +boolean. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-17-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 9 +++++---- + migration/multifd.h | 2 +- + migration/ram.c | 2 +- + 3 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 59ccc42c05..c48c031009 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -506,7 +506,8 @@ static int multifd_send_pages(void) + return 1; + } + +-int multifd_queue_page(RAMBlock *block, ram_addr_t offset) ++/* Returns true if enqueue successful, false otherwise */ ++bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) + { + MultiFDPages_t *pages = multifd_send_state->pages; + bool changed = false; +@@ -520,21 +521,21 @@ int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + pages->num++; + + if (pages->num < pages->allocated) { +- return 1; ++ return true; + } + } else { + changed = true; + } + + if (multifd_send_pages() < 0) { +- return -1; ++ return false; + } + + if (changed) { + return multifd_queue_page(block, offset); + } + +- return 1; ++ return true; + } + + /* Multifd send side hit an error; remember it and prepare to quit */ +diff --git a/migration/multifd.h b/migration/multifd.h +index 34a2ecb9f4..a320c53a6f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -22,7 +22,7 @@ bool multifd_recv_all_channels_created(void); + void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); + int multifd_send_sync_main(void); +-int multifd_queue_page(RAMBlock *block, ram_addr_t offset); ++bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) +diff --git a/migration/ram.c b/migration/ram.c +index 67fa9c83d6..9630b654c2 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1389,7 +1389,7 @@ static int ram_save_page(RAMState *rs, 
PageSearchStatus *pss) + + static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset) + { +- if (multifd_queue_page(block, offset) < 0) { ++ if (!multifd_queue_page(block, offset)) { + return -1; + } + stat64_add(&mig_stats.normal_pages, 1); +-- +2.33.0 + diff --git a/migration-multifd-Change-retval-of-multifd_send_page.patch b/migration-multifd-Change-retval-of-multifd_send_page.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f60a87e09536e3c5a4e1c85198e300e5cc7fb29 --- /dev/null +++ b/migration-multifd-Change-retval-of-multifd_send_page.patch @@ -0,0 +1,83 @@ +From c91e89ee776b145b265f56fc9539514b36988e84 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:51 +0800 +Subject: [41/99] migration/multifd: Change retval of multifd_send_pages() + +commit 3b40964a863d69121733c8b9794a02347ed0000b upstream. + +Using int is an overkill when there're only two options. Change it to a +boolean. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-18-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index c48c031009..dabfc3ec0d 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -450,9 +450,10 @@ static void multifd_send_kick_main(MultiFDSendParams *p) + * thread is using the channel mutex when changing it, and the channel + * have to had finish with its own, otherwise pending_job can't be + * false. ++ * ++ * Returns true if succeed, false otherwise. 
+ */ +- +-static int multifd_send_pages(void) ++static bool multifd_send_pages(void) + { + int i; + static int next_channel; +@@ -460,7 +461,7 @@ static int multifd_send_pages(void) + MultiFDPages_t *pages = multifd_send_state->pages; + + if (multifd_send_should_exit()) { +- return -1; ++ return false; + } + + /* We wait here, until at least one channel is ready */ +@@ -474,7 +475,7 @@ static int multifd_send_pages(void) + next_channel %= migrate_multifd_channels(); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { + if (multifd_send_should_exit()) { +- return -1; ++ return false; + } + p = &multifd_send_state->params[i]; + /* +@@ -503,7 +504,7 @@ static int multifd_send_pages(void) + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + +- return 1; ++ return true; + } + + /* Returns true if enqueue successful, false otherwise */ +@@ -527,7 +528,7 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) + changed = true; + } + +- if (multifd_send_pages() < 0) { ++ if (!multifd_send_pages()) { + return false; + } + +@@ -667,7 +668,7 @@ int multifd_send_sync_main(void) + return 0; + } + if (multifd_send_state->pages->num) { +- if (multifd_send_pages() < 0) { ++ if (!multifd_send_pages()) { + error_report("%s: multifd_send_pages fail", __func__); + return -1; + } +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-TLS-iochannel-referencing.patch b/migration-multifd-Cleanup-TLS-iochannel-referencing.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e47c27d0379e7f6f991886d631e89f9efe9066c --- /dev/null +++ b/migration-multifd-Cleanup-TLS-iochannel-referencing.patch @@ -0,0 +1,117 @@ +From f1ee974ab81330ae1048f0cf5ee2ccaeb16e26d1 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:52:57 +0800 +Subject: [57/99] migration/multifd: Cleanup TLS iochannel referencing + +commit 9221e3c6a237da90ac296adfeb6e99ea9babfc20 upstream. 
+ +Commit a1af605bd5 ("migration/multifd: fix hangup with TLS-Multifd due to +blocking handshake") introduced a thread for TLS channels, which will +resolve the issue on blocking the main thread. However in the same commit +p->c is slightly abused just to be able to pass over the pointer "p" into +the thread. + +That's the major reason we'll need to conditionally free the io channel in +the fault paths. + +To clean it up, using a separate structure to pass over both "p" and "tioc" +in the tls handshake thread. Then we can make it a rule that p->c will +never be set until the channel is completely setup. With that, we can drop +the tricky conditional unref of the io channel in the error path. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-2-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 37 +++++++++++++++++++++++-------------- + 1 file changed, 23 insertions(+), 14 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index bbd421004f..ad8fa6a317 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -895,16 +895,22 @@ out: + + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque); + ++typedef struct { ++ MultiFDSendParams *p; ++ QIOChannelTLS *tioc; ++} MultiFDTLSThreadArgs; ++ + static void *multifd_tls_handshake_thread(void *opaque) + { +- MultiFDSendParams *p = opaque; +- QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c); ++ MultiFDTLSThreadArgs *args = opaque; + +- qio_channel_tls_handshake(tioc, ++ qio_channel_tls_handshake(args->tioc, + multifd_new_send_channel_async, +- p, ++ args->p, + NULL, + NULL); ++ g_free(args); ++ + return NULL; + } + +@@ -914,6 +920,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + { + MigrationState *s = migrate_get_current(); + const char *hostname = s->hostname; ++ MultiFDTLSThreadArgs *args; + QIOChannelTLS *tioc; + + tioc = migration_tls_client_create(ioc, hostname, errp); +@@ 
-928,11 +935,14 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + object_unref(OBJECT(ioc)); + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); +- p->c = QIO_CHANNEL(tioc); ++ ++ args = g_new0(MultiFDTLSThreadArgs, 1); ++ args->tioc = tioc; ++ args->p = p; + + p->tls_thread_created = true; + qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker", +- multifd_tls_handshake_thread, p, ++ multifd_tls_handshake_thread, args, + QEMU_THREAD_JOINABLE); + return true; + } +@@ -945,6 +955,7 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + + migration_ioc_register_yank(ioc); + p->registered_yank = true; ++ /* Setup p->c only if the channel is completely setup */ + p->c = ioc; + + p->thread_created = true; +@@ -998,14 +1009,12 @@ out: + + trace_multifd_new_send_channel_async_error(p->id, local_err); + multifd_send_set_error(local_err); +- if (!p->c) { +- /* +- * If no channel has been created, drop the initial +- * reference. Otherwise cleanup happens at +- * multifd_send_channel_destroy() +- */ +- object_unref(OBJECT(ioc)); +- } ++ /* ++ * For error cases (TLS or non-TLS), IO channel is always freed here ++ * rather than when cleanup multifd: since p->c is not set, multifd ++ * cleanup code doesn't even know its existence. 
++ */ ++ object_unref(OBJECT(ioc)); + error_free(local_err); + } + +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-multifd_load_cleanup.patch b/migration-multifd-Cleanup-multifd_load_cleanup.patch new file mode 100644 index 0000000000000000000000000000000000000000..025058829f2e617035b37c6f15aca515ea22d642 --- /dev/null +++ b/migration-multifd-Cleanup-multifd_load_cleanup.patch @@ -0,0 +1,94 @@ +From d7240e133b0eebb08d42de278fbefbc89061143b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:54 +0800 +Subject: [44/99] migration/multifd: Cleanup multifd_load_cleanup() + +commit 5e6ea8a1d64e72e648b5a5277f08ec7fb09c3b8e upstream. + +Use similar logic to cleanup the recv side. + +Note that multifd_recv_terminate_threads() may need some similar rework +like the sender side, but let's leave that for later. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-21-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 52 ++++++++++++++++++++++++++------------------- + 1 file changed, 30 insertions(+), 22 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 83c6ccd0f2..048ff66760 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1074,6 +1074,34 @@ void multifd_load_shutdown(void) + } + } + ++static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) ++{ ++ migration_ioc_unregister_yank(p->c); ++ object_unref(OBJECT(p->c)); ++ p->c = NULL; ++ qemu_mutex_destroy(&p->mutex); ++ qemu_sem_destroy(&p->sem_sync); ++ g_free(p->name); ++ p->name = NULL; ++ p->packet_len = 0; ++ g_free(p->packet); ++ p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; ++ g_free(p->normal); ++ p->normal = NULL; ++ multifd_recv_state->ops->recv_cleanup(p); ++} ++ ++static void multifd_recv_cleanup_state(void) ++{ ++ qemu_sem_destroy(&multifd_recv_state->sem_sync); ++ g_free(multifd_recv_state->params); ++ multifd_recv_state->params = NULL; ++ 
g_free(multifd_recv_state); ++ multifd_recv_state = NULL; ++} ++ + void multifd_load_cleanup(void) + { + int i; +@@ -1096,29 +1124,9 @@ void multifd_load_cleanup(void) + qemu_thread_join(&p->thread); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +- MultiFDRecvParams *p = &multifd_recv_state->params[i]; +- +- migration_ioc_unregister_yank(p->c); +- object_unref(OBJECT(p->c)); +- p->c = NULL; +- qemu_mutex_destroy(&p->mutex); +- qemu_sem_destroy(&p->sem_sync); +- g_free(p->name); +- p->name = NULL; +- p->packet_len = 0; +- g_free(p->packet); +- p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; +- g_free(p->normal); +- p->normal = NULL; +- multifd_recv_state->ops->recv_cleanup(p); ++ multifd_recv_cleanup_channel(&multifd_recv_state->params[i]); + } +- qemu_sem_destroy(&multifd_recv_state->sem_sync); +- g_free(multifd_recv_state->params); +- multifd_recv_state->params = NULL; +- g_free(multifd_recv_state); +- multifd_recv_state = NULL; ++ multifd_recv_cleanup_state(); + } + + void multifd_recv_sync_main(void) +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-multifd_recv_sync_main.patch b/migration-multifd-Cleanup-multifd_recv_sync_main.patch new file mode 100644 index 0000000000000000000000000000000000000000..d26afc674bfa4350e6d4070c52d5a2d4fb772c67 --- /dev/null +++ b/migration-multifd-Cleanup-multifd_recv_sync_main.patch @@ -0,0 +1,75 @@ +From dc7717ee9311c374ad199c5baf4ecde8ac082248 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:29:55 -0300 +Subject: [64/99] migration/multifd: Cleanup multifd_recv_sync_main + +commit 4aac6b1e9bd48677c4f24518fe86ffd34c677d5a upstream. + +Some minor cleanups and documentation for multifd_recv_sync_main. + +Use thread_count as done in other parts of the code. Remove p->id from +the multifd_recv_state sync, since that is global and not tied to a +channel. Add documentation for the sync steps. 
+ +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-2-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 17 +++++++++++++---- + migration/trace-events | 2 +- + 2 files changed, 14 insertions(+), 5 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 9e3955cb8c..429aad232b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1186,18 +1186,27 @@ void multifd_recv_cleanup(void) + + void multifd_recv_sync_main(void) + { ++ int thread_count = migrate_multifd_channels(); + int i; + + if (!migrate_multifd()) { + return; + } +- for (i = 0; i < migrate_multifd_channels(); i++) { +- MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- trace_multifd_recv_sync_main_wait(p->id); ++ /* ++ * Initiate the synchronization by waiting for all channels. ++ * For socket-based migration this means each channel has received ++ * the SYNC packet on the stream. ++ */ ++ for (i = 0; i < thread_count; i++) { ++ trace_multifd_recv_sync_main_wait(i); + qemu_sem_wait(&multifd_recv_state->sem_sync); + } +- for (i = 0; i < migrate_multifd_channels(); i++) { ++ ++ /* ++ * Sync done. Release the channels for the next iteration. 
++ */ ++ for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + WITH_QEMU_LOCK_GUARD(&p->mutex) { +diff --git a/migration/trace-events b/migration/trace-events +index 298ad2b0dd..bf1a069632 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -132,7 +132,7 @@ multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uin + multifd_recv_new_channel(uint8_t id) "channel %u" + multifd_recv_sync_main(long packet_num) "packet num %ld" + multifd_recv_sync_main_signal(uint8_t id) "channel %u" +-multifd_recv_sync_main_wait(uint8_t id) "channel %u" ++multifd_recv_sync_main_wait(uint8_t id) "iter %u" + multifd_recv_terminate_threads(bool error) "error %d" + multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 + multifd_recv_thread_start(uint8_t id) "%u" +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-multifd_save_cleanup.patch b/migration-multifd-Cleanup-multifd_save_cleanup.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f5a9a3763117cc6e04a02b60c694cec4bf756dc --- /dev/null +++ b/migration-multifd-Cleanup-multifd_save_cleanup.patch @@ -0,0 +1,159 @@ +From bdcbbe9df0dcc74f21948ba459cc350da77446af Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:53 +0800 +Subject: [43/99] migration/multifd: Cleanup multifd_save_cleanup() + +commit 12808db3b8c22d26c9bc3da6f41756890ce882e4 upstream. + +Shrink the function by moving relevant works into helpers: move the thread +join()s into multifd_send_terminate_threads(), then create two more helpers +to cover channel/state cleanups. + +Add a TODO entry for the thread terminate process because p->running is +still buggy. We need to fix it at some point but not yet covered. 
+ +Suggested-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-20-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 91 +++++++++++++++++++++++++++++---------------- + 1 file changed, 59 insertions(+), 32 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f92e6776f0..83c6ccd0f2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -594,6 +594,11 @@ static void multifd_send_terminate_threads(void) + * always set it. + */ + qatomic_set(&multifd_send_state->exiting, 1); ++ ++ /* ++ * Firstly, kick all threads out; no matter whether they are just idle, ++ * or blocked in an IO system call. ++ */ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -602,6 +607,21 @@ static void multifd_send_terminate_threads(void) + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } + } ++ ++ /* ++ * Finally recycle all the threads. ++ * ++ * TODO: p->running is still buggy, e.g. we can reach here without the ++ * corresponding multifd_new_send_channel_async() get invoked yet, ++ * then a new thread can even be created after this function returns. 
++ */ ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDSendParams *p = &multifd_send_state->params[i]; ++ ++ if (p->running) { ++ qemu_thread_join(&p->thread); ++ } ++ } + } + + static int multifd_send_channel_destroy(QIOChannel *send) +@@ -609,6 +629,41 @@ static int multifd_send_channel_destroy(QIOChannel *send) + return socket_send_channel_destroy(send); + } + ++static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) ++{ ++ if (p->registered_yank) { ++ migration_ioc_unregister_yank(p->c); ++ } ++ multifd_send_channel_destroy(p->c); ++ p->c = NULL; ++ qemu_mutex_destroy(&p->mutex); ++ qemu_sem_destroy(&p->sem); ++ qemu_sem_destroy(&p->sem_sync); ++ g_free(p->name); ++ p->name = NULL; ++ multifd_pages_clear(p->pages); ++ p->pages = NULL; ++ p->packet_len = 0; ++ g_free(p->packet); ++ p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; ++ multifd_send_state->ops->send_cleanup(p, errp); ++ ++ return *errp == NULL; ++} ++ ++static void multifd_send_cleanup_state(void) ++{ ++ qemu_sem_destroy(&multifd_send_state->channels_ready); ++ g_free(multifd_send_state->params); ++ multifd_send_state->params = NULL; ++ multifd_pages_clear(multifd_send_state->pages); ++ multifd_send_state->pages = NULL; ++ g_free(multifd_send_state); ++ multifd_send_state = NULL; ++} ++ + void multifd_save_cleanup(void) + { + int i; +@@ -616,48 +671,20 @@ void multifd_save_cleanup(void) + if (!migrate_multifd()) { + return; + } ++ + multifd_send_terminate_threads(); +- for (i = 0; i < migrate_multifd_channels(); i++) { +- MultiFDSendParams *p = &multifd_send_state->params[i]; + +- if (p->running) { +- qemu_thread_join(&p->thread); +- } +- } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + Error *local_err = NULL; + +- if (p->registered_yank) { +- migration_ioc_unregister_yank(p->c); +- } +- multifd_send_channel_destroy(p->c); +- p->c = NULL; +- qemu_mutex_destroy(&p->mutex); +- 
qemu_sem_destroy(&p->sem); +- qemu_sem_destroy(&p->sem_sync); +- g_free(p->name); +- p->name = NULL; +- multifd_pages_clear(p->pages); +- p->pages = NULL; +- p->packet_len = 0; +- g_free(p->packet); +- p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; +- multifd_send_state->ops->send_cleanup(p, &local_err); +- if (local_err) { ++ if (!multifd_send_cleanup_channel(p, &local_err)) { + migrate_set_error(migrate_get_current(), local_err); + error_free(local_err); + } + } +- qemu_sem_destroy(&multifd_send_state->channels_ready); +- g_free(multifd_send_state->params); +- multifd_send_state->params = NULL; +- multifd_pages_clear(multifd_send_state->pages); +- multifd_send_state->pages = NULL; +- g_free(multifd_send_state); +- multifd_send_state = NULL; ++ ++ multifd_send_cleanup_state(); + } + + static int multifd_zero_copy_flush(QIOChannel *c) +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-outgoing_args-in-state-des.patch b/migration-multifd-Cleanup-outgoing_args-in-state-des.patch new file mode 100644 index 0000000000000000000000000000000000000000..7c6c197270d359cff3dd1af3f20bcd451d8272f8 --- /dev/null +++ b/migration-multifd-Cleanup-outgoing_args-in-state-des.patch @@ -0,0 +1,78 @@ +From 28700ce624e7972fc971d7524c5aa8de868d253d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:53:00 +0800 +Subject: [60/99] migration/multifd: Cleanup outgoing_args in state destroy + +commit 72b90b96872acc5d00f9c16dfc196543349361da upstream. + +outgoing_args is a global cache of socket address to be reused in multifd. +Freeing the cache in per-channel destructor is more or less a hack. Move +it to multifd_send_cleanup_state() so it only get checked once. Use a +small helper to do so because it's internal of socket.c. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-5-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 1 + + migration/socket.c | 12 ++++++++---- + migration/socket.h | 2 ++ + 3 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index a7289289a4..aa7b7e224e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -690,6 +690,7 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + + static void multifd_send_cleanup_state(void) + { ++ socket_cleanup_outgoing_migration(); + qemu_sem_destroy(&multifd_send_state->channels_created); + qemu_sem_destroy(&multifd_send_state->channels_ready); + g_free(multifd_send_state->params); +diff --git a/migration/socket.c b/migration/socket.c +index 98e3ea1514..3184c7c3c1 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -64,10 +64,6 @@ int socket_send_channel_destroy(QIOChannel *send) + { + /* Remove channel */ + object_unref(OBJECT(send)); +- if (outgoing_args.saddr) { +- qapi_free_SocketAddress(outgoing_args.saddr); +- outgoing_args.saddr = NULL; +- } + return 0; + } + +@@ -137,6 +133,14 @@ void socket_start_outgoing_migration(MigrationState *s, + NULL); + } + ++void socket_cleanup_outgoing_migration(void) ++{ ++ if (outgoing_args.saddr) { ++ qapi_free_SocketAddress(outgoing_args.saddr); ++ outgoing_args.saddr = NULL; ++ } ++} ++ + static void socket_accept_incoming_migration(QIONetListener *listener, + QIOChannelSocket *cioc, + gpointer opaque) +diff --git a/migration/socket.h b/migration/socket.h +index 5e4c33b8ea..5f52eddd4c 100644 +--- a/migration/socket.h ++++ b/migration/socket.h +@@ -29,4 +29,6 @@ void socket_start_incoming_migration(SocketAddress *saddr, Error **errp); + + void socket_start_outgoing_migration(MigrationState *s, + SocketAddress *saddr, Error **errp); ++void socket_cleanup_outgoing_migration(void); ++ + #endif +-- +2.33.0 + 
diff --git a/migration-multifd-Decouple-recv-method-from-pages.patch b/migration-multifd-Decouple-recv-method-from-pages.patch new file mode 100644 index 0000000000000000000000000000000000000000..99915b17981811d0e9e8bfeddf132369e6a7738e --- /dev/null +++ b/migration-multifd-Decouple-recv-method-from-pages.patch @@ -0,0 +1,157 @@ +From deca5474782611e8bacf0c3110897ddd204084e9 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:30:07 -0300 +Subject: [66/99] migration/multifd: Decouple recv method from pages + +commit 9db191251381c75e57201f7b07330ca982a55d1e upstream. + +Next patches will abstract the type of data being received by the +channels, so do some cleanup now to remove references to pages and +dependency on 'normal_num'. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-14-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 6 +++--- + migration/multifd-zstd.c | 6 +++--- + migration/multifd.c | 13 ++++++++----- + migration/multifd.h | 4 ++-- + 4 files changed, 16 insertions(+), 13 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 2a8f5fc9a6..6120faad65 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -234,7 +234,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + } + + /** +- * zlib_recv_pages: read the data from the channel into actual pages ++ * zlib_recv: read the data from the channel into actual pages + * + * Read the compressed buffer, and uncompress it into the actual + * pages. 
+@@ -244,7 +244,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +-static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) ++static int zlib_recv(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->compress_data; + z_stream *zs = &z->zs; +@@ -319,7 +319,7 @@ static MultiFDMethods multifd_zlib_ops = { + .send_prepare = zlib_send_prepare, + .recv_setup = zlib_recv_setup, + .recv_cleanup = zlib_recv_cleanup, +- .recv_pages = zlib_recv_pages ++ .recv = zlib_recv + }; + + static void multifd_zlib_register(void) +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 593cf290ad..cac236833d 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -232,7 +232,7 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) + } + + /** +- * zstd_recv_pages: read the data from the channel into actual pages ++ * zstd_recv: read the data from the channel into actual pages + * + * Read the compressed buffer, and uncompress it into the actual + * pages. 
+@@ -242,7 +242,7 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +-static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) ++static int zstd_recv(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +@@ -310,7 +310,7 @@ static MultiFDMethods multifd_zstd_ops = { + .send_prepare = zstd_send_prepare, + .recv_setup = zstd_recv_setup, + .recv_cleanup = zstd_recv_cleanup, +- .recv_pages = zstd_recv_pages ++ .recv = zstd_recv + }; + + static void multifd_zstd_register(void) +diff --git a/migration/multifd.c b/migration/multifd.c +index 429aad232b..d5039af833 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -198,7 +198,7 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + } + + /** +- * nocomp_recv_pages: read the data from the channel into actual pages ++ * nocomp_recv: read the data from the channel + * + * For no compression we just need to read things into the correct place. 
+ * +@@ -207,7 +207,7 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +-static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) ++static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + +@@ -229,7 +229,7 @@ static MultiFDMethods multifd_nocomp_ops = { + .send_prepare = nocomp_send_prepare, + .recv_setup = nocomp_recv_setup, + .recv_cleanup = nocomp_recv_cleanup, +- .recv_pages = nocomp_recv_pages ++ .recv = nocomp_recv + }; + + static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = { +@@ -1231,6 +1231,8 @@ static void *multifd_recv_thread(void *opaque) + + while (true) { + uint32_t flags; ++ bool has_data = false; ++ p->normal_num = 0; + + if (multifd_recv_should_exit()) { + break; +@@ -1252,10 +1254,11 @@ static void *multifd_recv_thread(void *opaque) + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; ++ has_data = !!p->normal_num; + qemu_mutex_unlock(&p->mutex); + +- if (p->normal_num) { +- ret = multifd_recv_state->ops->recv_pages(p, &local_err); ++ if (has_data) { ++ ret = multifd_recv_state->ops->recv(p, &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index adccd3532f..6a54377cc1 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -197,8 +197,8 @@ typedef struct { + int (*recv_setup)(MultiFDRecvParams *p, Error **errp); + /* Cleanup for receiving side */ + void (*recv_cleanup)(MultiFDRecvParams *p); +- /* Read all pages */ +- int (*recv_pages)(MultiFDRecvParams *p, Error **errp); ++ /* Read all data */ ++ int (*recv)(MultiFDRecvParams *p, Error **errp); + } MultiFDMethods; + + void multifd_register_ops(int method, MultiFDMethods *ops); +-- +2.33.0 + diff --git a/migration-multifd-Drop-MultiFDSendParams.normal-arra.patch 
b/migration-multifd-Drop-MultiFDSendParams.normal-arra.patch new file mode 100644 index 0000000000000000000000000000000000000000..9acd02b1e43a38cabba42d9e4305be7a2e9d91d5 --- /dev/null +++ b/migration-multifd-Drop-MultiFDSendParams.normal-arra.patch @@ -0,0 +1,212 @@ +From 383f4cb78af723cf650841dc31862f9b0b612f4b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:39 +0800 +Subject: [29/99] migration/multifd: Drop MultiFDSendParams.normal[] array + +commit efd8c5439db7eaf00f35adc0fcc4f01d916e8619 upstream. + +This array is redundant when p->pages exists. Now we extended the life of +p->pages to the whole period where pending_job is set, it should be safe to +always use p->pages->offset[] rather than p->normal[]. Drop the array. + +Alongside, the normal_num is also redundant, which is the same to +p->pages->num. + +This doesn't apply to recv side, because there's no extra buffering on recv +side, so p->normal[] array is still needed. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-6-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 7 ++++--- + migration/multifd-zstd.c | 7 ++++--- + migration/multifd.c | 33 +++++++++++++-------------------- + migration/multifd.h | 4 ---- + 4 files changed, 21 insertions(+), 30 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 37ce48621e..100809abc1 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -116,17 +116,18 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { ++ MultiFDPages_t *pages = p->pages; + struct zlib_data *z = p->data; + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; + uint32_t i; + +- for (i = 0; i < p->normal_num; i++) { ++ for (i = 0; i < pages->num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == 
p->normal_num - 1) { ++ if (i == pages->num - 1) { + flush = Z_SYNC_FLUSH; + } + +@@ -135,7 +136,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + * with compression. zlib does not guarantee that this is safe, + * therefore copy the page before calling deflate(). + */ +- memcpy(z->buf, p->pages->block->host + p->normal[i], p->page_size); ++ memcpy(z->buf, p->pages->block->host + pages->offset[i], p->page_size); + zs->avail_in = p->page_size; + zs->next_in = z->buf; + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index b471daadcd..2023edd8cc 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -113,6 +113,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { ++ MultiFDPages_t *pages = p->pages; + struct zstd_data *z = p->data; + int ret; + uint32_t i; +@@ -121,13 +122,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < p->normal_num; i++) { ++ for (i = 0; i < pages->num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == p->normal_num - 1) { ++ if (i == pages->num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = p->pages->block->host + p->normal[i]; ++ z->in.src = p->pages->block->host + pages->offset[i]; + z->in.size = p->page_size; + z->in.pos = 0; + +diff --git a/migration/multifd.c b/migration/multifd.c +index fff119237a..bfafe94e1e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -91,13 +91,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { + MultiFDPages_t *pages = p->pages; + +- for (int i = 0; i < p->normal_num; i++) { +- p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i]; ++ for (int i = 0; i < pages->num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; + p->iov[p->iovs_num].iov_len = p->page_size; + 
p->iovs_num++; + } + +- p->next_packet_size = p->normal_num * p->page_size; ++ p->next_packet_size = pages->num * p->page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -270,21 +270,22 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + static void multifd_send_fill_packet(MultiFDSendParams *p) + { + MultiFDPacket_t *packet = p->packet; ++ MultiFDPages_t *pages = p->pages; + int i; + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->normal_pages = cpu_to_be32(p->normal_num); ++ packet->normal_pages = cpu_to_be32(pages->num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +- if (p->pages->block) { +- strncpy(packet->ramblock, p->pages->block->idstr, 256); ++ if (pages->block) { ++ strncpy(packet->ramblock, pages->block->idstr, 256); + } + +- for (i = 0; i < p->normal_num; i++) { ++ for (i = 0; i < pages->num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ +- uint64_t temp = p->normal[i]; ++ uint64_t temp = pages->offset[i]; + + packet->offset[i] = cpu_to_be64(temp); + } +@@ -571,8 +572,6 @@ void multifd_save_cleanup(void) + p->packet = NULL; + g_free(p->iov); + p->iov = NULL; +- g_free(p->normal); +- p->normal = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -692,8 +691,8 @@ static void *multifd_send_thread(void *opaque) + + if (p->pending_job) { + uint64_t packet_num = p->packet_num; ++ MultiFDPages_t *pages = p->pages; + uint32_t flags; +- p->normal_num = 0; + + if (use_zero_copy_send) { + p->iovs_num = 0; +@@ -701,12 +700,7 @@ static void *multifd_send_thread(void *opaque) + p->iovs_num = 1; + } + +- for (int i = 0; i < p->pages->num; i++) { +- p->normal[p->normal_num] = p->pages->offset[i]; +- p->normal_num++; +- } +- +- if (p->normal_num) { ++ if (pages->num) { + ret = 
multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); +@@ -717,10 +711,10 @@ static void *multifd_send_thread(void *opaque) + flags = p->flags; + p->flags = 0; + p->num_packets++; +- p->total_normal_pages += p->normal_num; ++ p->total_normal_pages += pages->num; + qemu_mutex_unlock(&p->mutex); + +- trace_multifd_send(p->id, packet_num, p->normal_num, flags, ++ trace_multifd_send(p->id, packet_num, pages->num, flags, + p->next_packet_size); + + if (use_zero_copy_send) { +@@ -928,7 +922,6 @@ int multifd_save_setup(Error **errp) + p->name = g_strdup_printf("multifdsend_%d", i); + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); +- p->normal = g_new0(ram_addr_t, page_count); + p->page_size = qemu_target_page_size(); + p->page_count = page_count; + +diff --git a/migration/multifd.h b/migration/multifd.h +index 7c040cb85a..3920bdbcf1 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -122,10 +122,6 @@ typedef struct { + struct iovec *iov; + /* number of iovs used */ + uint32_t iovs_num; +- /* Pages that are not zero */ +- ram_addr_t *normal; +- /* num of non zero pages */ +- uint32_t normal_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +-- +2.33.0 + diff --git a/migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch b/migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch new file mode 100644 index 0000000000000000000000000000000000000000..aa4fbeeff8cd2c6ccb637d7b9e0fa2b392b5ba43 --- /dev/null +++ b/migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch @@ -0,0 +1,251 @@ +From 046f864bba4035328269599e7d0e9de1b7a93932 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:37 +0800 +Subject: [27/99] migration/multifd: Drop MultiFDSendParams.quit, cleanup error + paths + +commit 15f3f21d598148895c33b6fc41e29777cf6ad992 upstream. 
+ +Multifd send side has two fields to indicate error quits: + + - MultiFDSendParams.quit + - &multifd_send_state->exiting + +Merge them into the global one. The replacement is done by changing all +p->quit checks into the global var check. The global check doesn't need +any lock. + +A few more things done on top of this altogether: + + - multifd_send_terminate_threads() + + Moving the xchg() of &multifd_send_state->exiting upper, so as to cover + the tracepoint, migrate_set_error() and migrate_set_state(). + + - multifd_send_sync_main() + + In the 2nd loop, add one more check over the global var to make sure we + don't keep the looping if QEMU already decided to quit. + + - multifd_tls_outgoing_handshake() + + Use multifd_send_terminate_threads() to set the error state. That has + a benefit of updating MigrationState.error to that error too, so we can + persist that 1st error we hit in that specific channel. + + - multifd_new_send_channel_async() + + Take similar approach like above, drop the migrate_set_error() because + multifd_send_terminate_threads() already covers that. Unwrap the helper + multifd_new_send_channel_cleanup() along the way; not really needed. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-4-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 85 ++++++++++++++++++--------------------------- + migration/multifd.h | 2 -- + 2 files changed, 33 insertions(+), 54 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d2da6178b0..ea756b6eb8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -373,6 +373,11 @@ struct { + MultiFDMethods *ops; + } *multifd_send_state; + ++static bool multifd_send_should_exit(void) ++{ ++ return qatomic_read(&multifd_send_state->exiting); ++} ++ + /* + * The migration thread can wait on either of the two semaphores. 
This + * function can be used to kick the main thread out of waiting on either of +@@ -410,7 +415,7 @@ static int multifd_send_pages(void) + MultiFDSendParams *p = NULL; /* make happy gcc */ + MultiFDPages_t *pages = multifd_send_state->pages; + +- if (qatomic_read(&multifd_send_state->exiting)) { ++ if (multifd_send_should_exit()) { + return -1; + } + +@@ -422,14 +427,11 @@ static int multifd_send_pages(void) + */ + next_channel %= migrate_multifd_channels(); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { +- p = &multifd_send_state->params[i]; +- +- qemu_mutex_lock(&p->mutex); +- if (p->quit) { +- error_report("%s: channel %d has already quit!", __func__, i); +- qemu_mutex_unlock(&p->mutex); ++ if (multifd_send_should_exit()) { + return -1; + } ++ p = &multifd_send_state->params[i]; ++ qemu_mutex_lock(&p->mutex); + if (!p->pending_job) { + p->pending_job++; + next_channel = (i + 1) % migrate_multifd_channels(); +@@ -484,6 +486,16 @@ static void multifd_send_terminate_threads(Error *err) + { + int i; + ++ /* ++ * We don't want to exit each threads twice. Depending on where ++ * we get the error, or if there are two independent errors in two ++ * threads at the same time, we can end calling this function ++ * twice. ++ */ ++ if (qatomic_xchg(&multifd_send_state->exiting, 1)) { ++ return; ++ } ++ + trace_multifd_send_terminate_threads(err != NULL); + + if (err) { +@@ -498,26 +510,13 @@ static void multifd_send_terminate_threads(Error *err) + } + } + +- /* +- * We don't want to exit each threads twice. Depending on where +- * we get the error, or if there are two independent errors in two +- * threads at the same time, we can end calling this function +- * twice. 
+- */ +- if (qatomic_xchg(&multifd_send_state->exiting, 1)) { +- return; +- } +- + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +- qemu_mutex_lock(&p->mutex); +- p->quit = true; + qemu_sem_post(&p->sem); + if (p->c) { + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } +- qemu_mutex_unlock(&p->mutex); + } + } + +@@ -616,16 +615,13 @@ int multifd_send_sync_main(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +- trace_multifd_send_sync_main_signal(p->id); +- +- qemu_mutex_lock(&p->mutex); +- +- if (p->quit) { +- error_report("%s: channel %d has already quit", __func__, i); +- qemu_mutex_unlock(&p->mutex); ++ if (multifd_send_should_exit()) { + return -1; + } + ++ trace_multifd_send_sync_main_signal(p->id); ++ ++ qemu_mutex_lock(&p->mutex); + p->packet_num = multifd_send_state->packet_num++; + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; +@@ -635,6 +631,10 @@ int multifd_send_sync_main(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ if (multifd_send_should_exit()) { ++ return -1; ++ } ++ + qemu_sem_wait(&multifd_send_state->channels_ready); + trace_multifd_send_sync_main_wait(p->id); + qemu_sem_wait(&p->sem_sync); +@@ -675,7 +675,7 @@ static void *multifd_send_thread(void *opaque) + qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_wait(&p->sem); + +- if (qatomic_read(&multifd_send_state->exiting)) { ++ if (multifd_send_should_exit()) { + break; + } + qemu_mutex_lock(&p->mutex); +@@ -790,12 +790,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + +- migrate_set_error(migrate_get_current(), err); +- /* +- * Error happen, mark multifd_send_thread status as 'quit' although it +- * is not created, and then tell who pay attention to me. 
+- */ +- p->quit = true; ++ multifd_send_terminate_threads(err); + multifd_send_kick_main(p); + error_free(err); + } +@@ -861,22 +856,6 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + return true; + } + +-static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, +- QIOChannel *ioc, Error *err) +-{ +- migrate_set_error(migrate_get_current(), err); +- /* Error happen, we need to tell who pay attention to me */ +- multifd_send_kick_main(p); +- /* +- * Although multifd_send_thread is not created, but main migration +- * thread need to judge whether it is running, so we need to mark +- * its status. +- */ +- p->quit = true; +- object_unref(OBJECT(ioc)); +- error_free(err); +-} +- + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + { + MultiFDSendParams *p = opaque; +@@ -893,7 +872,10 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + } + + trace_multifd_new_send_channel_async_error(p->id, local_err); +- multifd_new_send_channel_cleanup(p, ioc, local_err); ++ multifd_send_terminate_threads(local_err); ++ multifd_send_kick_main(p); ++ object_unref(OBJECT(ioc)); ++ error_free(local_err); + } + + static void multifd_new_send_channel_create(gpointer opaque) +@@ -925,7 +907,6 @@ int multifd_save_setup(Error **errp) + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); +- p->quit = false; + p->pending_job = 0; + p->id = i; + p->pages = multifd_pages_init(page_count); +diff --git a/migration/multifd.h b/migration/multifd.h +index 35d11f103c..7c040cb85a 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -95,8 +95,6 @@ typedef struct { + QemuMutex mutex; + /* is this channel thread running */ + bool running; +- /* should this thread finish */ +- bool quit; + /* multifd flags for each packet */ + uint32_t flags; + /* global number of generated multifd packets */ +-- +2.33.0 + diff --git a/migration-multifd-Drop-pages-num-check-in-sender-thr.patch 
b/migration-multifd-Drop-pages-num-check-in-sender-thr.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc97d2ee5b4ad5491ef2db6f771bf33e3bf8d72b --- /dev/null +++ b/migration-multifd-Drop-pages-num-check-in-sender-thr.patch @@ -0,0 +1,46 @@ +From b24853b2f5524d988406732fc22c3fe9253de104 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:42 +0800 +Subject: [32/99] migration/multifd: Drop pages->num check in sender thread + +commit 83c560fb4249ee5698652249e0c1730c3d611a9b upstream. + +Now with a split SYNC handler, we always have pages->num set for +pending_job==true. Assert it instead. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-9-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cef4a88237..a67917b113 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -714,13 +714,14 @@ static void *multifd_send_thread(void *opaque) + p->iovs_num = 1; + } + +- if (pages->num) { +- ret = multifd_send_state->ops->send_prepare(p, &local_err); +- if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); +- break; +- } ++ assert(pages->num); ++ ++ ret = multifd_send_state->ops->send_prepare(p, &local_err); ++ if (ret != 0) { ++ qemu_mutex_unlock(&p->mutex); ++ break; + } ++ + multifd_send_fill_packet(p); + p->num_packets++; + p->total_normal_pages += pages->num; +-- +2.33.0 + diff --git a/migration-multifd-Drop-registered_yank.patch b/migration-multifd-Drop-registered_yank.patch new file mode 100644 index 0000000000000000000000000000000000000000..fba2ec03aeb3d2af06b820041ee8b88fdc8cad7b --- /dev/null +++ b/migration-multifd-Drop-registered_yank.patch @@ -0,0 +1,65 @@ +From 103fe08122ba65282660932a5e342a282a4b3e1c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:52:58 +0800 +Subject: [58/99] 
migration/multifd: Drop registered_yank + +commit 0518b5d8d30d3a4d0ea4f45d61527bcdc43044d2 upstream. + +With a clear definition of p->c protocol, where we only set it up if the +channel is fully established (TLS or non-TLS), registered_yank boolean will +have equal meaning of "p->c != NULL". + +Drop registered_yank by checking p->c instead. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-3-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 7 +++---- + migration/multifd.h | 2 -- + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index ad8fa6a317..3e85bc544a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -667,11 +667,11 @@ static int multifd_send_channel_destroy(QIOChannel *send) + + static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + { +- if (p->registered_yank) { ++ if (p->c) { + migration_ioc_unregister_yank(p->c); ++ multifd_send_channel_destroy(p->c); ++ p->c = NULL; + } +- multifd_send_channel_destroy(p->c); +- p->c = NULL; + qemu_sem_destroy(&p->sem); + qemu_sem_destroy(&p->sem_sync); + g_free(p->name); +@@ -954,7 +954,6 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + qio_channel_set_delay(ioc, false); + + migration_ioc_register_yank(ioc); +- p->registered_yank = true; + /* Setup p->c only if the channel is completely setup */ + p->c = ioc; + +diff --git a/migration/multifd.h b/migration/multifd.h +index 8a1cad0996..b3fe27ae93 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -78,8 +78,6 @@ typedef struct { + bool tls_thread_created; + /* communication channel */ + QIOChannel *c; +- /* is the yank function registered */ +- bool registered_yank; + /* packet allocated len */ + uint32_t packet_len; + /* guest page size */ +-- +2.33.0 + diff --git a/migration-multifd-Drop-stale-comment-for-multifd-zer.patch 
b/migration-multifd-Drop-stale-comment-for-multifd-zer.patch new file mode 100644 index 0000000000000000000000000000000000000000..c8d2dc060ce6b823eb33b2adb4f71fd8d1f28ad9 --- /dev/null +++ b/migration-multifd-Drop-stale-comment-for-multifd-zer.patch @@ -0,0 +1,43 @@ +From c454cdf2eef413af1c5ca04524e15dffdfc90a58 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:35 +0800 +Subject: [25/99] migration/multifd: Drop stale comment for multifd zero copy + +commit 8888a552bf7af200e36ff123772547dfb4f133c4 upstream. + +We've already done that with multifd_flush_after_each_section, for multifd +in general. Drop the stale "TODO-like" comment. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-2-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 11 ----------- + 1 file changed, 11 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index ef7d4520c4..07e7e78029 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -599,17 +599,6 @@ int multifd_send_sync_main(void) + } + } + +- /* +- * When using zero-copy, it's necessary to flush the pages before any of +- * the pages can be sent again, so we'll make sure the new version of the +- * pages will always arrive _later_ than the old pages. +- * +- * Currently we achieve this by flushing the zero-page requested writes +- * per ram iteration, but in the future we could potentially optimize it +- * to be less frequent, e.g. only after we finished one whole scanning of +- * all the dirty bitmaps. 
+- */ +- + flush_zero_copy = migrate_zero_copy_send(); + + for (i = 0; i < migrate_multifd_channels(); i++) { +-- +2.33.0 + diff --git a/migration-multifd-Drop-unnecessary-helper-to-destroy.patch b/migration-multifd-Drop-unnecessary-helper-to-destroy.patch new file mode 100644 index 0000000000000000000000000000000000000000..2ac32ca698d30cc0cc741aaa372985d998f19992 --- /dev/null +++ b/migration-multifd-Drop-unnecessary-helper-to-destroy.patch @@ -0,0 +1,77 @@ +From 0700d5acc4e51e949cc6d34a9bbb504a2803a127 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:53:01 +0800 +Subject: [61/99] migration/multifd: Drop unnecessary helper to destroy IOC + +commit c9a7e83c9d64fd5ebc759186789e1b753c919d32 upstream. + +Both socket_send_channel_destroy() and multifd_send_channel_destroy() are +unnecessary wrappers to destroy an IOC, as the only thing to do is to +release the final IOC reference. We have plenty of code that destroys an +IOC using direct unref() already; keep that style. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-6-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 7 +------ + migration/socket.c | 7 ------- + migration/socket.h | 1 - + 3 files changed, 1 insertion(+), 14 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index aa7b7e224e..9e3955cb8c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -660,16 +660,11 @@ static void multifd_send_terminate_threads(void) + } + } + +-static int multifd_send_channel_destroy(QIOChannel *send) +-{ +- return socket_send_channel_destroy(send); +-} +- + static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + { + if (p->c) { + migration_ioc_unregister_yank(p->c); +- multifd_send_channel_destroy(p->c); ++ object_unref(OBJECT(p->c)); + p->c = NULL; + } + qemu_sem_destroy(&p->sem); +diff --git a/migration/socket.c b/migration/socket.c +index 3184c7c3c1..9ab89b1e08 100644 
+--- a/migration/socket.c ++++ b/migration/socket.c +@@ -60,13 +60,6 @@ QIOChannel *socket_send_channel_create_sync(Error **errp) + return QIO_CHANNEL(sioc); + } + +-int socket_send_channel_destroy(QIOChannel *send) +-{ +- /* Remove channel */ +- object_unref(OBJECT(send)); +- return 0; +-} +- + struct SocketConnectData { + MigrationState *s; + char *hostname; +diff --git a/migration/socket.h b/migration/socket.h +index 5f52eddd4c..46c233ecd2 100644 +--- a/migration/socket.h ++++ b/migration/socket.h +@@ -23,7 +23,6 @@ + + void socket_send_channel_create(QIOTaskFunc f, void *data); + QIOChannel *socket_send_channel_create_sync(Error **errp); +-int socket_send_channel_destroy(QIOChannel *send); + + void socket_start_incoming_migration(SocketAddress *saddr, Error **errp); + +-- +2.33.0 + diff --git a/migration-multifd-Ensure-packet-ramblock-is-null-ter.patch b/migration-multifd-Ensure-packet-ramblock-is-null-ter.patch new file mode 100644 index 0000000000000000000000000000000000000000..89c1d655e66104cd24fc3d384595058f9a47550c --- /dev/null +++ b/migration-multifd-Ensure-packet-ramblock-is-null-ter.patch @@ -0,0 +1,69 @@ +From 82b23ca67d0a5d77cb0266b89f76b9c8c4bffb3d Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 19 Sep 2024 12:06:11 -0300 +Subject: [95/99] migration/multifd: Ensure packet->ramblock is null-terminated + +commit 68e0fca625912c7c63a8bfbc784f53d4fefa1a13 upstream. + +Coverity points out that the current usage of strncpy to write the +ramblock name allows the field to not have an ending '\0' in case +idstr is already not null-terminated (e.g. if it's larger than 256 +bytes). + +This is currently harmless because the packet->ramblock field is never +touched again on the source side. The destination side reads only up +to the field's size from the stream and forces the last byte to be 0. + +We're still open to a programming error in the future in case this +field is ever passed into a function that expects a null-terminated +string. 
+ +Change from strncpy to QEMU's pstrcpy, which puts a '\0' at the end of +the string and doesn't fill the extra space with zeros. + +(there's no spillage between iterations of fill_packet because after +commit 87bb9e953e ("migration/multifd: Isolate ram pages packet data") +the packet is always zeroed before filling) + +Resolves: Coverity CID 1560071 +Reported-by: Peter Maydell +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240919150611.17074-1-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd-nocomp.c +[jz: upstream has split nocomp code into multifd-nocomp.c, while + openEuler hasn't yet. The function that needs to be fixed is + still named multifd_send_fill_packet in multifd.c, so we fix + it in multifd.c] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 0fcecc3759..3761a803ed 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -16,6 +16,7 @@ + #include "exec/target_page.h" + #include "sysemu/sysemu.h" + #include "exec/ramblock.h" ++#include "qemu/cutils.h" + #include "qemu/error-report.h" + #include "qapi/error.h" + #include "qapi/qapi-events-migration.h" +@@ -400,7 +401,8 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + packet->packet_num = cpu_to_be64(packet_num); + + if (pages->block) { +- strncpy(packet->ramblock, pages->block->idstr, 256); ++ pstrcpy(packet->ramblock, sizeof(packet->ramblock), ++ pages->block->idstr); + } + + for (i = 0; i < pages->num; i++) { +-- +2.33.0 + diff --git a/migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch b/migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc7f5fb6eed06794bc3488ff9aa754f2a617b8f3 --- /dev/null +++ b/migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch @@ -0,0 +1,167 @@ +From cafe218b15706cf78c3790eaa08497c09d78c7b4 Mon Sep 17 
00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:56 +0800 +Subject: [46/99] migration/multifd: Fix MultiFDSendParams.packet_num race + +commit 98ea497d8b8a5076be7b6ceb0dcc4a475373eb76 upstream. + +As reported correctly by Fabiano [1] (while per Fabiano, it sourced back to +Elena's initial report in Oct 2023), MultiFDSendParams.packet_num is buggy +to be assigned and stored. Consider two consecutive operations of: (1) +queue a job into multifd send thread X, then (2) queue another sync request +to the same send thread X. Then the MultiFDSendParams.packet_num will be +assigned twice, and the first assignment can get lost already. + +To avoid that, we move the packet_num assignment from p->packet_num into +where the thread will fill in the packet. Use atomic operations to protect +the field, making sure there's no race. + +Note that atomic fetch_add() may not be good for scaling purposes, however +multifd should be fine as number of threads should normally not go beyond +16 threads. Let's leave that concern for later but fix the issue first. + +There's also a trick on how to make it always work even on 32 bit hosts for +uint64_t packet number. Switching to uintptr_t as of now to simplify the +case. It will cause packet number to overflow easier on 32 bit, but that +shouldn't be a major concern for now as 32 bit systems are not the major +audience for any performance concerns like what multifd wants to address. + +We also need to move multifd_send_state definition upper, so that +multifd_send_fill_packet() can reference it. 
+ +[1] https://lore.kernel.org/r/87o7d1jlu5.fsf@suse.de + +Reported-by: Elena Ufimtseva +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-23-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 56 +++++++++++++++++++++++++++------------------ + migration/multifd.h | 2 -- + 2 files changed, 34 insertions(+), 24 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 723b1d0b35..c52c18046a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -46,6 +46,35 @@ typedef struct { + uint64_t unused2[4]; /* Reserved for future use */ + } __attribute__((packed)) MultiFDInit_t; + ++struct { ++ MultiFDSendParams *params; ++ /* array of pages to sent */ ++ MultiFDPages_t *pages; ++ /* ++ * Global number of generated multifd packets. ++ * ++ * Note that we used 'uintptr_t' because it'll naturally support atomic ++ * operations on both 32bit / 64 bits hosts. It means on 32bit systems ++ * multifd will overflow the packet_num easier, but that should be ++ * fine. ++ * ++ * Another option is to use QEMU's Stat64 then it'll be 64 bits on all ++ * hosts, however so far it does not support atomic fetch_add() yet. ++ * Make it easy for now. ++ */ ++ uintptr_t packet_num; ++ /* send channels ready */ ++ QemuSemaphore channels_ready; ++ /* ++ * Have we already run terminate threads. There is a race when it ++ * happens that we got one error while we are exiting. ++ * We will use atomic operations. Only valid values are 0 and 1. 
++ */ ++ int exiting; ++ /* multifd ops */ ++ MultiFDMethods *ops; ++} *multifd_send_state; ++ + /* Multifd without compression */ + + /** +@@ -293,13 +322,16 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + { + MultiFDPacket_t *packet = p->packet; + MultiFDPages_t *pages = p->pages; ++ uint64_t packet_num; + int i; + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); + packet->normal_pages = cpu_to_be32(pages->num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); +- packet->packet_num = cpu_to_be64(p->packet_num); ++ ++ packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num); ++ packet->packet_num = cpu_to_be64(packet_num); + + if (pages->block) { + strncpy(packet->ramblock, pages->block->idstr, 256); +@@ -315,7 +347,7 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + p->packets_sent++; + p->total_normal_pages += pages->num; + +- trace_multifd_send(p->id, p->packet_num, pages->num, p->flags, ++ trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + } + +@@ -399,24 +431,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return 0; + } + +-struct { +- MultiFDSendParams *params; +- /* array of pages to sent */ +- MultiFDPages_t *pages; +- /* global number of generated multifd packets */ +- uint64_t packet_num; +- /* send channels ready */ +- QemuSemaphore channels_ready; +- /* +- * Have we already run terminate threads. There is a race when it +- * happens that we got one error while we are exiting. +- * We will use atomic operations. Only valid values are 0 and 1. 
+- */ +- int exiting; +- /* multifd ops */ +- MultiFDMethods *ops; +-} *multifd_send_state; +- + static bool multifd_send_should_exit(void) + { + return qatomic_read(&multifd_send_state->exiting); +@@ -498,7 +512,6 @@ static bool multifd_send_pages(void) + */ + assert(qatomic_read(&p->pending_job) == false); + qatomic_set(&p->pending_job, true); +- p->packet_num = multifd_send_state->packet_num++; + multifd_send_state->pages = p->pages; + p->pages = pages; + qemu_mutex_unlock(&p->mutex); +@@ -731,7 +744,6 @@ int multifd_send_sync_main(void) + trace_multifd_send_sync_main_signal(p->id); + + qemu_mutex_lock(&p->mutex); +- p->packet_num = multifd_send_state->packet_num++; + /* + * We should be the only user so far, so not possible to be set by + * others concurrently. +diff --git a/migration/multifd.h b/migration/multifd.h +index 9b40a53cb6..98876ff94a 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -97,8 +97,6 @@ typedef struct { + bool running; + /* multifd flags for each packet */ + uint32_t flags; +- /* global number of generated multifd packets */ +- uint64_t packet_num; + /* + * The sender thread has work to do if either of below boolean is set. + * +-- +2.33.0 + diff --git a/migration-multifd-Fix-error-message-in-multifd_recv_.patch b/migration-multifd-Fix-error-message-in-multifd_recv_.patch new file mode 100644 index 0000000000000000000000000000000000000000..c2764878df8f1b39e8b7a6c28c68994494ba69db --- /dev/null +++ b/migration-multifd-Fix-error-message-in-multifd_recv_.patch @@ -0,0 +1,41 @@ +From 122a0daf78f540bb3595432acc33a749cc6ca5a4 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:10 +0200 +Subject: [01/99] migration/multifd: Fix error message in + multifd_recv_initial_packet() + +commit c77b40859a5201f01b44dc475258405e289c431f upstream. 
+ +In multifd_recv_initial_packet(), if MultiFDInit_t->id is greater than +the configured number of multifd channels, an irrelevant error message +about multifd version is printed. + +Change the error message to a relevant one about the channel id. + +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20231231093016.14204-6-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f3bf6888c0..055b2688ad 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -229,8 +229,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.id > migrate_multifd_channels()) { +- error_setg(errp, "multifd: received channel version %u " +- "expected %u", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received channel id %u is greater than " ++ "number of channels %u", msg.id, migrate_multifd_channels()); + return -1; + } + +-- +2.33.0 + diff --git a/migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch b/migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch new file mode 100644 index 0000000000000000000000000000000000000000..93d7d800dc72d1c99aa2ef386ad672139704f97e --- /dev/null +++ b/migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch @@ -0,0 +1,49 @@ +From 313207b5d51f530b45f106addcf489845f32b449 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:12 +0200 +Subject: [03/99] migration/multifd: Fix leaking of Error in TLS error flow + +commit 6ae208ce9656114e428b1a75ac62a6761ed3216c upstream. + +If there is an error in multifd TLS handshake task, +multifd_tls_outgoing_handshake() retrieves the error with +qio_task_propagate_error() but never frees it. + +Fix it by freeing the obtained Error. + +In addition, the error is not reported at all, so report it with +migrate_set_error(). 
+ +Fixes: 29647140157a ("migration/tls: add support for multifd tls-handshake") +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20231231093016.14204-8-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 06585f0141..8221ebe4b6 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -791,6 +791,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + ++ migrate_set_error(migrate_get_current(), err); + /* + * Error happen, mark multifd_send_thread status as 'quit' although it + * is not created, and then tell who pay attention to me. +@@ -798,6 +799,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + p->quit = true; + qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_post(&p->sem_sync); ++ error_free(err); + } + + static void *multifd_tls_handshake_thread(void *opaque) +-- +2.33.0 + diff --git a/migration-multifd-Fix-loop-conditions-in-multifd_zst.patch b/migration-multifd-Fix-loop-conditions-in-multifd_zst.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e6a00782fc92a205f0bc6ce37715fb9e73c73b3 --- /dev/null +++ b/migration-multifd-Fix-loop-conditions-in-multifd_zst.patch @@ -0,0 +1,57 @@ +From 75ab1fea57e8925efd8a3bef827d0c0f0cdd1fa2 Mon Sep 17 00:00:00 2001 +From: Stefan Weil +Date: Tue, 10 Sep 2024 07:41:38 +0200 +Subject: [93/99] migration/multifd: Fix loop conditions in + multifd_zstd_send_prepare and multifd_zstd_recv + +commit cb0ed522a51a7d4b1fde535972d4aeeb82447928 upstream. 
+ +GitHub's CodeQL reports four critical errors which are fixed by this commit: + + Unsigned difference expression compared to zero + +An expression (u - v > 0) with unsigned values u, v is only false if u == v, +so all changed expressions did not work as expected. + +Signed-off-by: Stefan Weil +Link: https://lore.kernel.org/r/20240910054138.1458555-1-sw@weilnetz.de +[peterx: Fix mangled email for author] +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd-zstd.c +[jz: resolve context conflict due to p->page which not renamed to page yet] +Signed-off-by: Jason Zeng +--- + migration/multifd-zstd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index ca17b7e310..46ee68b6ce 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -152,9 +152,9 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + */ + do { + ret = ZSTD_compressStream2(z->zcs, &z->out, &z->in, flush); +- } while (ret > 0 && (z->in.size - z->in.pos > 0) +- && (z->out.size - z->out.pos > 0)); +- if (ret > 0 && (z->in.size - z->in.pos > 0)) { ++ } while (ret > 0 && (z->in.size > z->in.pos) ++ && (z->out.size > z->out.pos)); ++ if (ret > 0 && (z->in.size > z->in.pos)) { + error_setg(errp, "multifd %u: compressStream buffer too small", + p->id); + return -1; +@@ -299,7 +299,7 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) + */ + do { + ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); +- } while (ret > 0 && (z->in.size - z->in.pos > 0) ++ } while (ret > 0 && (z->in.size > z->in.pos) + && (z->out.pos < p->page_size)); + if (ret > 0 && (z->out.pos < p->page_size)) { + error_setg(errp, "multifd %u: decompressStream buffer too small", +-- +2.33.0 + diff --git a/migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch b/migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch new file mode 100644 index 
0000000000000000000000000000000000000000..8234b59c049a49e85bcdabb55ebc5d96ef001c00 --- /dev/null +++ b/migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch @@ -0,0 +1,36 @@ +From 85507465a9de3d745204ad86c4cd4a6a7b5004b1 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Wed, 28 Aug 2024 11:56:48 -0300 +Subject: [87/99] migration/multifd: Fix p->iov leak in multifd-uadk.c + +commit 405e352d28c24991cacfdebccf67d56c4795cf6e upstream. + +The send_cleanup() hook should free the p->iov that was allocated at +send_setup(). This was missed because the UADK code is conditional on +the presence of the accelerator, so it's not tested by default. + +Fixes: 819dd20636 ("migration/multifd: Add UADK initialization") +Reported-by: Peter Xu +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index d12353fb21..9a582fc919 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -146,6 +146,8 @@ static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) + + multifd_uadk_uninit_sess(wd); + p->compress_data = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + } + + static inline void prepare_next_iov(MultiFDSendParams *p, void *base, +-- +2.33.0 + diff --git a/migration-multifd-Fix-rb-receivedmap-cleanup-race.patch b/migration-multifd-Fix-rb-receivedmap-cleanup-race.patch new file mode 100644 index 0000000000000000000000000000000000000000..9b251a344a36384ff05a7a1f6181d11a540f5281 --- /dev/null +++ b/migration-multifd-Fix-rb-receivedmap-cleanup-race.patch @@ -0,0 +1,95 @@ +From a15e40dc17b96c431ad4c71377a3a66e57a00dab Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 17 Sep 2024 15:58:02 -0300 +Subject: [94/99] migration/multifd: Fix rb->receivedmap cleanup race + +commit 4ce56229087860805877075ddb29dd44578365a9 upstream. 
+ +Fix a segmentation fault in multifd when rb->receivedmap is cleared +too early. + +After commit 5ef7e26bdb ("migration/multifd: solve zero page causing +multiple page faults"), multifd started using the rb->receivedmap +bitmap, which belongs to ram.c and is initialized and *freed* from the +ram SaveVMHandlers. + +Multifd threads are live until migration_incoming_state_destroy(), +which is called after qemu_loadvm_state_cleanup(), leading to a crash +when accessing rb->receivedmap. + +process_incoming_migration_co() ... + qemu_loadvm_state() multifd_nocomp_recv() + qemu_loadvm_state_cleanup() ramblock_recv_bitmap_set_offset() + rb->receivedmap = NULL set_bit_atomic(..., rb->receivedmap) + ... + migration_incoming_state_destroy() + multifd_recv_cleanup() + multifd_recv_terminate_threads(NULL) + +Move the loadvm cleanup into migration_incoming_state_destroy(), after +multifd_recv_cleanup() to ensure multifd threads have already exited +when rb->receivedmap is cleared. + +Adjust the postcopy listen thread comment to indicate that we still +want to skip the cpu synchronization. 
+ +CC: qemu-stable@nongnu.org +Fixes: 5ef7e26bdb ("migration/multifd: solve zero page causing multiple page faults") +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240917185802.15619-3-farosas@suse.de +[peterx: added comment in migration_incoming_state_destroy()] +Signed-off-by: Peter Xu + + Conflicts: + migration/migration.c +[jz: resolve context conflict due to non-multifd compression which is + already deleted in upstream while still in openEuler] +Signed-off-by: Jason Zeng +--- + migration/migration.c | 5 +++++ + migration/savevm.c | 6 ++++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 59c0bbee67..107e106b73 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -276,6 +276,11 @@ void migration_incoming_state_destroy(void) + + multifd_recv_cleanup(); + compress_threads_load_cleanup(); ++ /* ++ * RAM state cleanup needs to happen after multifd cleanup, because ++ * multifd threads can use some of its states (receivedmap). ++ */ ++ qemu_loadvm_state_cleanup(); + + if (mis->to_src_file) { + /* Tell source that we are done */ +diff --git a/migration/savevm.c b/migration/savevm.c +index cc65da605e..29389068df 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2959,7 +2959,10 @@ int qemu_loadvm_state(QEMUFile *f) + trace_qemu_loadvm_state_post_main(ret); + + if (mis->have_listen_thread) { +- /* Listen thread still going, can't clean up yet */ ++ /* ++ * Postcopy listen thread still going, don't synchronize the ++ * cpus yet. 
++ */ + return ret; + } + +@@ -3002,7 +3005,6 @@ int qemu_loadvm_state(QEMUFile *f) + } + } + +- qemu_loadvm_state_cleanup(); + cpu_synchronize_all_post_init(); + + return ret; +-- +2.33.0 + diff --git a/migration-multifd-Forbid-spurious-wakeups.patch b/migration-multifd-Forbid-spurious-wakeups.patch new file mode 100644 index 0000000000000000000000000000000000000000..82d7cb7c0689fe221200abb8342eea49c4a2970d --- /dev/null +++ b/migration-multifd-Forbid-spurious-wakeups.patch @@ -0,0 +1,51 @@ +From d4f46c41e0dd921563614ad48e7099eeac06d285 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:48 +0800 +Subject: [38/99] migration/multifd: Forbid spurious wakeups + +commit 859ebaf346e8b5dece6cf255c604fe953d8ec9ab upstream. + +Now multifd's logic is designed to have no spurious wakeup. I still +remember a talk to Juan and he seems to agree we should drop it now, and if +my memory was right it was there because multifd used to hit that when +still debugging. + +Let's drop it and see what can explode; as long as it's not reaching +soft-freeze. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-15-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d4528cf9d1..3b7984cf99 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -760,7 +760,9 @@ static void *multifd_send_thread(void *opaque) + p->next_packet_size = 0; + qatomic_set(&p->pending_job, false); + qemu_mutex_unlock(&p->mutex); +- } else if (qatomic_read(&p->pending_sync)) { ++ } else { ++ /* If not a normal job, must be a sync request */ ++ assert(qatomic_read(&p->pending_sync)); + p->flags = MULTIFD_FLAG_SYNC; + multifd_send_fill_packet(p); + ret = qio_channel_write_all(p->c, (void *)p->packet, +@@ -775,9 +777,6 @@ static void *multifd_send_thread(void *opaque) + qatomic_set(&p->pending_sync, false); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem_sync); +- } else { +- qemu_mutex_unlock(&p->mutex); +- /* sometimes there are spurious wakeups */ + } + } + +-- +2.33.0 + diff --git a/migration-multifd-Implement-ram_save_target_page_mul.patch b/migration-multifd-Implement-ram_save_target_page_mul.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb771c10445b87feab17e414e1b01fa95c713a64 --- /dev/null +++ b/migration-multifd-Implement-ram_save_target_page_mul.patch @@ -0,0 +1,94 @@ +From 5107700317e5cba24822f71615a001a8a62fea07 Mon Sep 17 00:00:00 2001 +From: Hao Xiang +Date: Mon, 11 Mar 2024 18:00:13 +0000 +Subject: [70/99] migration/multifd: Implement ram_save_target_page_multifd to + handle multifd version of MigrationOps::ram_save_target_page. + +commit 9ae90f73e623c8b8c7ec1fccd8ca493805df8fbd upstream. + +1. Add a dedicated handler for MigrationOps::ram_save_target_page in +multifd live migration. +2. 
Refactor ram_save_target_page_legacy so that the legacy and multifd +handlers don't have internal functions calling into each other. + +Signed-off-by: Hao Xiang +Reviewed-by: Fabiano Rosas +Message-Id: <20240226195654.934709-4-hao.xiang@bytedance.com> +Link: https://lore.kernel.org/r/20240311180015.3359271-6-hao.xiang@linux.dev +Signed-off-by: Peter Xu +[jz: resolve context conflict due to BQL name] +Signed-off-by: Jason Zeng +--- + migration/ram.c | 38 +++++++++++++++++++++++++++++--------- + 1 file changed, 29 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index bae5853996..fe2e4c6164 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2233,7 +2233,6 @@ static bool encrypted_test_list(RAMState *rs, RAMBlock *block, + */ + static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + { +- RAMBlock *block = pss->block; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; + int res; + +@@ -2260,17 +2259,33 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + return 1; + } + ++ return ram_save_page(rs, pss); ++} ++ ++/** ++ * ram_save_target_page_multifd: send one target page to multifd workers ++ * ++ * Returns 1 if the page was queued, -1 otherwise. ++ * ++ * @rs: current RAM state ++ * @pss: data about the page we want to send ++ */ ++static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss) ++{ ++ RAMBlock *block = pss->block; ++ ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; ++ + /* +- * Do not use multifd in postcopy as one whole host page should be +- * placed. Meanwhile postcopy requires atomic update of pages, so even +- * if host page size == guest page size the dest guest during run may +- * still see partially copied pages which is data corruption. ++ * While using multifd live migration, we still need to handle zero ++ * page checking on the migration main thread. 
+ */ +- if (migrate_multifd() && !migration_in_postcopy()) { +- return ram_save_multifd_page(block, offset); ++ if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) { ++ if (save_zero_page(rs, pss, offset)) { ++ return 1; ++ } + } + +- return ram_save_page(rs, pss); ++ return ram_save_multifd_page(block, offset); + } + + /* Should be called before sending a host page */ +@@ -3433,7 +3448,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + } + + migration_ops = g_malloc0(sizeof(MigrationOps)); +- migration_ops->ram_save_target_page = ram_save_target_page_legacy; ++ ++ if (migrate_multifd()) { ++ migration_ops->ram_save_target_page = ram_save_target_page_multifd; ++ } else { ++ migration_ops->ram_save_target_page = ram_save_target_page_legacy; ++ } + + qemu_mutex_unlock_iothread(); + ret = multifd_send_sync_main(); +-- +2.33.0 + diff --git a/migration-multifd-Implement-zero-page-transmission-o.patch b/migration-multifd-Implement-zero-page-transmission-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..85b976c695bd45e6adb4359dadbc5d6a9064914b --- /dev/null +++ b/migration-multifd-Implement-zero-page-transmission-o.patch @@ -0,0 +1,622 @@ +From 68f37655bf414e74c623164c9c20bc7884ee5bb8 Mon Sep 17 00:00:00 2001 +From: Hao Xiang +Date: Mon, 11 Mar 2024 18:00:12 +0000 +Subject: [69/99] migration/multifd: Implement zero page transmission on the + multifd thread. + +commit 303e6f54f9657be76ee060006ee2d4cacff263a0 upstream. + +1. Add zero_pages field in MultiFDPacket_t. +2. Implements the zero page detection and handling on the multifd +threads for non-compression, zlib and zstd compression backends. +3. Added a new value 'multifd' in ZeroPageDetection enumeration. +4. Adds zero page counters and updates multifd send/receive tracing +format to track the newly added counters. 
+ +Signed-off-by: Hao Xiang +Acked-by: Markus Armbruster +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240311180015.3359271-5-hao.xiang@linux.dev +Signed-off-by: Peter Xu + + Conflicts: + migration/meson.build + migration/multifd.c +[jz: there is no multifd_set_file_bitmap() because we didn't backport + mapped-ram, so abandon changes in multifd_set_file_bitmap()] +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-zero-page.c | 87 ++++++++++++++++++++++++++++++++ + migration/multifd-zlib.c | 21 ++++++-- + migration/multifd-zstd.c | 20 ++++++-- + migration/multifd.c | 83 +++++++++++++++++++++++++----- + migration/multifd.h | 23 ++++++++- + migration/ram.c | 1 - + migration/trace-events | 8 +-- + qapi/migration.json | 7 ++- + 10 files changed, 222 insertions(+), 31 deletions(-) + create mode 100644 migration/multifd-zero-page.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index cad1e04150..b3b9238b65 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -735,7 +735,7 @@ const PropertyInfo qdev_prop_mig_mode = { + const PropertyInfo qdev_prop_zero_page_detection = { + .name = "ZeroPageDetection", + .description = "zero_page_detection values, " +- "none,legacy", ++ "none,legacy,multifd", + .enum_table = &ZeroPageDetection_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/meson.build b/migration/meson.build +index d9b46ef0df..d619ebf238 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,6 +22,7 @@ system_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', ++ 'multifd-zero-page.c', + 'options.c', + 'postcopy-ram.c', + 'savevm.c', +diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c +new file mode 100644 +index 0000000000..1ba38be636 +--- /dev/null ++++ b/migration/multifd-zero-page.c +@@ -0,0 +1,87 @@ 
++/* ++ * Multifd zero page detection implementation. ++ * ++ * Copyright (c) 2024 Bytedance Inc ++ * ++ * Authors: ++ * Hao Xiang ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/cutils.h" ++#include "exec/ramblock.h" ++#include "migration.h" ++#include "multifd.h" ++#include "options.h" ++#include "ram.h" ++ ++static bool multifd_zero_page_enabled(void) ++{ ++ return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD; ++} ++ ++static void swap_page_offset(ram_addr_t *pages_offset, int a, int b) ++{ ++ ram_addr_t temp; ++ ++ if (a == b) { ++ return; ++ } ++ ++ temp = pages_offset[a]; ++ pages_offset[a] = pages_offset[b]; ++ pages_offset[b] = temp; ++} ++ ++/** ++ * multifd_send_zero_page_detect: Perform zero page detection on all pages. ++ * ++ * Sorts normal pages before zero pages in p->pages->offset and updates ++ * p->pages->normal_num. ++ * ++ * @param p A pointer to the send params. ++ */ ++void multifd_send_zero_page_detect(MultiFDSendParams *p) ++{ ++ MultiFDPages_t *pages = p->pages; ++ RAMBlock *rb = pages->block; ++ int i = 0; ++ int j = pages->num - 1; ++ ++ if (!multifd_zero_page_enabled()) { ++ pages->normal_num = pages->num; ++ return; ++ } ++ ++ /* ++ * Sort the page offset array by moving all normal pages to ++ * the left and all zero pages to the right of the array. 
++ */ ++ while (i <= j) { ++ uint64_t offset = pages->offset[i]; ++ ++ if (!buffer_is_zero(rb->host + offset, p->page_size)) { ++ i++; ++ continue; ++ } ++ ++ swap_page_offset(pages->offset, i, j); ++ ram_release_page(rb->idstr, offset); ++ j--; ++ } ++ ++ pages->normal_num = i; ++} ++ ++void multifd_recv_zero_page_process(MultiFDRecvParams *p) ++{ ++ for (int i = 0; i < p->zero_num; i++) { ++ void *page = p->host + p->zero[i]; ++ if (!buffer_is_zero(page, p->page_size)) { ++ memset(page, 0, p->page_size); ++ } ++ } ++} +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 6120faad65..83c0374380 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -123,13 +123,15 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + +- multifd_send_prepare_header(p); ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } + +- for (i = 0; i < pages->num; i++) { ++ for (i = 0; i < pages->normal_num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == pages->num - 1) { ++ if (i == pages->normal_num - 1) { + flush = Z_SYNC_FLUSH; + } + +@@ -172,10 +174,10 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + p->iov[p->iovs_num].iov_len = out_size; + p->iovs_num++; + p->next_packet_size = out_size; +- p->flags |= MULTIFD_FLAG_ZLIB; + ++out: ++ p->flags |= MULTIFD_FLAG_ZLIB; + multifd_send_fill_packet(p); +- + return 0; + } + +@@ -261,6 +263,14 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) + p->id, flags, MULTIFD_FLAG_ZLIB); + return -1; + } ++ ++ multifd_recv_zero_page_process(p); ++ ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); + + if (ret != 0) { +@@ -310,6 +320,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) + p->id, out_size, expected_size); + return -1; + } ++ + return 0; + } + +diff --git a/migration/multifd-zstd.c 
b/migration/multifd-zstd.c +index cac236833d..02112255ad 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -118,16 +118,18 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + +- multifd_send_prepare_header(p); ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } + + z->out.dst = z->zbuff; + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < pages->num; i++) { ++ for (i = 0; i < pages->normal_num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == pages->num - 1) { ++ if (i == pages->normal_num - 1) { + flush = ZSTD_e_flush; + } + z->in.src = p->pages->block->host + pages->offset[i]; +@@ -161,10 +163,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + p->iov[p->iovs_num].iov_len = z->out.pos; + p->iovs_num++; + p->next_packet_size = z->out.pos; +- p->flags |= MULTIFD_FLAG_ZSTD; + ++out: ++ p->flags |= MULTIFD_FLAG_ZSTD; + multifd_send_fill_packet(p); +- + return 0; + } + +@@ -257,6 +259,14 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) + p->id, flags, MULTIFD_FLAG_ZSTD); + return -1; + } ++ ++ multifd_recv_zero_page_process(p); ++ ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); + + if (ret != 0) { +diff --git a/migration/multifd.c b/migration/multifd.c +index cac5f2743c..6c01179858 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -11,6 +11,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/cutils.h" + #include "qemu/rcu.h" + #include "exec/target_page.h" + #include "sysemu/sysemu.h" +@@ -132,13 +133,13 @@ static void multifd_send_prepare_iovs(MultiFDSendParams *p) + { + MultiFDPages_t *pages = p->pages; + +- for (int i = 0; i < pages->num; i++) { ++ for (int i = 0; i < pages->normal_num; i++) { + p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; + p->iov[p->iovs_num].iov_len = p->page_size; + 
p->iovs_num++; + } + +- p->next_packet_size = pages->num * p->page_size; ++ p->next_packet_size = pages->normal_num * p->page_size; + } + + /** +@@ -157,6 +158,8 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + bool use_zero_copy_send = migrate_zero_copy_send(); + int ret; + ++ multifd_send_zero_page_detect(p); ++ + if (!multifd_use_packets()) { + multifd_send_prepare_iovs(p); + return 0; +@@ -238,6 +241,13 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } ++ ++ multifd_recv_zero_page_process(p); ++ ++ if (!p->normal_num) { ++ return 0; ++ } ++ + for (int i = 0; i < p->normal_num; i++) { + p->iov[i].iov_base = p->host + p->normal[i]; + p->iov[i].iov_len = p->page_size; +@@ -272,6 +282,7 @@ static void multifd_pages_reset(MultiFDPages_t *pages) + * overwritten later when reused. + */ + pages->num = 0; ++ pages->normal_num = 0; + pages->block = NULL; + } + +@@ -363,11 +374,13 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + MultiFDPacket_t *packet = p->packet; + MultiFDPages_t *pages = p->pages; + uint64_t packet_num; ++ uint32_t zero_num = pages->num - pages->normal_num; + int i; + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->normal_pages = cpu_to_be32(pages->num); ++ packet->normal_pages = cpu_to_be32(pages->normal_num); ++ packet->zero_pages = cpu_to_be32(zero_num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + + packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num); +@@ -385,10 +398,11 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + } + + p->packets_sent++; +- p->total_normal_pages += pages->num; ++ p->total_normal_pages += pages->normal_num; ++ p->total_zero_pages += zero_num; + +- trace_multifd_send(p->id, packet_num, pages->num, p->flags, +- p->next_packet_size); ++ trace_multifd_send(p->id, packet_num, pages->normal_num, zero_num, ++ p->flags, 
p->next_packet_size); + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -429,20 +443,29 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->normal_num = be32_to_cpu(packet->normal_pages); + if (p->normal_num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " +- "with %u pages and expected maximum pages are %u", ++ "with %u normal pages and expected maximum pages are %u", + p->normal_num, packet->pages_alloc) ; + return -1; + } + ++ p->zero_num = be32_to_cpu(packet->zero_pages); ++ if (p->zero_num > packet->pages_alloc - p->normal_num) { ++ error_setg(errp, "multifd: received packet " ++ "with %u zero pages and expected maximum zero pages are %u", ++ p->zero_num, packet->pages_alloc - p->normal_num) ; ++ return -1; ++ } ++ + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); + p->packets_recved++; + p->total_normal_pages += p->normal_num; ++ p->total_zero_pages += p->zero_num; + +- trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, +- p->next_packet_size); ++ trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num, ++ p->flags, p->next_packet_size); + +- if (p->normal_num == 0) { ++ if (p->normal_num == 0 && p->zero_num == 0) { + return 0; + } + +@@ -468,6 +491,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->normal[i] = offset; + } + ++ for (i = 0; i < p->zero_num; i++) { ++ uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); ++ ++ if (offset > (p->block->used_length - p->page_size)) { ++ error_setg(errp, "multifd: offset too long %" PRIu64 ++ " (max " RAM_ADDR_FMT ")", ++ offset, p->block->used_length); ++ return -1; ++ } ++ p->zero[i] = offset; ++ } ++ + return 0; + } + +@@ -866,6 +901,8 @@ static void *multifd_send_thread(void *opaque) + + stat64_add(&mig_stats.multifd_bytes, + (uint64_t)p->next_packet_size + p->packet_len); ++ 
stat64_add(&mig_stats.normal_pages, pages->normal_num); ++ stat64_add(&mig_stats.zero_pages, pages->num - pages->normal_num); + + multifd_pages_reset(p->pages); + p->next_packet_size = 0; +@@ -913,7 +950,8 @@ out: + + rcu_unregister_thread(); + migration_threads_remove(thread); +- trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); ++ trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages, ++ p->total_zero_pages); + + return NULL; + } +@@ -1189,6 +1227,8 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) + p->iov = NULL; + g_free(p->normal); + p->normal = NULL; ++ g_free(p->zero); ++ p->zero = NULL; + multifd_recv_state->ops->recv_cleanup(p); + } + +@@ -1294,7 +1334,7 @@ static void *multifd_recv_thread(void *opaque) + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; +- has_data = !!p->normal_num; ++ has_data = p->normal_num || p->zero_num; + qemu_mutex_unlock(&p->mutex); + } + +@@ -1319,7 +1359,9 @@ static void *multifd_recv_thread(void *opaque) + } + + rcu_unregister_thread(); +- trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); ++ trace_multifd_recv_thread_end(p->id, p->packets_recved, ++ p->total_normal_pages, ++ p->total_zero_pages); + + return NULL; + } +@@ -1362,6 +1404,7 @@ int multifd_recv_setup(Error **errp) + p->name = g_strdup_printf("multifdrecv_%d", i); + p->iov = g_new0(struct iovec, page_count); + p->normal = g_new0(ram_addr_t, page_count); ++ p->zero = g_new0(ram_addr_t, page_count); + p->page_count = page_count; + p->page_size = qemu_target_page_size(); + } +@@ -1437,3 +1480,17 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); + } ++ ++bool multifd_send_prepare_common(MultiFDSendParams *p) ++{ ++ multifd_send_zero_page_detect(p); ++ ++ if (!p->pages->normal_num) { ++ p->next_packet_size = 0; ++ return false; ++ } 
++ ++ multifd_send_prepare_header(p); ++ ++ return true; ++} +diff --git a/migration/multifd.h b/migration/multifd.h +index 6a54377cc1..d99603c6a4 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -48,14 +48,24 @@ typedef struct { + /* size of the next packet that contains pages */ + uint32_t next_packet_size; + uint64_t packet_num; +- uint64_t unused[4]; /* Reserved for future use */ ++ /* zero pages */ ++ uint32_t zero_pages; ++ uint32_t unused32[1]; /* Reserved for future use */ ++ uint64_t unused64[3]; /* Reserved for future use */ + char ramblock[256]; ++ /* ++ * This array contains the pointers to: ++ * - normal pages (initial normal_pages entries) ++ * - zero pages (following zero_pages entries) ++ */ + uint64_t offset[]; + } __attribute__((packed)) MultiFDPacket_t; + + typedef struct { + /* number of used pages */ + uint32_t num; ++ /* number of normal pages */ ++ uint32_t normal_num; + /* number of allocated pages */ + uint32_t allocated; + /* offset of each page */ +@@ -122,6 +132,8 @@ typedef struct { + uint64_t packets_sent; + /* non zero pages sent through this channel */ + uint64_t total_normal_pages; ++ /* zero pages sent through this channel */ ++ uint64_t total_zero_pages; + /* buffers to send */ + struct iovec *iov; + /* number of iovs used */ +@@ -176,12 +188,18 @@ typedef struct { + uint8_t *host; + /* non zero pages recv through this channel */ + uint64_t total_normal_pages; ++ /* zero pages recv through this channel */ ++ uint64_t total_zero_pages; + /* buffers to recv */ + struct iovec *iov; + /* Pages that are not zero */ + ram_addr_t *normal; + /* num of non zero pages */ + uint32_t normal_num; ++ /* Pages that are zero */ ++ ram_addr_t *zero; ++ /* num of zero pages */ ++ uint32_t zero_num; + /* used for de-compression methods */ + void *compress_data; + } MultiFDRecvParams; +@@ -203,6 +221,9 @@ typedef struct { + + void multifd_register_ops(int method, MultiFDMethods *ops); + void multifd_send_fill_packet(MultiFDSendParams 
*p); ++bool multifd_send_prepare_common(MultiFDSendParams *p); ++void multifd_send_zero_page_detect(MultiFDSendParams *p); ++void multifd_recv_zero_page_process(MultiFDRecvParams *p); + + static inline void multifd_send_prepare_header(MultiFDSendParams *p) + { +diff --git a/migration/ram.c b/migration/ram.c +index 7d0f1120df..bae5853996 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1396,7 +1396,6 @@ static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset) + if (!multifd_queue_page(block, offset)) { + return -1; + } +- stat64_add(&mig_stats.normal_pages, 1); + + return 1; + } +diff --git a/migration/trace-events b/migration/trace-events +index bf1a069632..f0e1cb80c7 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -128,21 +128,21 @@ postcopy_preempt_reset_channel(void) "" + # multifd.c + multifd_new_send_channel_async(uint8_t id) "channel %u" + multifd_new_send_channel_async_error(uint8_t id, void *err) "channel=%u err=%p" +-multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_recv(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t zero, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u" + multifd_recv_new_channel(uint8_t id) "channel %u" + multifd_recv_sync_main(long packet_num) "packet num %ld" + multifd_recv_sync_main_signal(uint8_t id) "channel %u" + multifd_recv_sync_main_wait(uint8_t id) "iter %u" + multifd_recv_terminate_threads(bool error) "error %d" +-multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64 + multifd_recv_thread_start(uint8_t id) 
"%u" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" ++multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal_pages, uint32_t zero_pages, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u" + multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" + multifd_send_sync_main_signal(uint8_t id) "channel %u" + multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(void) "" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" +diff --git a/qapi/migration.json b/qapi/migration.json +index ff247a50ce..fc3178b1dc 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -660,10 +660,15 @@ + # + # @legacy: Perform zero page checking in main migration thread. + # ++# @multifd: Perform zero page checking in multifd sender thread if ++# multifd migration is enabled, else in the main migration ++# thread as for @legacy. 
++# + # Since: 9.0 ++# + ## + { 'enum': 'ZeroPageDetection', +- 'data': [ 'none', 'legacy' ] } ++ 'data': [ 'none', 'legacy', 'multifd' ] } + + ## + # @BitmapMigrationBitmapAliasTransform: +-- +2.33.0 + diff --git a/migration-multifd-Join-the-TLS-thread.patch b/migration-multifd-Join-the-TLS-thread.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7c64d83b782ad6da30f37915800d0df584186a4 --- /dev/null +++ b/migration-multifd-Join-the-TLS-thread.patch @@ -0,0 +1,64 @@ +From 234d32c5cef7114f2554f18c8ad73fb294fb4542 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:13 -0300 +Subject: [49/99] migration/multifd: Join the TLS thread + +commit e1921f10d9afe651f4887284e85f6789b37e67d3 upstream. + +We're currently leaking the resources of the TLS thread by not joining +it and also overwriting the p->thread pointer altogether. + +Fixes: a1af605bd5 ("migration/multifd: fix hangup with TLS-Multifd due to blocking handshake") +Cc: qemu-stable +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-2-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 8 +++++++- + migration/multifd.h | 2 ++ + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index c0d8f438bc..459e7889e8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -631,6 +631,10 @@ static void multifd_send_terminate_threads(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ if (p->tls_thread_created) { ++ qemu_thread_join(&p->tls_thread); ++ } ++ + if (p->running) { + qemu_thread_join(&p->thread); + } +@@ -925,7 +929,9 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); + p->c = QIO_CHANNEL(tioc); 
+- qemu_thread_create(&p->thread, "multifd-tls-handshake-worker", ++ ++ p->tls_thread_created = true; ++ qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker", + multifd_tls_handshake_thread, p, + QEMU_THREAD_JOINABLE); + return true; +diff --git a/migration/multifd.h b/migration/multifd.h +index 78a2317263..720c9d50db 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -73,6 +73,8 @@ typedef struct { + char *name; + /* channel thread id */ + QemuThread thread; ++ QemuThread tls_thread; ++ bool tls_thread_created; + /* communication channel */ + QIOChannel *c; + /* is the yank function registered */ +-- +2.33.0 + diff --git a/migration-multifd-Make-multifd_channel_connect-retur.patch b/migration-multifd-Make-multifd_channel_connect-retur.patch new file mode 100644 index 0000000000000000000000000000000000000000..66daf90dea43ddce46e49afb4db7c887279d57ea --- /dev/null +++ b/migration-multifd-Make-multifd_channel_connect-retur.patch @@ -0,0 +1,54 @@ +From 797304d0151652a684f0df388036c2032dcc3979 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:52:59 +0800 +Subject: [59/99] migration/multifd: Make multifd_channel_connect() return void + +commit 770de49c00fa9eb262473f282c92979b47b7fd22 upstream. + +It never fails, drop the retval and also the Error**. 
+ +Suggested-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-4-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3e85bc544a..a7289289a4 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -947,9 +947,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + return true; + } + +-static bool multifd_channel_connect(MultiFDSendParams *p, +- QIOChannel *ioc, +- Error **errp) ++static void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc) + { + qio_channel_set_delay(ioc, false); + +@@ -960,7 +958,6 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + p->thread_created = true; + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); +- return true; + } + + /* +@@ -992,7 +989,8 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + return; + } + } else { +- ret = multifd_channel_connect(p, ioc, &local_err); ++ multifd_channel_connect(p, ioc); ++ ret = true; + } + + out: +-- +2.33.0 + diff --git a/migration-multifd-Move-header-prepare-fill-into-send.patch b/migration-multifd-Move-header-prepare-fill-into-send.patch new file mode 100644 index 0000000000000000000000000000000000000000..cbf8c46157875e6fb3269253f680b026e78b3ac8 --- /dev/null +++ b/migration-multifd-Move-header-prepare-fill-into-send.patch @@ -0,0 +1,227 @@ +From 1dfecda79660d2b68cd56a7e44ef76ac847f54d1 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:47 +0800 +Subject: [37/99] migration/multifd: Move header prepare/fill into + send_prepare() + +commit 25a1f8787597f6906b151b2f73ae6cc92a31de57 upstream. + +This patch redefines the interfacing of ->send_prepare(). It further +simplifies multifd_send_thread() especially on zero copy. 
+ +Now with the new interface, we require the hook to do all the work for +preparing the IOVs to send. After it's completed, the IOVs should be ready +to be dumped into the specific multifd QIOChannel later. + +So now the API looks like: + + p->pages -----------> send_prepare() -------------> IOVs + +This also prepares for the case where the input can be extended to even not +any p->pages. But that's for later. + +This patch will achieve similar goal of what Fabiano used to propose here: + +https://lore.kernel.org/r/20240126221943.26628-1-farosas@suse.de + +However the send() interface may not be necessary. I'm boldly attaching a +"Co-developed-by" for Fabiano. + +Co-developed-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-14-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 4 +++ + migration/multifd-zstd.c | 4 +++ + migration/multifd.c | 61 ++++++++++++++++++---------------------- + migration/multifd.h | 1 + + 4 files changed, 37 insertions(+), 33 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 100809abc1..012e3bdea1 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -123,6 +123,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + ++ multifd_send_prepare_header(p); ++ + for (i = 0; i < pages->num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; +@@ -172,6 +174,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + p->next_packet_size = out_size; + p->flags |= MULTIFD_FLAG_ZLIB; + ++ multifd_send_fill_packet(p); ++ + return 0; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 2023edd8cc..dc8fe43e94 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -118,6 +118,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t 
i; + ++ multifd_send_prepare_header(p); ++ + z->out.dst = z->zbuff; + z->out.size = z->zbuff_len; + z->out.pos = 0; +@@ -161,6 +163,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + p->next_packet_size = z->out.pos; + p->flags |= MULTIFD_FLAG_ZSTD; + ++ multifd_send_fill_packet(p); ++ + return 0; + } + +diff --git a/migration/multifd.c b/migration/multifd.c +index a42e152268..d4528cf9d1 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -51,15 +51,15 @@ typedef struct { + /** + * nocomp_send_setup: setup send side + * +- * For no compression this function does nothing. +- * +- * Returns 0 for success or -1 for error +- * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ + static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + { ++ if (migrate_zero_copy_send()) { ++ p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; ++ } ++ + return 0; + } + +@@ -89,7 +89,17 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { ++ bool use_zero_copy_send = migrate_zero_copy_send(); + MultiFDPages_t *pages = p->pages; ++ int ret; ++ ++ if (!use_zero_copy_send) { ++ /* ++ * Only !zerocopy needs the header in IOV; zerocopy will ++ * send it separately. 
++ */ ++ multifd_send_prepare_header(p); ++ } + + for (int i = 0; i < pages->num; i++) { + p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; +@@ -99,6 +109,18 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + + p->next_packet_size = pages->num * p->page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; ++ ++ multifd_send_fill_packet(p); ++ ++ if (use_zero_copy_send) { ++ /* Send header first, without zerocopy */ ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, errp); ++ if (ret != 0) { ++ return -1; ++ } ++ } ++ + return 0; + } + +@@ -267,7 +289,7 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + g_free(pages); + } + +-static void multifd_send_fill_packet(MultiFDSendParams *p) ++void multifd_send_fill_packet(MultiFDSendParams *p) + { + MultiFDPacket_t *packet = p->packet; + MultiFDPages_t *pages = p->pages; +@@ -689,7 +711,6 @@ static void *multifd_send_thread(void *opaque) + MigrationThread *thread = NULL; + Error *local_err = NULL; + int ret = 0; +- bool use_zero_copy_send = migrate_zero_copy_send(); + + thread = migration_threads_add(p->name, qemu_get_thread_id()); + +@@ -717,15 +738,6 @@ static void *multifd_send_thread(void *opaque) + MultiFDPages_t *pages = p->pages; + + p->iovs_num = 0; +- +- if (!use_zero_copy_send) { +- /* +- * Only !zerocopy needs the header in IOV; zerocopy will +- * send it separately. 
+- */ +- multifd_send_prepare_header(p); +- } +- + assert(pages->num); + + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -734,17 +746,6 @@ static void *multifd_send_thread(void *opaque) + break; + } + +- multifd_send_fill_packet(p); +- +- if (use_zero_copy_send) { +- /* Send header first, without zerocopy */ +- ret = qio_channel_write_all(p->c, (void *)p->packet, +- p->packet_len, &local_err); +- if (ret != 0) { +- break; +- } +- } +- + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, + 0, p->write_flags, &local_err); + if (ret != 0) { +@@ -949,13 +950,7 @@ int multifd_save_setup(Error **errp) + p->iov = g_new0(struct iovec, page_count + 1); + p->page_size = qemu_target_page_size(); + p->page_count = page_count; +- +- if (migrate_zero_copy_send()) { +- p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; +- } else { +- p->write_flags = 0; +- } +- ++ p->write_flags = 0; + multifd_new_send_channel_create(p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index 4ec005f53f..34a2ecb9f4 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -208,6 +208,7 @@ typedef struct { + } MultiFDMethods; + + void multifd_register_ops(int method, MultiFDMethods *ops); ++void multifd_send_fill_packet(MultiFDSendParams *p); + + static inline void multifd_send_prepare_header(MultiFDSendParams *p) + { +-- +2.33.0 + diff --git a/migration-multifd-Move-multifd_send_setup-error-hand.patch b/migration-multifd-Move-multifd_send_setup-error-hand.patch new file mode 100644 index 0000000000000000000000000000000000000000..b72cad1249aaa92062cbc6c3e2279066d61c8651 --- /dev/null +++ b/migration-multifd-Move-multifd_send_setup-error-hand.patch @@ -0,0 +1,106 @@ +From d9e7bf53856956e6417a2dd0b5636fb61fb1c365 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:15 -0300 +Subject: [51/99] migration/multifd: Move multifd_send_setup error handling in + to the function + +commit bd8b0a8f82d8fc17aa285ab963ba75675c2fbe7a 
upstream. + +Hide the error handling inside multifd_send_setup to make it cleaner +for the next patch to move the function around. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-4-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 6 +----- + migration/multifd.c | 24 +++++++++++++++++------- + migration/multifd.h | 2 +- + 3 files changed, 19 insertions(+), 13 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 0e8255180d..66417b40a2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3643,11 +3643,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + return; + } + +- if (multifd_send_setup(&local_err) != 0) { +- migrate_set_error(s, local_err); +- error_report_err(local_err); +- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +- MIGRATION_STATUS_FAILED); ++ if (!multifd_send_setup()) { + migrate_fd_cleanup(s); + return; + } +diff --git a/migration/multifd.c b/migration/multifd.c +index 59dcb6c9a2..1299248fea 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -989,14 +989,16 @@ static void multifd_new_send_channel_create(gpointer opaque) + socket_send_channel_create(multifd_new_send_channel_async, opaque); + } + +-int multifd_send_setup(Error **errp) ++bool multifd_send_setup(void) + { +- int thread_count; ++ MigrationState *s = migrate_get_current(); ++ Error *local_err = NULL; ++ int thread_count, ret = 0; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; + + if (!migrate_multifd()) { +- return 0; ++ return true; + } + + thread_count = migrate_multifd_channels(); +@@ -1030,14 +1032,22 @@ int multifd_send_setup(Error **errp) + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +- int ret; + +- ret = multifd_send_state->ops->send_setup(p, errp); ++ ret = multifd_send_state->ops->send_setup(p, 
&local_err); + if (ret) { +- return ret; ++ break; + } + } +- return 0; ++ ++ if (ret) { ++ migrate_set_error(s, local_err); ++ error_report_err(local_err); ++ migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, ++ MIGRATION_STATUS_FAILED); ++ return false; ++ } ++ ++ return true; + } + + struct { +diff --git a/migration/multifd.h b/migration/multifd.h +index 7881980ee6..8a1cad0996 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -13,7 +13,7 @@ + #ifndef QEMU_MIGRATION_MULTIFD_H + #define QEMU_MIGRATION_MULTIFD_H + +-int multifd_send_setup(Error **errp); ++bool multifd_send_setup(void); + void multifd_send_shutdown(void); + int multifd_recv_setup(Error **errp); + void multifd_recv_cleanup(void); +-- +2.33.0 + diff --git a/migration-multifd-Move-multifd_send_setup-into-migra.patch b/migration-multifd-Move-multifd_send_setup-into-migra.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a2042e31c14a3c31ee01d5cf39d65f532734a08 --- /dev/null +++ b/migration-multifd-Move-multifd_send_setup-into-migra.patch @@ -0,0 +1,90 @@ +From 4ab5ed68480ec55bff220496342000187b76c451 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:16 -0300 +Subject: [52/99] migration/multifd: Move multifd_send_setup into migration + thread + +commit dd904bc13f2af0c605c3fe72f118ea4e27a6610c upstream. + +We currently have an unfavorable situation around multifd channels +creation and the migration thread execution. + +We create the multifd channels with qio_channel_socket_connect_async +-> qio_task_run_in_thread, but only connect them at the +multifd_new_send_channel_async callback, called from +qio_task_complete, which is registered as a glib event. + +So at multifd_send_setup() we create the channels, but they will only +be actually usable after the whole multifd_send_setup() calling stack +returns back to the main loop. 
Which means that the migration thread +is already up and running without any possibility for the multifd +channels to be ready on time. + +We currently rely on the channels-ready semaphore blocking +multifd_send_sync_main() until channels start to come up and release +it. However there have been bugs recently found when a channel's +creation fails and multifd_send_cleanup() is allowed to run while +other channels are still being created. + +Let's start to organize this situation by moving the +multifd_send_setup() call into the migration thread. That way we +unblock the main-loop to dispatch the completion callbacks and +actually have a chance of getting the multifd channels ready for when +the migration thread needs them. + +The next patches will deal with the synchronization aspects. + +Note that this takes multifd_send_setup() out of the BQL. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-5-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + migration/migration.c +[jz: upstream renamed qemu_mutex_lock_iothread() to bql_lock(), while + openEuler not yet. 
Resolve context conflict due to this] +Signed-off-by: Jason Zeng +--- + migration/migration.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 66417b40a2..59c0bbee67 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3319,6 +3319,10 @@ static void *migration_thread(void *opaque) + object_ref(OBJECT(s)); + update_iteration_initial_status(s); + ++ if (!multifd_send_setup()) { ++ goto out; ++ } ++ + qemu_mutex_lock_iothread(); + qemu_savevm_state_header(s->to_dst_file); + qemu_mutex_unlock_iothread(); +@@ -3390,6 +3394,7 @@ static void *migration_thread(void *opaque) + urgent = migration_rate_limit(); + } + ++out: + trace_migration_thread_after_loop(); + migration_iteration_finish(s); + object_unref(OBJECT(s)); +@@ -3643,11 +3648,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + return; + } + +- if (!multifd_send_setup()) { +- migrate_fd_cleanup(s); +- return; +- } +- + if (migrate_background_snapshot()) { + qemu_thread_create(&s->thread, "bg_snapshot", + bg_migration_thread, s, QEMU_THREAD_JOINABLE); +-- +2.33.0 + diff --git a/migration-multifd-Move-total_normal_pages-accounting.patch b/migration-multifd-Move-total_normal_pages-accounting.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a9f2935d7905427cff1cc27ac03d837b73cccac --- /dev/null +++ b/migration-multifd-Move-total_normal_pages-accounting.patch @@ -0,0 +1,57 @@ +From 2316c555d9893f3e637260367477edcf40592679 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:44 +0800 +Subject: [34/99] migration/multifd: Move total_normal_pages accounting + +commit db7e1cc5103137743394a939045a17fa2b30a0dc upstream. + +Just like the previous patch, move the accounting for total_normal_pages on +both src/dst sides into the packet fill/unfill procedures. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-11-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f79badb546..510bfdcac8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -291,6 +291,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + } + + p->packets_sent++; ++ p->total_normal_pages += pages->num; + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -339,6 +340,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); + p->packets_recved++; ++ p->total_normal_pages += p->normal_num; + + if (p->normal_num == 0) { + return 0; +@@ -724,7 +726,6 @@ static void *multifd_send_thread(void *opaque) + } + + multifd_send_fill_packet(p); +- p->total_normal_pages += pages->num; + trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + +@@ -1128,7 +1129,6 @@ static void *multifd_recv_thread(void *opaque) + p->flags &= ~MULTIFD_FLAG_SYNC; + trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, + p->next_packet_size); +- p->total_normal_pages += p->normal_num; + qemu_mutex_unlock(&p->mutex); + + if (p->normal_num) { +-- +2.33.0 + diff --git a/migration-multifd-Move-trace_multifd_send-recv.patch b/migration-multifd-Move-trace_multifd_send-recv.patch new file mode 100644 index 0000000000000000000000000000000000000000..267acb304052653afcebd2f9f890b9a5e0043b8a --- /dev/null +++ b/migration-multifd-Move-trace_multifd_send-recv.patch @@ -0,0 +1,71 @@ +From 8a1deb6f19abbd8824a9b3e04abc77f5f72f37f6 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:45 +0800 +Subject: [35/99] migration/multifd: Move trace_multifd_send|recv() + 
+commit 8a9ef1738037e1d1132f9e1bd3e2f1102bde719f upstream. + +Move them into fill/unfill of packets. With that, we can further cleanup +the send/recv thread procedure, and remove one more temp var. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-12-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 510bfdcac8..f545faaa52 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -292,6 +292,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + p->packets_sent++; + p->total_normal_pages += pages->num; ++ ++ trace_multifd_send(p->id, p->packet_num, pages->num, p->flags, ++ p->next_packet_size); + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -342,6 +345,9 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->packets_recved++; + p->total_normal_pages += p->normal_num; + ++ trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, ++ p->next_packet_size); ++ + if (p->normal_num == 0) { + return 0; + } +@@ -708,7 +714,6 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (qatomic_read(&p->pending_job)) { +- uint64_t packet_num = p->packet_num; + MultiFDPages_t *pages = p->pages; + + if (use_zero_copy_send) { +@@ -726,8 +731,6 @@ static void *multifd_send_thread(void *opaque) + } + + multifd_send_fill_packet(p); +- trace_multifd_send(p->id, packet_num, pages->num, p->flags, +- p->next_packet_size); + + if (use_zero_copy_send) { + /* Send header first, without zerocopy */ +@@ -1127,8 +1130,6 @@ static void *multifd_recv_thread(void *opaque) + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; +- trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, +- 
p->next_packet_size); + qemu_mutex_unlock(&p->mutex); + + if (p->normal_num) { +-- +2.33.0 + diff --git a/migration-multifd-Optimize-sender-side-to-be-lockles.patch b/migration-multifd-Optimize-sender-side-to-be-lockles.patch new file mode 100644 index 0000000000000000000000000000000000000000..b74b8bde40830bc2bbe81732545c02fe72782ba9 --- /dev/null +++ b/migration-multifd-Optimize-sender-side-to-be-lockles.patch @@ -0,0 +1,204 @@ +From 2beae052ba502782de62ca4ccf7a1cdb6e830150 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:57 +0800 +Subject: [47/99] migration/multifd: Optimize sender side to be lockless + +commit 488c84acb465c21b716c3fd14de27ab5ce388c85 upstream. + +When reviewing my attempt to refactor send_prepare(), Fabiano suggested we +try out with dropping the mutex in multifd code [1]. + +I thought about that before but I never tried to change the code. Now +maybe it's time to give it a stab. This only optimizes the sender side. + +The trick here is multifd has a clear provider/consumer model, that the +migration main thread publishes requests (either pending_job/pending_sync), +while the multifd sender threads are consumers. Here we don't have a lot +of complicated data sharing, and the jobs can logically be submitted +lockless. + +Arm the code with atomic weapons. Two things worth mentioning: + + - For multifd_send_pages(): we can use qatomic_load_acquire() when trying + to find a free channel, but that's expensive if we attach one ACQUIRE per + channel. Instead, keep the qatomic_read() on reading the pending_job + flag as we do already, meanwhile use one smp_mb_acquire() after the loop + to guarantee the memory ordering. + + - For pending_sync: it doesn't have any extra data required since now + p->flags are never touched, it should be safe to not use memory barrier. + That's different from pending_job. + +Provide rich comments for all the lockless operations to state how they are +paired. With that, we can remove the mutex. 
+ +[1] https://lore.kernel.org/r/87o7d1jlu5.fsf@suse.de + +Suggested-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-24-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 51 +++++++++++++++++++++++---------------------- + migration/multifd.h | 2 -- + 2 files changed, 26 insertions(+), 27 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index c52c18046a..c0d8f438bc 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -502,19 +502,19 @@ static bool multifd_send_pages(void) + } + } + +- qemu_mutex_lock(&p->mutex); +- assert(!p->pages->num); +- assert(!p->pages->block); + /* +- * Double check on pending_job==false with the lock. In the future if +- * we can have >1 requester thread, we can replace this with a "goto +- * retry", but that is for later. ++ * Make sure we read p->pending_job before all the rest. Pairs with ++ * qatomic_store_release() in multifd_send_thread(). + */ +- assert(qatomic_read(&p->pending_job) == false); +- qatomic_set(&p->pending_job, true); ++ smp_mb_acquire(); ++ assert(!p->pages->num); + multifd_send_state->pages = p->pages; + p->pages = pages; +- qemu_mutex_unlock(&p->mutex); ++ /* ++ * Making sure p->pages is setup before marking pending_job=true. Pairs ++ * with the qatomic_load_acquire() in multifd_send_thread(). 
++ */ ++ qatomic_store_release(&p->pending_job, true); + qemu_sem_post(&p->sem); + + return true; +@@ -649,7 +649,6 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + } + multifd_send_channel_destroy(p->c); + p->c = NULL; +- qemu_mutex_destroy(&p->mutex); + qemu_sem_destroy(&p->sem); + qemu_sem_destroy(&p->sem_sync); + g_free(p->name); +@@ -743,14 +742,12 @@ int multifd_send_sync_main(void) + + trace_multifd_send_sync_main_signal(p->id); + +- qemu_mutex_lock(&p->mutex); + /* + * We should be the only user so far, so not possible to be set by + * others concurrently. + */ + assert(qatomic_read(&p->pending_sync) == false); + qatomic_set(&p->pending_sync, true); +- qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -800,9 +797,12 @@ static void *multifd_send_thread(void *opaque) + if (multifd_send_should_exit()) { + break; + } +- qemu_mutex_lock(&p->mutex); + +- if (qatomic_read(&p->pending_job)) { ++ /* ++ * Read pending_job flag before p->pages. Pairs with the ++ * qatomic_store_release() in multifd_send_pages(). ++ */ ++ if (qatomic_load_acquire(&p->pending_job)) { + MultiFDPages_t *pages = p->pages; + + p->iovs_num = 0; +@@ -810,14 +810,12 @@ static void *multifd_send_thread(void *opaque) + + ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); + break; + } + + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, + 0, p->write_flags, &local_err); + if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); + break; + } + +@@ -826,24 +824,31 @@ static void *multifd_send_thread(void *opaque) + + multifd_pages_reset(p->pages); + p->next_packet_size = 0; +- qatomic_set(&p->pending_job, false); +- qemu_mutex_unlock(&p->mutex); ++ ++ /* ++ * Making sure p->pages is published before saying "we're ++ * free". Pairs with the smp_mb_acquire() in ++ * multifd_send_pages(). 
++ */ ++ qatomic_store_release(&p->pending_job, false); + } else { +- /* If not a normal job, must be a sync request */ ++ /* ++ * If not a normal job, must be a sync request. Note that ++ * pending_sync is a standalone flag (unlike pending_job), so ++ * it doesn't require explicit memory barriers. ++ */ + assert(qatomic_read(&p->pending_sync)); + p->flags = MULTIFD_FLAG_SYNC; + multifd_send_fill_packet(p); + ret = qio_channel_write_all(p->c, (void *)p->packet, + p->packet_len, &local_err); + if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); + break; + } + /* p->next_packet_size will always be zero for a SYNC packet */ + stat64_add(&mig_stats.multifd_bytes, p->packet_len); + p->flags = 0; + qatomic_set(&p->pending_sync, false); +- qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem_sync); + } + } +@@ -857,10 +862,7 @@ out: + error_free(local_err); + } + +- qemu_mutex_lock(&p->mutex); + p->running = false; +- qemu_mutex_unlock(&p->mutex); +- + rcu_unregister_thread(); + migration_threads_remove(thread); + trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); +@@ -1002,7 +1004,6 @@ int multifd_send_setup(Error **errp) + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +- qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); + p->id = i; +diff --git a/migration/multifd.h b/migration/multifd.h +index 98876ff94a..78a2317263 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -91,8 +91,6 @@ typedef struct { + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + +- /* this mutex protects the following parameters */ +- QemuMutex mutex; + /* is this channel thread running */ + bool running; + /* multifd flags for each packet */ +-- +2.33.0 + diff --git a/migration-multifd-Postpone-reset-of-MultiFDPages_t.patch b/migration-multifd-Postpone-reset-of-MultiFDPages_t.patch new file mode 100644 index 
0000000000000000000000000000000000000000..692b22eef4eb1f03561b67813a25b46d1a7de5f0 --- /dev/null +++ b/migration-multifd-Postpone-reset-of-MultiFDPages_t.patch @@ -0,0 +1,84 @@ +From 9ce63dcad32efdb9e31db0db495bf4a3e1a96595 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:38 +0800 +Subject: [28/99] migration/multifd: Postpone reset of MultiFDPages_t + +commit 836eca47f62f9f6d5b8e9b6fedfc3539775c4e2e upstream. + +Now we reset MultiFDPages_t object in the multifd sender thread in the +middle of the sending job. That's not necessary, because the "*pages" +struct will not be reused anyway until pending_job is cleared. + +Move that to the end after the job is completed, provide a helper to reset +a "*pages" object. Use that same helper when free the object too. + +This prepares us to keep using p->pages in the follow up patches, where we +may drop p->normal[]. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-5-peterx@redhat.com +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd.c +[jz: openEuler backported 254c67a88ab5 ("migration: fix-possible-int-overflow") + which causes simple context conflict when cherry-pick this commit] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index ea756b6eb8..fff119237a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -173,6 +173,17 @@ void multifd_register_ops(int method, MultiFDMethods *ops) + multifd_ops[method] = ops; + } + ++/* Reset a MultiFDPages_t* object for the next use */ ++static void multifd_pages_reset(MultiFDPages_t *pages) ++{ ++ /* ++ * We don't need to touch offset[] array, because it will be ++ * overwritten later when reused. 
++ */ ++ pages->num = 0; ++ pages->block = NULL; ++} ++ + static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) + { + MultiFDInit_t msg = {}; +@@ -249,9 +260,8 @@ static MultiFDPages_t *multifd_pages_init(uint32_t n) + + static void multifd_pages_clear(MultiFDPages_t *pages) + { +- pages->num = 0; ++ multifd_pages_reset(pages); + pages->allocated = 0; +- pages->block = NULL; + g_free(pages->offset); + pages->offset = NULL; + g_free(pages); +@@ -708,8 +718,6 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->total_normal_pages += p->normal_num; +- p->pages->num = 0; +- p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + + trace_multifd_send(p->id, packet_num, p->normal_num, flags, +@@ -736,6 +744,8 @@ static void *multifd_send_thread(void *opaque) + + stat64_add(&mig_stats.multifd_bytes, + (uint64_t)p->next_packet_size + p->packet_len); ++ ++ multifd_pages_reset(p->pages); + p->next_packet_size = 0; + qemu_mutex_lock(&p->mutex); + p->pending_job--; +-- +2.33.0 + diff --git a/migration-multifd-Release-recv-sem_sync-earlier.patch b/migration-multifd-Release-recv-sem_sync-earlier.patch new file mode 100644 index 0000000000000000000000000000000000000000..6744b2251ea487b380575df8caa20ab2ca3a8cc5 --- /dev/null +++ b/migration-multifd-Release-recv-sem_sync-earlier.patch @@ -0,0 +1,52 @@ +From 7a9435d5db4a525b841078b125ba4843339c82fa Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 20 Feb 2024 19:41:09 -0300 +Subject: [56/99] migration/multifd: Release recv sem_sync earlier + +commit d13f0026c7a625a5a34a5dea4095a4d9cfa04652 upstream. + +Now that multifd_recv_terminate_threads() is called only once, release +the recv side sem_sync earlier like we do for the send side. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240220224138.24759-6-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 126c18406f..bbd421004f 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1108,6 +1108,12 @@ static void multifd_recv_terminate_threads(Error *err) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + ++ /* ++ * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, ++ * however try to wakeup it without harm in cleanup phase. ++ */ ++ qemu_sem_post(&p->sem_sync); ++ + /* + * We could arrive here for two reasons: + * - normal quit, i.e. everything went fine, just finished +@@ -1166,12 +1172,6 @@ void multifd_recv_cleanup(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- /* +- * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, +- * however try to wakeup it without harm in cleanup phase. +- */ +- qemu_sem_post(&p->sem_sync); +- + if (p->thread_created) { + qemu_thread_join(&p->thread); + } +-- +2.33.0 + diff --git a/migration-multifd-Remove-MultiFDPages_t-packet_num.patch b/migration-multifd-Remove-MultiFDPages_t-packet_num.patch new file mode 100644 index 0000000000000000000000000000000000000000..47f90a96cb0cf2146b64550684a3b10fd393c1d8 --- /dev/null +++ b/migration-multifd-Remove-MultiFDPages_t-packet_num.patch @@ -0,0 +1,48 @@ +From d6e061a269348d6d559be65a816cc0404501d86a Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:38 -0300 +Subject: [07/99] migration/multifd: Remove MultiFDPages_t::packet_num + +commit dca1bc7f24d2fa227f0b787f85f3cc67006e67bf upstream. 
+ +This was introduced by commit 34c55a94b1 ("migration: Create multipage +support") and never used. + +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-2-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 1 - + migration/multifd.h | 2 -- + 2 files changed, 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f5991bc746..3ea204cac8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -251,7 +251,6 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + { + pages->num = 0; + pages->allocated = 0; +- pages->packet_num = 0; + pages->block = NULL; + g_free(pages->offset); + pages->offset = NULL; +diff --git a/migration/multifd.h b/migration/multifd.h +index a835643b48..b0ff610c37 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -58,8 +58,6 @@ typedef struct { + uint32_t num; + /* number of allocated pages */ + uint32_t allocated; +- /* global number of generated multifd packets */ +- uint64_t packet_num; + /* offset of each page */ + ram_addr_t *offset; + RAMBlock *block; +-- +2.33.0 + diff --git a/migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch b/migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch new file mode 100644 index 0000000000000000000000000000000000000000..02ea46a03ade9dff4af6cbcb9d35870fbdde843e --- /dev/null +++ b/migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch @@ -0,0 +1,159 @@ +From d7823b26d0d983402a16b3568543bac7bb5c7f34 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:39 -0300 +Subject: [08/99] migration/multifd: Remove QEMUFile from where it is not + needed + +commit 9346fa1870784c70618bfd5a9e1f1da89de0c5ec upstream. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-3-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + migration/ram.c +[jz: resolve context conflict due to BQL name, + qemu_mutex_lock_iothread() hasn't been renamed to bql_lock() yet] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 12 ++++++------ + migration/multifd.h | 4 ++-- + migration/ram.c | 15 +++++++-------- + 3 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3ea204cac8..3e5aaaa1d4 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -391,7 +391,7 @@ struct { + * false. + */ + +-static int multifd_send_pages(QEMUFile *f) ++static int multifd_send_pages(void) + { + int i; + static int next_channel; +@@ -437,7 +437,7 @@ static int multifd_send_pages(QEMUFile *f) + return 1; + } + +-int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) ++int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + { + MultiFDPages_t *pages = multifd_send_state->pages; + bool changed = false; +@@ -457,12 +457,12 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + changed = true; + } + +- if (multifd_send_pages(f) < 0) { ++ if (multifd_send_pages() < 0) { + return -1; + } + + if (changed) { +- return multifd_queue_page(f, block, offset); ++ return multifd_queue_page(block, offset); + } + + return 1; +@@ -584,7 +584,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) + return ret; + } + +-int multifd_send_sync_main(QEMUFile *f) ++int multifd_send_sync_main(void) + { + int i; + bool flush_zero_copy; +@@ -593,7 +593,7 @@ int multifd_send_sync_main(QEMUFile *f) + return 0; + } + if (multifd_send_state->pages->num) { +- if (multifd_send_pages(f) < 0) { ++ if (multifd_send_pages() < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return -1; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index b0ff610c37..35d11f103c 
100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -21,8 +21,8 @@ void multifd_load_shutdown(void); + bool multifd_recv_all_channels_created(void); + void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-int multifd_send_sync_main(QEMUFile *f); +-int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); ++int multifd_send_sync_main(void); ++int multifd_queue_page(RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) +diff --git a/migration/ram.c b/migration/ram.c +index f1ff38cf39..67fa9c83d6 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1387,10 +1387,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss) + return pages; + } + +-static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, +- ram_addr_t offset) ++static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset) + { +- if (multifd_queue_page(file, block, offset) < 0) { ++ if (multifd_queue_page(block, offset) < 0) { + return -1; + } + stat64_add(&mig_stats.normal_pages, 1); +@@ -1473,7 +1472,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) + if (migrate_multifd() && + !migrate_multifd_flush_after_each_section()) { + QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; +- int ret = multifd_send_sync_main(f); ++ int ret = multifd_send_sync_main(); + if (ret < 0) { + return ret; + } +@@ -2265,7 +2264,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + * still see partially copied pages which is data corruption. 
+ */ + if (migrate_multifd() && !migration_in_postcopy()) { +- return ram_save_multifd_page(pss->pss_channel, block, offset); ++ return ram_save_multifd_page(block, offset); + } + + return ram_save_page(rs, pss); +@@ -3434,7 +3433,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + migration_ops->ram_save_target_page = ram_save_target_page_legacy; + + qemu_mutex_unlock_iothread(); +- ret = multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(); + qemu_mutex_lock_iothread(); + if (ret < 0) { + return ret; +@@ -3558,7 +3557,7 @@ out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { + if (migrate_multifd() && migrate_multifd_flush_after_each_section()) { +- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); ++ ret = multifd_send_sync_main(); + if (ret < 0) { + return ret; + } +@@ -3654,7 +3653,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + } + } + +- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); ++ ret = multifd_send_sync_main(); + if (ret < 0) { + return ret; + } +-- +2.33.0 + diff --git a/migration-multifd-Remove-error_setg-in-migration_ioc.patch b/migration-multifd-Remove-error_setg-in-migration_ioc.patch new file mode 100644 index 0000000000000000000000000000000000000000..5b54c880cfa70458c742c582b3b27c0edf691201 --- /dev/null +++ b/migration-multifd-Remove-error_setg-in-migration_ioc.patch @@ -0,0 +1,39 @@ +From 1698ab2f40ef2bde3e7ee3175a5b5656589ce27d Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:13 +0200 +Subject: [04/99] migration/multifd: Remove error_setg() in + migration_ioc_process_incoming() + +commit 1d3886f837d8e972366a8b58ba8afb0e5efbeed7 upstream. + +If multifd_load_setup() fails in migration_ioc_process_incoming(), +error_setg() is called with errp. This will lead to an assert because in +that case errp already contains an error. + +Fix it by removing the redundant error_setg(). 
+ +Fixes: 6720c2b32725 ("migration: check magic value for deciding the mapping of channels") +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20231231093016.14204-9-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index dce22c2da5..5829565f9c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -848,7 +848,6 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } + + if (multifd_load_setup(errp) != 0) { +- error_setg(errp, "Failed to setup multifd channels"); + return; + } + +-- +2.33.0 + diff --git a/migration-multifd-Remove-p-quit-from-recv-side.patch b/migration-multifd-Remove-p-quit-from-recv-side.patch new file mode 100644 index 0000000000000000000000000000000000000000..67158644b2f763f40eaa7d64f1c0fcdedbaff3b1 --- /dev/null +++ b/migration-multifd-Remove-p-quit-from-recv-side.patch @@ -0,0 +1,129 @@ +From eacc8d435828d31478498fe266487906941be6cb Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 20 Feb 2024 19:41:08 -0300 +Subject: [55/99] migration/multifd: Remove p->quit from recv side + +commit 11dd7be57524d400652cecf8740a016b3d66b53d upstream. + +Like we did on the sending side, replace the p->quit per-channel flag +with a global atomic 'exiting' flag. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240220224138.24759-5-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index bd240649f7..126c18406f 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -80,6 +80,19 @@ struct { + MultiFDMethods *ops; + } *multifd_send_state; + ++struct { ++ MultiFDRecvParams *params; ++ /* number of created threads */ ++ int count; ++ /* syncs main thread and channels */ ++ QemuSemaphore sem_sync; ++ /* global number of generated multifd packets */ ++ uint64_t packet_num; ++ int exiting; ++ /* multifd ops */ ++ MultiFDMethods *ops; ++} *multifd_recv_state; ++ + /* Multifd without compression */ + + /** +@@ -441,6 +454,11 @@ static bool multifd_send_should_exit(void) + return qatomic_read(&multifd_send_state->exiting); + } + ++static bool multifd_recv_should_exit(void) ++{ ++ return qatomic_read(&multifd_recv_state->exiting); ++} ++ + /* + * The migration thread can wait on either of the two semaphores. 
This + * function can be used to kick the main thread out of waiting on either of +@@ -1067,24 +1085,16 @@ bool multifd_send_setup(void) + return true; + } + +-struct { +- MultiFDRecvParams *params; +- /* number of created threads */ +- int count; +- /* syncs main thread and channels */ +- QemuSemaphore sem_sync; +- /* global number of generated multifd packets */ +- uint64_t packet_num; +- /* multifd ops */ +- MultiFDMethods *ops; +-} *multifd_recv_state; +- + static void multifd_recv_terminate_threads(Error *err) + { + int i; + + trace_multifd_recv_terminate_threads(err != NULL); + ++ if (qatomic_xchg(&multifd_recv_state->exiting, 1)) { ++ return; ++ } ++ + if (err) { + MigrationState *s = migrate_get_current(); + migrate_set_error(s, err); +@@ -1098,8 +1108,6 @@ static void multifd_recv_terminate_threads(Error *err) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- qemu_mutex_lock(&p->mutex); +- p->quit = true; + /* + * We could arrive here for two reasons: + * - normal quit, i.e. 
everything went fine, just finished +@@ -1109,7 +1117,6 @@ static void multifd_recv_terminate_threads(Error *err) + if (p->c) { + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } +- qemu_mutex_unlock(&p->mutex); + } + } + +@@ -1214,7 +1221,7 @@ static void *multifd_recv_thread(void *opaque) + while (true) { + uint32_t flags; + +- if (p->quit) { ++ if (multifd_recv_should_exit()) { + break; + } + +@@ -1278,6 +1285,7 @@ int multifd_recv_setup(Error **errp) + multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); + multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); + qatomic_set(&multifd_recv_state->count, 0); ++ qatomic_set(&multifd_recv_state->exiting, 0); + qemu_sem_init(&multifd_recv_state->sem_sync, 0); + multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()]; + +@@ -1286,7 +1294,6 @@ int multifd_recv_setup(Error **errp) + + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem_sync, 0); +- p->quit = false; + p->id = i; + p->packet_len = sizeof(MultiFDPacket_t) + + sizeof(uint64_t) * page_count; +-- +2.33.0 + diff --git a/migration-multifd-Remove-p-running.patch b/migration-multifd-Remove-p-running.patch new file mode 100644 index 0000000000000000000000000000000000000000..c4b45f8e69e6ffde834aa0dffc87c4f25dc1eeea --- /dev/null +++ b/migration-multifd-Remove-p-running.patch @@ -0,0 +1,175 @@ +From 9fb44da2534bcf1802c5f7ce36944b0940821728 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:14 -0300 +Subject: [50/99] migration/multifd: Remove p->running + +commit a2a63c4abd52f4e3ff4046dcb67fe44ebf0bb8de upstream. + +We currently only need p->running to avoid calling qemu_thread_join() +on a non existent thread if the thread has never been created. 
+ +However, there are at least two bugs in this logic: + +1) On the sending side, p->running is set too early and +qemu_thread_create() can be skipped due to an error during TLS +handshake, leaving the flag set and leading to a crash when +multifd_send_cleanup() calls qemu_thread_join(). + +2) During exit, the multifd thread clears the flag while holding the +channel lock. The counterpart at multifd_send_cleanup() reads the flag +outside of the lock and might free the mutex while the multifd thread +still has it locked. + +Fix the first issue by setting the flag right before creating the +thread. Rename it from p->running to p->thread_created to clarify its +usage. + +Fix the second issue by not clearing the flag at the multifd thread +exit. We don't have any use for that. + +Note that these bugs are straight-forward logic issues and not race +conditions. There is still a gap for races to affect this code due to +multifd_send_cleanup() being allowed to run concurrently with the +thread creation loop. This issue is solved in the next patches. 
+ +Cc: qemu-stable +Fixes: 29647140157a ("migration/tls: add support for multifd tls-handshake") +Reported-by: Avihai Horon +Reported-by: chenyuhui5@huawei.com +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-3-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 27 ++++++++++++--------------- + migration/multifd.h | 7 ++----- + 2 files changed, 14 insertions(+), 20 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 459e7889e8..59dcb6c9a2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -635,7 +635,7 @@ static void multifd_send_terminate_threads(void) + qemu_thread_join(&p->tls_thread); + } + +- if (p->running) { ++ if (p->thread_created) { + qemu_thread_join(&p->thread); + } + } +@@ -866,7 +866,6 @@ out: + error_free(local_err); + } + +- p->running = false; + rcu_unregister_thread(); + migration_threads_remove(thread); + trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); +@@ -957,6 +956,8 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + migration_ioc_register_yank(ioc); + p->registered_yank = true; + p->c = ioc; ++ ++ p->thread_created = true; + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); + return true; +@@ -971,7 +972,6 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + trace_multifd_new_send_channel_async(p->id); + if (!qio_task_propagate_error(task, &local_err)) { + qio_channel_set_delay(ioc, false); +- p->running = true; + if (multifd_channel_connect(p, ioc, &local_err)) { + return; + } +@@ -1132,15 +1132,15 @@ void multifd_recv_cleanup(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- if (p->running) { +- /* +- * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, +- * however try to wakeup it without harm in cleanup phase. 
+- */ +- qemu_sem_post(&p->sem_sync); +- } ++ /* ++ * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, ++ * however try to wakeup it without harm in cleanup phase. ++ */ ++ qemu_sem_post(&p->sem_sync); + +- qemu_thread_join(&p->thread); ++ if (p->thread_created) { ++ qemu_thread_join(&p->thread); ++ } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + multifd_recv_cleanup_channel(&multifd_recv_state->params[i]); +@@ -1226,9 +1226,6 @@ static void *multifd_recv_thread(void *opaque) + multifd_recv_terminate_threads(local_err); + error_free(local_err); + } +- qemu_mutex_lock(&p->mutex); +- p->running = false; +- qemu_mutex_unlock(&p->mutex); + + rcu_unregister_thread(); + trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); +@@ -1334,7 +1331,7 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + p->c = ioc; + object_ref(OBJECT(ioc)); + +- p->running = true; ++ p->thread_created = true; + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); +diff --git a/migration/multifd.h b/migration/multifd.h +index 720c9d50db..7881980ee6 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -73,6 +73,7 @@ typedef struct { + char *name; + /* channel thread id */ + QemuThread thread; ++ bool thread_created; + QemuThread tls_thread; + bool tls_thread_created; + /* communication channel */ +@@ -93,8 +94,6 @@ typedef struct { + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + +- /* is this channel thread running */ +- bool running; + /* multifd flags for each packet */ + uint32_t flags; + /* +@@ -143,6 +142,7 @@ typedef struct { + char *name; + /* channel thread id */ + QemuThread thread; ++ bool thread_created; + /* communication channel */ + QIOChannel *c; + /* packet allocated len */ +@@ -157,8 +157,6 @@ typedef struct { + + /* this mutex protects the following parameters */ + QemuMutex mutex; +- /* is this channel 
thread running */ +- bool running; + /* should this thread finish */ + bool quit; + /* multifd flags for each packet */ +@@ -217,4 +215,3 @@ static inline void multifd_send_prepare_header(MultiFDSendParams *p) + + + #endif +- +-- +2.33.0 + diff --git a/migration-multifd-Remove-unnecessary-usage-of-local-.patch b/migration-multifd-Remove-unnecessary-usage-of-local-.patch new file mode 100644 index 0000000000000000000000000000000000000000..3a9701c38ee1e84d2c01a4e173b1466db78c634c --- /dev/null +++ b/migration-multifd-Remove-unnecessary-usage-of-local-.patch @@ -0,0 +1,61 @@ +From c707a4d1339d572942b79a1b6440cbe487ab2b81 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:16 +0200 +Subject: [06/99] migration/multifd: Remove unnecessary usage of local Error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 3fc58efa938338a82e4d5c0c031e7e9c98e9544f upstream. + +According to Error API, usage of ERRP_GUARD() or a local Error instead +of errp is needed if errp is passed to void functions, where it is later +dereferenced to see if an error occurred. + +There are several places in multifd.c that use local Error although it +is not needed. Change these places to use errp directly. 
+ +Signed-off-by: Avihai Horon +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/20231231093016.14204-12-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8221ebe4b6..f5991bc746 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -955,12 +955,10 @@ int multifd_save_setup(Error **errp) + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +- Error *local_err = NULL; + int ret; + +- ret = multifd_send_state->ops->send_setup(p, &local_err); ++ ret = multifd_send_state->ops->send_setup(p, errp); + if (ret) { +- error_propagate(errp, local_err); + return ret; + } + } +@@ -1199,12 +1197,10 @@ int multifd_load_setup(Error **errp) + + for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; +- Error *local_err = NULL; + int ret; + +- ret = multifd_recv_state->ops->recv_setup(p, &local_err); ++ ret = multifd_recv_state->ops->recv_setup(p, errp); + if (ret) { +- error_propagate(errp, local_err); + return ret; + } + } +-- +2.33.0 + diff --git a/migration-multifd-Rename-MultiFDSend-RecvParams-data.patch b/migration-multifd-Rename-MultiFDSend-RecvParams-data.patch new file mode 100644 index 0000000000000000000000000000000000000000..22840a0113188bf204b633d7dc1213a99c28b8a1 --- /dev/null +++ b/migration-multifd-Rename-MultiFDSend-RecvParams-data.patch @@ -0,0 +1,199 @@ +From 68a8a9da612d2d2dec5ad1b7b9ad5d7db603e05d Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:30:06 -0300 +Subject: [65/99] migration/multifd: Rename MultiFDSend|RecvParams::data to + compress_data + +commit 402dd7ac1c3be44f306c903cdfd2583ffec5e2fd upstream. + +Use a more specific name for the compression data so we can use the +generic for the multifd core code. 
+ +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-13-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 20 ++++++++++---------- + migration/multifd-zstd.c | 20 ++++++++++---------- + migration/multifd.h | 4 ++-- + 3 files changed, 22 insertions(+), 22 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 012e3bdea1..2a8f5fc9a6 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -69,7 +69,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + err_msg = "out of memory for buf"; + goto err_free_zbuff; + } +- p->data = z; ++ p->compress_data = z; + return 0; + + err_free_zbuff: +@@ -92,15 +92,15 @@ err_free_z: + */ + static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + { +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + + deflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; + g_free(z->buf); + z->buf = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -117,7 +117,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { + MultiFDPages_t *pages = p->pages; +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; +@@ -194,7 +194,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + struct zlib_data *z = g_new0(struct zlib_data, 1); + z_stream *zs = &z->zs; + +- p->data = z; ++ p->compress_data = z; + zs->zalloc = Z_NULL; + zs->zfree = Z_NULL; + zs->opaque = Z_NULL; +@@ -224,13 +224,13 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + */ + static void zlib_recv_cleanup(MultiFDRecvParams *p) + { +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + + 
inflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -246,7 +246,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + */ + static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index dc8fe43e94..593cf290ad 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -52,7 +52,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + struct zstd_data *z = g_new0(struct zstd_data, 1); + int res; + +- p->data = z; ++ p->compress_data = z; + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); +@@ -90,14 +90,14 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + */ + static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + { +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + + ZSTD_freeCStream(z->zcs); + z->zcs = NULL; + g_free(z->zbuff); + z->zbuff = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -114,7 +114,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { + MultiFDPages_t *pages = p->pages; +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + int ret; + uint32_t i; + +@@ -183,7 +183,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + struct zstd_data *z = g_new0(struct zstd_data, 1); + int ret; + +- p->data = z; ++ p->compress_data = z; + z->zds = ZSTD_createDStream(); + if (!z->zds) { + g_free(z); +@@ -221,14 +221,14 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + */ + 
static void zstd_recv_cleanup(MultiFDRecvParams *p) + { +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + + ZSTD_freeDStream(z->zds); + z->zds = NULL; + g_free(z->zbuff); + z->zbuff = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -248,7 +248,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + uint32_t out_size = 0; + uint32_t expected_size = p->normal_num * p->page_size; + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + int ret; + int i; + +diff --git a/migration/multifd.h b/migration/multifd.h +index b3fe27ae93..adccd3532f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -127,7 +127,7 @@ typedef struct { + /* number of iovs used */ + uint32_t iovs_num; + /* used for compression methods */ +- void *data; ++ void *compress_data; + } MultiFDSendParams; + + typedef struct { +@@ -183,7 +183,7 @@ typedef struct { + /* num of non zero pages */ + uint32_t normal_num; + /* used for de-compression methods */ +- void *data; ++ void *compress_data; + } MultiFDRecvParams; + + typedef struct { +-- +2.33.0 + diff --git a/migration-multifd-Rename-p-num_packets-and-clean-it-.patch b/migration-multifd-Rename-p-num_packets-and-clean-it-.patch new file mode 100644 index 0000000000000000000000000000000000000000..26cd02cbf01ab9243dd6ee2c6c81bb2463d56111 --- /dev/null +++ b/migration-multifd-Rename-p-num_packets-and-clean-it-.patch @@ -0,0 +1,140 @@ +From a10ddd65e951c65119135eb847c93ab8db980638 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:43 +0800 +Subject: [33/99] migration/multifd: Rename p->num_packets and clean it up + +commit 05b7ec1890158471afb8537a6817a7e0d0a6c938 upstream. + +This field, no matter whether on src or dest, is only used for debugging +purpose. 
+ +They can even be removed already, unless it still more or less provide some +accounting on "how many packets are sent/recved for this thread". The +other more important one is called packet_num, which is embeded in the +multifd packet headers (MultiFDPacket_t). + +So let's keep them for now, but make them much easier to understand, by +doing below: + + - Rename both of them to packets_sent / packets_recved, the old + name (num_packets) are waaay too confusing when we already have + MultiFDPacket_t.packets_num. + + - Avoid worrying on the "initial packet": we know we will send it, that's + good enough. The accounting won't matter a great deal to start with 0 or + with 1. + + - Move them to where we send/recv the packets. They're: + + - multifd_send_fill_packet() for senders. + - multifd_recv_unfill_packet() for receivers. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-10-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 13 +++++-------- + migration/multifd.h | 6 +++--- + 2 files changed, 8 insertions(+), 11 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index a67917b113..f79badb546 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -289,6 +289,8 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->offset[i] = cpu_to_be64(temp); + } ++ ++ p->packets_sent++; + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -336,6 +338,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); ++ p->packets_recved++; + + if (p->normal_num == 0) { + return 0; +@@ -692,8 +695,6 @@ static void *multifd_send_thread(void *opaque) + ret = -1; + goto out; + } +- /* initial packet */ +- p->num_packets = 1; + + while (true) { + 
qemu_sem_post(&multifd_send_state->channels_ready); +@@ -723,7 +724,6 @@ static void *multifd_send_thread(void *opaque) + } + + multifd_send_fill_packet(p); +- p->num_packets++; + p->total_normal_pages += pages->num; + trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); +@@ -791,7 +791,7 @@ out: + + rcu_unregister_thread(); + migration_threads_remove(thread); +- trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); ++ trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); + + return NULL; + } +@@ -1128,7 +1128,6 @@ static void *multifd_recv_thread(void *opaque) + p->flags &= ~MULTIFD_FLAG_SYNC; + trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, + p->next_packet_size); +- p->num_packets++; + p->total_normal_pages += p->normal_num; + qemu_mutex_unlock(&p->mutex); + +@@ -1154,7 +1153,7 @@ static void *multifd_recv_thread(void *opaque) + qemu_mutex_unlock(&p->mutex); + + rcu_unregister_thread(); +- trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages); ++ trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); + + return NULL; + } +@@ -1256,8 +1255,6 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + } + p->c = ioc; + object_ref(OBJECT(ioc)); +- /* initial packet */ +- p->num_packets = 1; + + p->running = true; + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, +diff --git a/migration/multifd.h b/migration/multifd.h +index 08f26ef3fe..2e4ad0dc56 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -124,7 +124,7 @@ typedef struct { + /* size of the next packet that contains pages */ + uint32_t next_packet_size; + /* packets sent through this channel */ +- uint64_t num_packets; ++ uint64_t packets_sent; + /* non zero pages sent through this channel */ + uint64_t total_normal_pages; + /* buffers to send */ +@@ -174,8 +174,8 @@ typedef struct { + MultiFDPacket_t *packet; + /* size of the next 
packet that contains pages */ + uint32_t next_packet_size; +- /* packets sent through this channel */ +- uint64_t num_packets; ++ /* packets received through this channel */ ++ uint64_t packets_recved; + /* ramblock */ + RAMBlock *block; + /* ramblock host address */ +-- +2.33.0 + diff --git a/migration-multifd-Rewrite-multifd_queue_page.patch b/migration-multifd-Rewrite-multifd_queue_page.patch new file mode 100644 index 0000000000000000000000000000000000000000..5126981c66b362404a74ec9c5401193467f5f555 --- /dev/null +++ b/migration-multifd-Rewrite-multifd_queue_page.patch @@ -0,0 +1,112 @@ +From 68733215eef6342b28386fd6711f3ab82a7dc66a Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:52 +0800 +Subject: [42/99] migration/multifd: Rewrite multifd_queue_page() + +commit f88f86c4ee3fe673b34873e27af2de0a16fe01fd upstream. + +The current multifd_queue_page() is not easy to read and follow. It is not +good with a few reasons: + + - No helper at all to show what exactly does a condition mean; in short, + readability is low. + + - Rely on pages->ramblock being cleared to detect an empty queue. It's + slightly an overload of the ramblock pointer, per Fabiano [1], which I + also agree. + + - Contains a self recursion, even if not necessary.. + +Rewrite this function. We add some comments to make it even clearer on +what it does. 
+ +[1] https://lore.kernel.org/r/87wmrpjzew.fsf@suse.de + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-19-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 56 ++++++++++++++++++++++++++++++--------------- + 1 file changed, 37 insertions(+), 19 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index dabfc3ec0d..f92e6776f0 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -507,35 +507,53 @@ static bool multifd_send_pages(void) + return true; + } + ++static inline bool multifd_queue_empty(MultiFDPages_t *pages) ++{ ++ return pages->num == 0; ++} ++ ++static inline bool multifd_queue_full(MultiFDPages_t *pages) ++{ ++ return pages->num == pages->allocated; ++} ++ ++static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset) ++{ ++ pages->offset[pages->num++] = offset; ++} ++ + /* Returns true if enqueue successful, false otherwise */ + bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) + { +- MultiFDPages_t *pages = multifd_send_state->pages; +- bool changed = false; ++ MultiFDPages_t *pages; ++ ++retry: ++ pages = multifd_send_state->pages; + +- if (!pages->block) { ++ /* If the queue is empty, we can already enqueue now */ ++ if (multifd_queue_empty(pages)) { + pages->block = block; ++ multifd_enqueue(pages, offset); ++ return true; + } + +- if (pages->block == block) { +- pages->offset[pages->num] = offset; +- pages->num++; +- +- if (pages->num < pages->allocated) { +- return true; ++ /* ++ * Not empty, meanwhile we need a flush. It can because of either: ++ * ++ * (1) The page is not on the same ramblock of previous ones, or, ++ * (2) The queue is full. ++ * ++ * After flush, always retry. 
++ */ ++ if (pages->block != block || multifd_queue_full(pages)) { ++ if (!multifd_send_pages()) { ++ return false; + } +- } else { +- changed = true; +- } +- +- if (!multifd_send_pages()) { +- return false; +- } +- +- if (changed) { +- return multifd_queue_page(block, offset); ++ goto retry; + } + ++ /* Not empty, and we still have space, do it! */ ++ multifd_enqueue(pages, offset); + return true; + } + +-- +2.33.0 + diff --git a/migration-multifd-Separate-SYNC-request-with-normal-.patch b/migration-multifd-Separate-SYNC-request-with-normal-.patch new file mode 100644 index 0000000000000000000000000000000000000000..4aa8c7518d0e448163cd1b0405ccfc256430800e --- /dev/null +++ b/migration-multifd-Separate-SYNC-request-with-normal-.patch @@ -0,0 +1,190 @@ +From 40021e3b91b10672849477f4d76712ff3e78f738 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:40 +0800 +Subject: [30/99] migration/multifd: Separate SYNC request with normal jobs + +commit f5f48a7891cf6664a920ba52f6f4dea1646049a4 upstream. + +Multifd provide a threaded model for processing jobs. On sender side, +there can be two kinds of job: (1) a list of pages to send, or (2) a sync +request. + +The sync request is a very special kind of job. It never contains a page +array, but only a multifd packet telling the dest side to synchronize with +sent pages. + +Before this patch, both requests use the pending_job field, no matter what +the request is, it will boost pending_job, while multifd sender thread will +decrement it after it finishes one job. + +However this should be racy, because SYNC is special in that it needs to +set p->flags with MULTIFD_FLAG_SYNC, showing that this is a sync request. +Consider a sequence of operations where: + + - migration thread enqueue a job to send some pages, pending_job++ (0->1) + + - [...before the selected multifd sender thread wakes up...] 
+ + - migration thread enqueue another job to sync, pending_job++ (1->2), + setup p->flags=MULTIFD_FLAG_SYNC + + - multifd sender thread wakes up, found pending_job==2 + - send the 1st packet with MULTIFD_FLAG_SYNC and list of pages + - send the 2nd packet with flags==0 and no pages + +This is not expected, because MULTIFD_FLAG_SYNC should hopefully be done +after all the pages are received. Meanwhile, the 2nd packet will be +completely useless, which contains zero information. + +I didn't verify above, but I think this issue is still benign in that at +least on the recv side we always receive pages before handling +MULTIFD_FLAG_SYNC. However that's not always guaranteed and just tricky. + +One other reason I want to separate it is using p->flags to communicate +between the two threads is also not clearly defined, it's very hard to read +and understand why accessing p->flags is always safe; see the current impl +of multifd_send_thread() where we tried to cache only p->flags. It doesn't +need to be that complicated. + +This patch introduces pending_sync, a separate flag just to show that the +requester needs a sync. Alongside, we remove the tricky caching of +p->flags now because after this patch p->flags should only be used by +multifd sender thread now, which will be crystal clear. So it is always +thread safe to access p->flags. + +With that, we can also safely convert the pending_job into a boolean, +because we don't support >1 pending jobs anyway. + +Always use atomic ops to access both flags to make sure no cache effect. +When at it, drop the initial setting of "pending_job = 0" because it's +always allocated using g_new0(). 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-7-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 39 +++++++++++++++++++++++++-------------- + migration/multifd.h | 13 +++++++++++-- + 2 files changed, 36 insertions(+), 16 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index bfafe94e1e..dd90c09b26 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -443,8 +443,8 @@ static int multifd_send_pages(void) + } + p = &multifd_send_state->params[i]; + qemu_mutex_lock(&p->mutex); +- if (!p->pending_job) { +- p->pending_job++; ++ if (qatomic_read(&p->pending_job) == false) { ++ qatomic_set(&p->pending_job, true); + next_channel = (i + 1) % migrate_multifd_channels(); + break; + } +@@ -632,8 +632,12 @@ int multifd_send_sync_main(void) + + qemu_mutex_lock(&p->mutex); + p->packet_num = multifd_send_state->packet_num++; +- p->flags |= MULTIFD_FLAG_SYNC; +- p->pending_job++; ++ /* ++ * We should be the only user so far, so not possible to be set by ++ * others concurrently. 
++ */ ++ assert(qatomic_read(&p->pending_sync) == false); ++ qatomic_set(&p->pending_sync, true); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +@@ -689,10 +693,9 @@ static void *multifd_send_thread(void *opaque) + } + qemu_mutex_lock(&p->mutex); + +- if (p->pending_job) { ++ if (qatomic_read(&p->pending_job)) { + uint64_t packet_num = p->packet_num; + MultiFDPages_t *pages = p->pages; +- uint32_t flags; + + if (use_zero_copy_send) { + p->iovs_num = 0; +@@ -708,13 +711,11 @@ static void *multifd_send_thread(void *opaque) + } + } + multifd_send_fill_packet(p); +- flags = p->flags; +- p->flags = 0; + p->num_packets++; + p->total_normal_pages += pages->num; + qemu_mutex_unlock(&p->mutex); + +- trace_multifd_send(p->id, packet_num, pages->num, flags, ++ trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + + if (use_zero_copy_send) { +@@ -742,12 +743,23 @@ static void *multifd_send_thread(void *opaque) + multifd_pages_reset(p->pages); + p->next_packet_size = 0; + qemu_mutex_lock(&p->mutex); +- p->pending_job--; ++ qatomic_set(&p->pending_job, false); + qemu_mutex_unlock(&p->mutex); +- +- if (flags & MULTIFD_FLAG_SYNC) { +- qemu_sem_post(&p->sem_sync); ++ } else if (qatomic_read(&p->pending_sync)) { ++ p->flags = MULTIFD_FLAG_SYNC; ++ multifd_send_fill_packet(p); ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ qemu_mutex_unlock(&p->mutex); ++ break; + } ++ /* p->next_packet_size will always be zero for a SYNC packet */ ++ stat64_add(&mig_stats.multifd_bytes, p->packet_len); ++ p->flags = 0; ++ qatomic_set(&p->pending_sync, false); ++ qemu_mutex_unlock(&p->mutex); ++ qemu_sem_post(&p->sem_sync); + } else { + qemu_mutex_unlock(&p->mutex); + /* sometimes there are spurious wakeups */ +@@ -911,7 +923,6 @@ int multifd_save_setup(Error **errp) + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); +- p->pending_job = 0; + p->id = 
i; + p->pages = multifd_pages_init(page_count); + p->packet_len = sizeof(MultiFDPacket_t) +diff --git a/migration/multifd.h b/migration/multifd.h +index 3920bdbcf1..08f26ef3fe 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -99,8 +99,17 @@ typedef struct { + uint32_t flags; + /* global number of generated multifd packets */ + uint64_t packet_num; +- /* thread has work to do */ +- int pending_job; ++ /* ++ * The sender thread has work to do if either of below boolean is set. ++ * ++ * @pending_job: a job is pending ++ * @pending_sync: a sync request is pending ++ * ++ * For both of these fields, they're only set by the requesters, and ++ * cleared by the multifd sender threads. ++ */ ++ bool pending_job; ++ bool pending_sync; + /* array of pages to sent. + * The owner of 'pages' depends of 'pending_job' value: + * pending_job == 0 -> migration_thread can use it. +-- +2.33.0 + diff --git a/migration-multifd-Simplify-locking-in-sender-thread.patch b/migration-multifd-Simplify-locking-in-sender-thread.patch new file mode 100644 index 0000000000000000000000000000000000000000..06c4958c69f4b2fe2cde6778231494675a192969 --- /dev/null +++ b/migration-multifd-Simplify-locking-in-sender-thread.patch @@ -0,0 +1,99 @@ +From 9e616674520aa0272393eda94a4ad7301969b73c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:41 +0800 +Subject: [31/99] migration/multifd: Simplify locking in sender thread + +commit e3cce9af10b06c51434ced4e1a6686f1ce43e124 upstream. + +The sender thread will yield the p->mutex before IO starts, trying to not +block the requester thread. This may be unnecessary lock optimizations, +because the requester can already read pending_job safely even without the +lock, because the requester is currently the only one who can assign a +task. 
+ +Drop that lock complication on both sides: + + (1) in the sender thread, always take the mutex until job done + (2) in the requester thread, check pending_job clear lockless + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-8-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index dd90c09b26..cef4a88237 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -430,7 +430,9 @@ static int multifd_send_pages(void) + return -1; + } + ++ /* We wait here, until at least one channel is ready */ + qemu_sem_wait(&multifd_send_state->channels_ready); ++ + /* + * next_channel can remain from a previous migration that was + * using more channels, so ensure it doesn't overflow if the +@@ -442,17 +444,26 @@ static int multifd_send_pages(void) + return -1; + } + p = &multifd_send_state->params[i]; +- qemu_mutex_lock(&p->mutex); ++ /* ++ * Lockless read to p->pending_job is safe, because only multifd ++ * sender thread can clear it. ++ */ + if (qatomic_read(&p->pending_job) == false) { +- qatomic_set(&p->pending_job, true); + next_channel = (i + 1) % migrate_multifd_channels(); + break; + } +- qemu_mutex_unlock(&p->mutex); + } ++ ++ qemu_mutex_lock(&p->mutex); + assert(!p->pages->num); + assert(!p->pages->block); +- ++ /* ++ * Double check on pending_job==false with the lock. In the future if ++ * we can have >1 requester thread, we can replace this with a "goto ++ * retry", but that is for later. 
++ */ ++ assert(qatomic_read(&p->pending_job) == false); ++ qatomic_set(&p->pending_job, true); + p->packet_num = multifd_send_state->packet_num++; + multifd_send_state->pages = p->pages; + p->pages = pages; +@@ -713,8 +724,6 @@ static void *multifd_send_thread(void *opaque) + multifd_send_fill_packet(p); + p->num_packets++; + p->total_normal_pages += pages->num; +- qemu_mutex_unlock(&p->mutex); +- + trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + +@@ -734,6 +743,7 @@ static void *multifd_send_thread(void *opaque) + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, + 0, p->write_flags, &local_err); + if (ret != 0) { ++ qemu_mutex_unlock(&p->mutex); + break; + } + +@@ -742,7 +752,6 @@ static void *multifd_send_thread(void *opaque) + + multifd_pages_reset(p->pages); + p->next_packet_size = 0; +- qemu_mutex_lock(&p->mutex); + qatomic_set(&p->pending_job, false); + qemu_mutex_unlock(&p->mutex); + } else if (qatomic_read(&p->pending_sync)) { +-- +2.33.0 + diff --git a/migration-multifd-Simplify-multifd_channel_connect-i.patch b/migration-multifd-Simplify-multifd_channel_connect-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..69d0a5bcae8e4736eb9accfc498d1b38264d3559 --- /dev/null +++ b/migration-multifd-Simplify-multifd_channel_connect-i.patch @@ -0,0 +1,53 @@ +From 9ec8c17e34afec47c8085a870e8dcfff36a9d3c7 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:11 +0200 +Subject: [02/99] migration/multifd: Simplify multifd_channel_connect() if else + statement +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit a4395f5d3c06472ed70d9ef9f79878f95575be9e upstream. + +The else branch in multifd_channel_connect() is redundant because when +the if branch is taken the function returns. + +Simplify the code by removing the else branch. 
+ +Signed-off-by: Avihai Horon +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/20231231093016.14204-7-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 055b2688ad..06585f0141 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -851,14 +851,13 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + * so we mustn't call multifd_send_thread until then + */ + return multifd_tls_channel_connect(p, ioc, errp); +- +- } else { +- migration_ioc_register_yank(ioc); +- p->registered_yank = true; +- p->c = ioc; +- qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, +- QEMU_THREAD_JOINABLE); + } ++ ++ migration_ioc_register_yank(ioc); ++ p->registered_yank = true; ++ p->c = ioc; ++ qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, ++ QEMU_THREAD_JOINABLE); + return true; + } + +-- +2.33.0 + diff --git a/migration-multifd-Split-multifd_send_terminate_threa.patch b/migration-multifd-Split-multifd_send_terminate_threa.patch new file mode 100644 index 0000000000000000000000000000000000000000..91e5f4b1759848e614c5b4958ef777f44766c008 --- /dev/null +++ b/migration-multifd-Split-multifd_send_terminate_threa.patch @@ -0,0 +1,131 @@ +From e033a771a9d35a86b7864652abf61165bcdcaf55 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:49 +0800 +Subject: [39/99] migration/multifd: Split multifd_send_terminate_threads() + +commit 3ab4441d97af59ea09ee015d68c4770704b2b34f upstream. + +Split multifd_send_terminate_threads() into two functions: + + - multifd_send_set_error(): used when an error happened on the sender + side, set error and quit state only + + - multifd_send_terminate_threads(): used only by the main thread to kick + all multifd send threads out of sleep, for the last recycling. 
+ +Use multifd_send_set_error() in the three old call sites where only the +error will be set. + +Use multifd_send_terminate_threads() in the last one where the main thread +will kick the multifd threads at last in multifd_save_cleanup(). + +Both helpers will need to set quitting=1. + +Suggested-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-16-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 27 ++++++++++++++++++--------- + migration/trace-events | 2 +- + 2 files changed, 19 insertions(+), 10 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3b7984cf99..59ccc42c05 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -537,10 +537,9 @@ int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + return 1; + } + +-static void multifd_send_terminate_threads(Error *err) ++/* Multifd send side hit an error; remember it and prepare to quit */ ++static void multifd_send_set_error(Error *err) + { +- int i; +- + /* + * We don't want to exit each threads twice. Depending on where + * we get the error, or if there are two independent errors in two +@@ -551,8 +550,6 @@ static void multifd_send_terminate_threads(Error *err) + return; + } + +- trace_multifd_send_terminate_threads(err != NULL); +- + if (err) { + MigrationState *s = migrate_get_current(); + migrate_set_error(s, err); +@@ -564,7 +561,19 @@ static void multifd_send_terminate_threads(Error *err) + MIGRATION_STATUS_FAILED); + } + } ++} ++ ++static void multifd_send_terminate_threads(void) ++{ ++ int i; ++ ++ trace_multifd_send_terminate_threads(); + ++ /* ++ * Tell everyone we're quitting. No xchg() needed here; we simply ++ * always set it. 
++ */ ++ qatomic_set(&multifd_send_state->exiting, 1); + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -587,7 +596,7 @@ void multifd_save_cleanup(void) + if (!migrate_multifd()) { + return; + } +- multifd_send_terminate_threads(NULL); ++ multifd_send_terminate_threads(); + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -784,7 +793,7 @@ out: + if (ret) { + assert(local_err); + trace_multifd_send_error(p->id); +- multifd_send_terminate_threads(local_err); ++ multifd_send_set_error(local_err); + multifd_send_kick_main(p); + error_free(local_err); + } +@@ -820,7 +829,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + +- multifd_send_terminate_threads(err); ++ multifd_send_set_error(err); + multifd_send_kick_main(p); + error_free(err); + } +@@ -902,7 +911,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + } + + trace_multifd_new_send_channel_async_error(p->id, local_err); +- multifd_send_terminate_threads(local_err); ++ multifd_send_set_error(local_err); + multifd_send_kick_main(p); + object_unref(OBJECT(ioc)); + error_free(local_err); +diff --git a/migration/trace-events b/migration/trace-events +index de4a743c8a..298ad2b0dd 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -141,7 +141,7 @@ multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" + multifd_send_sync_main_signal(uint8_t id) "channel %u" + multifd_send_sync_main_wait(uint8_t id) "channel %u" +-multifd_send_terminate_threads(bool error) "error %d" ++multifd_send_terminate_threads(void) "" + multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%u" + 
multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" +-- +2.33.0 + diff --git a/migration-multifd-Stick-with-send-recv-on-function-n.patch b/migration-multifd-Stick-with-send-recv-on-function-n.patch new file mode 100644 index 0000000000000000000000000000000000000000..720636b339070dbbe9a1f8755416aca84068305b --- /dev/null +++ b/migration-multifd-Stick-with-send-recv-on-function-n.patch @@ -0,0 +1,156 @@ +From f78f9157a90c7bef026f87fd38f6ce5b785f6cb7 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:55 +0800 +Subject: [45/99] migration/multifd: Stick with send/recv on function names + +commit cde85c37ca54e4a2dbee8653181938499887f6be upstream. + +Most of the multifd code uses send/recv to represent the two sides, but +some rare cases use save/load. + +Since send/recv is the majority, replacing the save/load use cases to use +send/recv globally. Now we reach a consensus on the naming. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-22-peterx@redhat.com +Signed-off-by: Peter Xu +[jz: upstream renamed qemu_mutex_lock_iothread() to qpl_lock(), while + openEuler not yet, resolve context conflict due to this] +Signed-off-by: Jason Zeng +--- + migration/migration.c | 12 ++++++------ + migration/multifd.c | 10 +++++----- + migration/multifd.h | 10 +++++----- + 3 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 2c5258d0b0..f428839dd6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -269,7 +269,7 @@ void migration_incoming_state_destroy(void) + { + struct MigrationIncomingState *mis = migration_incoming_get_current(); + +- multifd_load_cleanup(); ++ multifd_recv_cleanup(); + compress_threads_load_cleanup(); + + if (mis->to_src_file) { +@@ -622,7 +622,7 @@ static void process_incoming_migration_bh(void *opaque) + + trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced"); 
+ +- multifd_load_shutdown(); ++ multifd_recv_shutdown(); + + dirty_bitmap_mig_before_vm_start(); + +@@ -721,7 +721,7 @@ fail: + MIGRATION_STATUS_FAILED); + qemu_fclose(mis->from_src_file); + +- multifd_load_cleanup(); ++ multifd_recv_cleanup(); + compress_threads_load_cleanup(); + + exit(EXIT_FAILURE); +@@ -854,7 +854,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + default_channel = !mis->from_src_file; + } + +- if (multifd_load_setup(errp) != 0) { ++ if (multifd_recv_setup(errp) != 0) { + return; + } + +@@ -1306,7 +1306,7 @@ static void migrate_fd_cleanup(MigrationState *s) + } + qemu_mutex_lock_iothread(); + +- multifd_save_cleanup(); ++ multifd_send_shutdown(); + qemu_mutex_lock(&s->qemu_file_lock); + tmp = s->to_dst_file; + s->to_dst_file = NULL; +@@ -3638,7 +3638,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + return; + } + +- if (multifd_save_setup(&local_err) != 0) { ++ if (multifd_send_setup(&local_err) != 0) { + migrate_set_error(s, local_err); + error_report_err(local_err); + migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +diff --git a/migration/multifd.c b/migration/multifd.c +index 048ff66760..723b1d0b35 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -664,7 +664,7 @@ static void multifd_send_cleanup_state(void) + multifd_send_state = NULL; + } + +-void multifd_save_cleanup(void) ++void multifd_send_shutdown(void) + { + int i; + +@@ -969,7 +969,7 @@ static void multifd_new_send_channel_create(gpointer opaque) + socket_send_channel_create(multifd_new_send_channel_async, opaque); + } + +-int multifd_save_setup(Error **errp) ++int multifd_send_setup(Error **errp) + { + int thread_count; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); +@@ -1067,7 +1067,7 @@ static void multifd_recv_terminate_threads(Error *err) + } + } + +-void multifd_load_shutdown(void) ++void multifd_recv_shutdown(void) + { + if (migrate_multifd()) { + multifd_recv_terminate_threads(NULL); +@@ 
-1102,7 +1102,7 @@ static void multifd_recv_cleanup_state(void) + multifd_recv_state = NULL; + } + +-void multifd_load_cleanup(void) ++void multifd_recv_cleanup(void) + { + int i; + +@@ -1217,7 +1217,7 @@ static void *multifd_recv_thread(void *opaque) + return NULL; + } + +-int multifd_load_setup(Error **errp) ++int multifd_recv_setup(Error **errp) + { + int thread_count; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); +diff --git a/migration/multifd.h b/migration/multifd.h +index a320c53a6f..9b40a53cb6 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -13,11 +13,11 @@ + #ifndef QEMU_MIGRATION_MULTIFD_H + #define QEMU_MIGRATION_MULTIFD_H + +-int multifd_save_setup(Error **errp); +-void multifd_save_cleanup(void); +-int multifd_load_setup(Error **errp); +-void multifd_load_cleanup(void); +-void multifd_load_shutdown(void); ++int multifd_send_setup(Error **errp); ++void multifd_send_shutdown(void); ++int multifd_recv_setup(Error **errp); ++void multifd_recv_cleanup(void); ++void multifd_recv_shutdown(void); + bool multifd_recv_all_channels_created(void); + void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-- +2.33.0 + diff --git a/migration-multifd-Switch-to-no-compression-when-no-h.patch b/migration-multifd-Switch-to-no-compression-when-no-h.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a701e946cb26e07fa72870debf514a25b6d6544 --- /dev/null +++ b/migration-multifd-Switch-to-no-compression-when-no-h.patch @@ -0,0 +1,169 @@ +From 56d75b83e20501cbd35326823d3450ccede2823a Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:09 +0100 +Subject: [85/99] migration/multifd: Switch to no compression when no hardware + support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit c1dfd12168e1be0a940e97f85044098e18d18178 upstream. 
+ +Send raw packets over if UADK hardware support is not available. This is to +satisfy Qemu qtest CI which may run on platforms that don't have UADK +hardware support. Subsequent patch will add support for uadk migration +qtest. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 92 +++++++++++++++++++++++----------------- + 1 file changed, 53 insertions(+), 39 deletions(-) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index 70bba92eaa..d12353fb21 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -17,6 +17,7 @@ + #include "migration.h" + #include "multifd.h" + #include "options.h" ++#include "qemu/error-report.h" + #include "uadk/wd_comp.h" + #include "uadk/wd_sched.h" + +@@ -48,29 +49,29 @@ static struct wd_data *multifd_uadk_init_sess(uint32_t count, + uint32_t size = count * page_size; + struct wd_data *wd; + +- if (!uadk_hw_init()) { +- error_setg(errp, "multifd: UADK hardware not available"); +- return NULL; +- } +- + wd = g_new0(struct wd_data, 1); +- ss.alg_type = WD_ZLIB; +- if (compress) { +- ss.op_type = WD_DIR_COMPRESS; +- /* Add an additional page for handling output > input */ +- size += page_size; +- } else { +- ss.op_type = WD_DIR_DECOMPRESS; +- } +- +- /* We use default level 1 compression and 4K window size */ +- param.type = ss.op_type; +- ss.sched_param = &param; + +- wd->handle = wd_comp_alloc_sess(&ss); +- if (!wd->handle) { +- error_setg(errp, "multifd: failed wd_comp_alloc_sess"); +- goto out; ++ if (uadk_hw_init()) { ++ ss.alg_type = WD_ZLIB; ++ if (compress) { ++ ss.op_type = WD_DIR_COMPRESS; ++ /* Add an additional page for handling output > input */ ++ size += page_size; ++ } else { ++ ss.op_type = WD_DIR_DECOMPRESS; ++ } ++ /* We use default level 1 compression and 4K window size */ ++ param.type = ss.op_type; ++ ss.sched_param = &param; ++ ++ wd->handle = 
wd_comp_alloc_sess(&ss); ++ if (!wd->handle) { ++ error_setg(errp, "multifd: failed wd_comp_alloc_sess"); ++ goto out; ++ } ++ } else { ++ /* For CI test use */ ++ warn_report_once("UADK hardware not available. Switch to no compression mode"); + } + + wd->buf = g_try_malloc(size); +@@ -82,7 +83,9 @@ static struct wd_data *multifd_uadk_init_sess(uint32_t count, + return wd; + + out_free_sess: +- wd_comp_free_sess(wd->handle); ++ if (wd->handle) { ++ wd_comp_free_sess(wd->handle); ++ } + out: + wd_comp_uninit2(); + g_free(wd); +@@ -91,7 +94,9 @@ out: + + static void multifd_uadk_uninit_sess(struct wd_data *wd) + { +- wd_comp_free_sess(wd->handle); ++ if (wd->handle) { ++ wd_comp_free_sess(wd->handle); ++ } + wd_comp_uninit2(); + g_free(wd->buf); + g_free(wd->buf_hdr); +@@ -188,23 +193,26 @@ static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp) + .dst_len = p->page_size * 2, + }; + +- ret = wd_do_comp_sync(uadk_data->handle, &creq); +- if (ret || creq.status) { +- error_setg(errp, "multifd %u: failed compression, ret %d status %d", +- p->id, ret, creq.status); +- return -1; ++ if (uadk_data->handle) { ++ ret = wd_do_comp_sync(uadk_data->handle, &creq); ++ if (ret || creq.status) { ++ error_setg(errp, "multifd %u: failed compression, ret %d status %d", ++ p->id, ret, creq.status); ++ return -1; ++ } ++ if (creq.dst_len < p->page_size) { ++ uadk_data->buf_hdr[i] = cpu_to_be32(creq.dst_len); ++ prepare_next_iov(p, buf, creq.dst_len); ++ buf += creq.dst_len; ++ } + } +- if (creq.dst_len < p->page_size) { +- uadk_data->buf_hdr[i] = cpu_to_be32(creq.dst_len); +- prepare_next_iov(p, buf, creq.dst_len); +- buf += creq.dst_len; +- } else { +- /* +- * Send raw data if compressed out >= page_size. We might be better +- * off sending raw data if output is slightly less than page_size +- * as well because at the receive end we can skip the decompression. +- * But it is tricky to find the right number here. 
+- */ ++ /* ++ * Send raw data if no UADK hardware or if compressed out >= page_size. ++ * We might be better off sending raw data if output is slightly less ++ * than page_size as well because at the receive end we can skip the ++ * decompression. But it is tricky to find the right number here. ++ */ ++ if (!uadk_data->handle || creq.dst_len >= p->page_size) { + uadk_data->buf_hdr[i] = cpu_to_be32(p->page_size); + prepare_next_iov(p, p->pages->block->host + p->pages->offset[i], + p->page_size); +@@ -323,6 +331,12 @@ static int multifd_uadk_recv(MultiFDRecvParams *p, Error **errp) + continue; + } + ++ if (unlikely(!uadk_data->handle)) { ++ error_setg(errp, "multifd %u: UADK HW not available for decompression", ++ p->id); ++ return -1; ++ } ++ + ret = wd_do_comp_sync(uadk_data->handle, &creq); + if (ret || creq.status) { + error_setg(errp, "multifd %u: failed decompression, ret %d status %d", +-- +2.33.0 + diff --git a/migration-multifd-Unify-multifd-and-TLS-connection-p.patch b/migration-multifd-Unify-multifd-and-TLS-connection-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..8058286fd3f532578ab28cb524bd3972dc109b12 --- /dev/null +++ b/migration-multifd-Unify-multifd-and-TLS-connection-p.patch @@ -0,0 +1,175 @@ +From 7b385b0d528dfe3490bb3c8f58937bde1685f0f1 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:17 -0300 +Subject: [53/99] migration/multifd: Unify multifd and TLS connection paths + +commit 2576ae488ef9aa692486157df7d8b410919cd219 upstream. + +During multifd channel creation (multifd_new_send_channel_async) when +TLS is enabled, the multifd_channel_connect function is called twice, +once to create the TLS handshake thread and another time after the +asynchronous TLS handshake has finished. + +This creates a slightly confusing call stack where +multifd_channel_connect() is called more times than the number of +channels. 
It also splits error handling between the two callers of +multifd_channel_connect() causing some code duplication. Lastly, it +gets in the way of having a single point to determine whether all +channel creation tasks have been initiated. + +Refactor the code to move the reentrancy one level up at the +multifd_new_send_channel_async() level, de-duplicating the error +handling and allowing for the next patch to introduce a +synchronization point common to all the multifd channel creation, +regardless of TLS. + +Note that the previous code would never fail once p->c had been set. +This patch changes this assumption, which affects refcounting, so add +comments around object_unref to explain the situation. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-6-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 83 ++++++++++++++++++++++----------------------- + 1 file changed, 40 insertions(+), 43 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 1299248fea..85d1e7c347 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -873,30 +873,7 @@ out: + return NULL; + } + +-static bool multifd_channel_connect(MultiFDSendParams *p, +- QIOChannel *ioc, +- Error **errp); +- +-static void multifd_tls_outgoing_handshake(QIOTask *task, +- gpointer opaque) +-{ +- MultiFDSendParams *p = opaque; +- QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); +- Error *err = NULL; +- +- if (!qio_task_propagate_error(task, &err)) { +- trace_multifd_tls_outgoing_handshake_complete(ioc); +- if (multifd_channel_connect(p, ioc, &err)) { +- return; +- } +- } +- +- trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); +- +- multifd_send_set_error(err); +- multifd_send_kick_main(p); +- error_free(err); +-} ++static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque); + + static void *multifd_tls_handshake_thread(void *opaque) + 
{ +@@ -904,7 +881,7 @@ static void *multifd_tls_handshake_thread(void *opaque) + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c); + + qio_channel_tls_handshake(tioc, +- multifd_tls_outgoing_handshake, ++ multifd_new_send_channel_async, + p, + NULL, + NULL); +@@ -924,6 +901,10 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + return false; + } + ++ /* ++ * Ownership of the socket channel now transfers to the newly ++ * created TLS channel, which has already taken a reference. ++ */ + object_unref(OBJECT(ioc)); + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); +@@ -940,18 +921,7 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error **errp) + { +- trace_multifd_set_outgoing_channel( +- ioc, object_get_typename(OBJECT(ioc)), +- migrate_get_current()->hostname); +- +- if (migrate_channel_requires_tls_upgrade(ioc)) { +- /* +- * tls_channel_connect will call back to this +- * function after the TLS handshake, +- * so we mustn't call multifd_send_thread until then +- */ +- return multifd_tls_channel_connect(p, ioc, errp); +- } ++ qio_channel_set_delay(ioc, false); + + migration_ioc_register_yank(ioc); + p->registered_yank = true; +@@ -963,24 +933,51 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + return true; + } + ++/* ++ * When TLS is enabled this function is called once to establish the ++ * TLS connection and a second time after the TLS handshake to create ++ * the multifd channel. Without TLS it goes straight into the channel ++ * creation. 
++ */ + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + { + MultiFDSendParams *p = opaque; + QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); + Error *local_err = NULL; ++ bool ret; + + trace_multifd_new_send_channel_async(p->id); +- if (!qio_task_propagate_error(task, &local_err)) { +- qio_channel_set_delay(ioc, false); +- if (multifd_channel_connect(p, ioc, &local_err)) { +- return; +- } ++ ++ if (qio_task_propagate_error(task, &local_err)) { ++ ret = false; ++ goto out; ++ } ++ ++ trace_multifd_set_outgoing_channel(ioc, object_get_typename(OBJECT(ioc)), ++ migrate_get_current()->hostname); ++ ++ if (migrate_channel_requires_tls_upgrade(ioc)) { ++ ret = multifd_tls_channel_connect(p, ioc, &local_err); ++ } else { ++ ret = multifd_channel_connect(p, ioc, &local_err); + } + ++ if (ret) { ++ return; ++ } ++ ++out: + trace_multifd_new_send_channel_async_error(p->id, local_err); + multifd_send_set_error(local_err); + multifd_send_kick_main(p); +- object_unref(OBJECT(ioc)); ++ if (!p->c) { ++ /* ++ * If no channel has been created, drop the initial ++ * reference. Otherwise cleanup happens at ++ * multifd_send_channel_destroy() ++ */ ++ object_unref(OBJECT(ioc)); ++ } + error_free(local_err); + } + +-- +2.33.0 + diff --git a/migration-multifd-Zero-p-flags-before-starting-filli.patch b/migration-multifd-Zero-p-flags-before-starting-filli.patch new file mode 100644 index 0000000000000000000000000000000000000000..eae6a6252e9003117b9e412e4a20c4bfc9ba15cd --- /dev/null +++ b/migration-multifd-Zero-p-flags-before-starting-filli.patch @@ -0,0 +1,50 @@ +From c927bd2c10ee92131eba56ab8d2c26dd9dedfe50 Mon Sep 17 00:00:00 2001 +From: "Maciej S. Szmigiero" +Date: Tue, 29 Oct 2024 15:58:15 +0100 +Subject: [96/99] migration/multifd: Zero p->flags before starting filling a + packet + +commit 00b4b216534d84ace7b0583cec70a3aaf256cb25 upstream. + +This way there aren't stale flags there. 
+ +p->flags can't contain SYNC to be sent at the next RAM packet since syncs +are now handled separately in multifd_send_thread. + +Reviewed-by: Fabiano Rosas +Reviewed-by: Peter Xu +Signed-off-by: Maciej S. Szmigiero +Link: https://lore.kernel.org/r/1c96b6cdb797e6f035eb1a4ad9bfc24f4c7f5df8.1730203967.git.maciej.szmigiero@oracle.com +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd.c +[jz: resolve simple context conflict] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3761a803ed..36581a5631 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -898,6 +898,7 @@ static void *multifd_send_thread(void *opaque) + if (qatomic_load_acquire(&p->pending_job)) { + MultiFDPages_t *pages = p->pages; + ++ p->flags = 0; + p->iovs_num = 0; + assert(pages->num); + +@@ -944,7 +945,6 @@ static void *multifd_send_thread(void *opaque) + } + /* p->next_packet_size will always be zero for a SYNC packet */ + stat64_add(&mig_stats.multifd_bytes, p->packet_len); +- p->flags = 0; + } + + qatomic_set(&p->pending_sync, false); +-- +2.33.0 + diff --git a/migration-multifd-add-qpl-compression-method.patch b/migration-multifd-add-qpl-compression-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..50a0565398ac5c84390cc82a56b01d8b137d4261 --- /dev/null +++ b/migration-multifd-add-qpl-compression-method.patch @@ -0,0 +1,125 @@ +From 0f0f9c2c5a658a77c1d99e1d1ec166b8259ec307 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:07 +0800 +Subject: [75/99] migration/multifd: add qpl compression method + +commit 354cac2859e48ec5f7ee72a2a071da6c60a462d0 upstream. 
+ +add the Query Processing Library (QPL) compression method + +Introduce the qpl as a new multifd migration compression method, it can +use In-Memory Analytics Accelerator(IAA) to accelerate compression and +decompression, which can not only reduce network bandwidth requirement +but also reduce host compression and decompression CPU overhead. + +How to enable qpl compression during migration: +migrate_set_parameter multifd-compression qpl + +There is no qpl compression level parameter added since it only supports +level one, users do not need to specify the qpl compression level. + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Peter Xu +Reviewed-by: Fabiano Rosas +[fixed docs spacing in migration.json] +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-qpl.c | 20 ++++++++++++++++++++ + migration/multifd.h | 1 + + qapi/migration.json | 8 +++++++- + 5 files changed, 30 insertions(+), 2 deletions(-) + create mode 100644 migration/multifd-qpl.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index b3b9238b65..6ee9744e00 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -711,7 +711,7 @@ const PropertyInfo qdev_prop_fdc_drive_type = { + const PropertyInfo qdev_prop_multifd_compression = { + .name = "MultiFDCompression", + .description = "multifd_compression values, " +- "none/zlib/zstd", ++ "none/zlib/zstd/qpl", + .enum_table = &MultiFDCompression_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/meson.build b/migration/meson.build +index d619ebf238..6652f68d32 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed() + system_ss.add(files('block.c')) + endif + system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) ++system_ss.add(when: qpl, if_true: 
files('multifd-qpl.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +new file mode 100644 +index 0000000000..056a68a060 +--- /dev/null ++++ b/migration/multifd-qpl.c +@@ -0,0 +1,20 @@ ++/* ++ * Multifd qpl compression accelerator implementation ++ * ++ * Copyright (c) 2023 Intel Corporation ++ * ++ * Authors: ++ * Yuan Liu ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#include "qemu/osdep.h" ++#include "qemu/module.h" ++ ++static void multifd_qpl_register(void) ++{ ++ /* noop */ ++} ++ ++migration_init(multifd_qpl_register); +diff --git a/migration/multifd.h b/migration/multifd.h +index d99603c6a4..11f05dd6d5 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -33,6 +33,7 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + #define MULTIFD_FLAG_NOCOMP (0 << 1) + #define MULTIFD_FLAG_ZLIB (1 << 1) + #define MULTIFD_FLAG_ZSTD (2 << 1) ++#define MULTIFD_FLAG_QPL (4 << 1) + + /* This value needs to be a multiple of qemu_target_page_size() */ + #define MULTIFD_PACKET_SIZE (512 * 1024) +diff --git a/qapi/migration.json b/qapi/migration.json +index fc3178b1dc..f8f3f6f272 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -625,11 +625,17 @@ + # + # @zstd: use zstd compression method. + # ++# @qpl: use qpl compression method. Query Processing Library(qpl) is ++# based on the deflate compression algorithm and use the Intel ++# In-Memory Analytics Accelerator(IAA) accelerated compression ++# and decompression. 
(Since 9.1) ++# + # Since: 5.0 + ## + { 'enum': 'MultiFDCompression', + 'data': [ 'none', 'zlib', +- { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] } ++ { 'name': 'zstd', 'if': 'CONFIG_ZSTD' }, ++ { 'name': 'qpl', 'if': 'CONFIG_QPL' } ] } + + ## + # @MigMode: +-- +2.33.0 + diff --git a/migration-multifd-add-uadk-compression-framework.patch b/migration-multifd-add-uadk-compression-framework.patch new file mode 100644 index 0000000000000000000000000000000000000000..bb0f86304a70c4a69c03258fbfdb1c038d4890ca --- /dev/null +++ b/migration-multifd-add-uadk-compression-framework.patch @@ -0,0 +1,121 @@ +From cf49f952f849aecd772144cee5285b746bfae228 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:06 +0100 +Subject: [82/99] migration/multifd: add uadk compression framework + +commit f3d8bb759d13a2e33389f00fa338d0761309029a upstream. + +Adds the skeleton to support uadk compression method. +Complete functionality will be added in subsequent patches. + +Acked-by: Markus Armbruster +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-uadk.c | 20 ++++++++++++++++++++ + migration/multifd.h | 5 +++-- + qapi/migration.json | 5 ++++- + 5 files changed, 29 insertions(+), 4 deletions(-) + create mode 100644 migration/multifd-uadk.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 6ee9744e00..650c42eaf8 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -711,7 +711,7 @@ const PropertyInfo qdev_prop_fdc_drive_type = { + const PropertyInfo qdev_prop_multifd_compression = { + .name = "MultiFDCompression", + .description = "multifd_compression values, " +- "none/zlib/zstd/qpl", ++ "none/zlib/zstd/qpl/uadk", + .enum_table = &MultiFDCompression_lookup, + .get = qdev_propinfo_get_enum, + .set = 
qdev_propinfo_set_enum, +diff --git a/migration/meson.build b/migration/meson.build +index 6652f68d32..264d04657f 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -41,6 +41,7 @@ if get_option('live_block_migration').allowed() + endif + system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + system_ss.add(when: qpl, if_true: files('multifd-qpl.c')) ++system_ss.add(when: uadk, if_true: files('multifd-uadk.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +new file mode 100644 +index 0000000000..c2bb07535b +--- /dev/null ++++ b/migration/multifd-uadk.c +@@ -0,0 +1,20 @@ ++/* ++ * Multifd UADK compression accelerator implementation ++ * ++ * Copyright (c) 2024 Huawei Technologies R & D (UK) Ltd ++ * ++ * Authors: ++ * Shameer Kolothum ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/module.h" ++ ++static void multifd_uadk_register(void) ++{ ++ /* noop for now */ ++} ++migration_init(multifd_uadk_register); +diff --git a/migration/multifd.h b/migration/multifd.h +index 41965df7a9..ace4ba050d 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -29,13 +29,14 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) + +-/* We reserve 3 bits for compression methods */ +-#define MULTIFD_FLAG_COMPRESSION_MASK (7 << 1) ++/* We reserve 4 bits for compression methods */ ++#define MULTIFD_FLAG_COMPRESSION_MASK (0xf << 1) + /* we need to be compatible. 
Before compression value was 0 */ + #define MULTIFD_FLAG_NOCOMP (0 << 1) + #define MULTIFD_FLAG_ZLIB (1 << 1) + #define MULTIFD_FLAG_ZSTD (2 << 1) + #define MULTIFD_FLAG_QPL (4 << 1) ++#define MULTIFD_FLAG_UADK (8 << 1) + + /* This value needs to be a multiple of qemu_target_page_size() */ + #define MULTIFD_PACKET_SIZE (512 * 1024) +diff --git a/qapi/migration.json b/qapi/migration.json +index f8f3f6f272..f1a17c511b 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -630,12 +630,15 @@ + # In-Memory Analytics Accelerator(IAA) accelerated compression + # and decompression. (Since 9.1) + # ++# @uadk: use UADK library compression method. (Since 9.1) ++# + # Since: 5.0 + ## + { 'enum': 'MultiFDCompression', + 'data': [ 'none', 'zlib', + { 'name': 'zstd', 'if': 'CONFIG_ZSTD' }, +- { 'name': 'qpl', 'if': 'CONFIG_QPL' } ] } ++ { 'name': 'qpl', 'if': 'CONFIG_QPL' }, ++ { 'name': 'uadk', 'if': 'CONFIG_UADK' } ] } + + ## + # @MigMode: +-- +2.33.0 + diff --git a/migration-multifd-clean-pages-after-filling-packet.patch b/migration-multifd-clean-pages-after-filling-packet.patch deleted file mode 100644 index 596c5244691dc0a60a486598a74e23466a62645b..0000000000000000000000000000000000000000 --- a/migration-multifd-clean-pages-after-filling-packet.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 0f7e704a4faa661583ea6d82659f206e561f23d4 Mon Sep 17 00:00:00 2001 -From: Wei Yang -Date: Sat, 26 Oct 2019 07:19:59 +0800 -Subject: [PATCH 3/8] migration/multifd: clean pages after filling packet - -This is a preparation for the next patch: - - not use multifd during postcopy. - -Without enabling postcopy, everything looks good. While after enabling -postcopy, migration may fail even not use multifd during postcopy. The -reason is the pages is not properly cleared and *old* target page will -continue to be transferred. - -After clean pages, migration succeeds. 
- -Signed-off-by: Wei Yang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/ram.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 840e354..c2eb1ed 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -947,10 +947,10 @@ static int multifd_send_pages(RAMState *rs) - } - qemu_mutex_unlock(&p->mutex); - } -- p->pages->used = 0; -+ assert(!p->pages->used); -+ assert(!p->pages->block); - - p->packet_num = multifd_send_state->packet_num++; -- p->pages->block = NULL; - multifd_send_state->pages = p->pages; - p->pages = pages; - transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; -@@ -1137,6 +1137,7 @@ static void *multifd_send_thread(void *opaque) - p->num_packets++; - p->num_pages += used; - p->pages->used = 0; -+ p->pages->block = NULL; - qemu_mutex_unlock(&p->mutex); - - trace_multifd_send(p->id, packet_num, used, flags, --- -1.8.3.1 - diff --git a/migration-multifd-fix-destroyed-mutex-access-in-term.patch b/migration-multifd-fix-destroyed-mutex-access-in-term.patch deleted file mode 100644 index a927ea533c253ff242c5867cde1055453668c1c5..0000000000000000000000000000000000000000 --- a/migration-multifd-fix-destroyed-mutex-access-in-term.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 34d797aa134a33c1d67ca85d9d9f996d58162276 Mon Sep 17 00:00:00 2001 -From: Jiahui Cen -Date: Wed, 23 Oct 2019 11:47:37 +0800 -Subject: [PATCH 09/10] migration/multifd: fix destroyed mutex access in - terminating multifd threads - -One multifd will lock all the other multifds' IOChannel mutex to inform them -to quit by setting p->quit or shutting down p->c. In this senario, if some -multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup -had destroyed their mutex, it could cause destroyed mutex access when trying -lock their mutex. 
- -Here is the coredump stack: - #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 - #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 - #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 - #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 - #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 - #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 - #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 - #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 - #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 - #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 - -To fix it up, let's destroy the mutex after all the other multifd threads had -been terminated. - -Change-Id: I4124d43e8558ba302052bdc53fdae7cfcf9d8687 -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/ram.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 029f1cdf..d7d2d5ec 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1033,6 +1033,10 @@ void multifd_save_cleanup(void) - if (p->running) { - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDSendParams *p = &multifd_send_state->params[i]; -+ - socket_send_channel_destroy(p->c); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); -@@ -1306,6 +1310,10 @@ int multifd_load_cleanup(Error **errp) - qemu_sem_post(&p->sem_sync); - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDRecvParams *p = &multifd_recv_state->params[i]; -+ - object_unref(OBJECT(p->c)); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); --- -2.19.1 diff --git 
a/migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch b/migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch deleted file mode 100644 index 021fbcf8a6e4053b5e051ce3885b677d678259ca..0000000000000000000000000000000000000000 --- a/migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 26ffadd08711aa4ef62932ac0ecf5048518b2801 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 14:50:12 +0800 -Subject: [PATCH] migration/multifd: fix hangup with TLS-Multifd due to - blocking handshake -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The qemu main loop could hang up forever when we enable TLS+Multifd. -The Src multifd_send_0 invokes tls handshake, it sends hello to sever -and wait response. -However, the Dst main qemu loop has been waiting recvmsg() for multifd_recv_1. -Both of Src and Dst main qemu loop are blocking and waiting for reponse which -results in hanging up forever. - -Src: (multifd_send_0) Dst: (multifd_recv_1) -multifd_channel_connect migration_channel_process_incoming - multifd_tls_channel_connect migration_tls_channel_process_incoming - multifd_tls_channel_connect qio_channel_tls_handshake_task - qio_channel_tls_handshake gnutls_handshake - qio_channel_tls_handshake_task ... - qcrypto_tls_session_handshake ... - gnutls_handshake ... - ... ... - recvmsg (Blocking I/O waiting for response) recvmsg (Blocking I/O waiting for response) - -Fix this by offloadinig handshake work to a background thread. - -Reported-by: Yan Jin -Suggested-by: Daniel P. Berrangé -Signed-off-by: Chuan Zheng -Message-Id: <1604643893-8223-1-git-send-email-zhengchuan@huawei.com> -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 23 +++++++++++++++++------ - 1 file changed, 17 insertions(+), 6 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index dc9831d7f3..a37dbfc049 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1220,6 +1220,19 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, - multifd_channel_connect(p, ioc, err); - } - -+static void *multifd_tls_handshake_thread(void *opaque) -+{ -+ MultiFDSendParams *p = opaque; -+ QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c); -+ -+ qio_channel_tls_handshake(tioc, -+ multifd_tls_outgoing_handshake, -+ p, -+ NULL, -+ NULL); -+ return NULL; -+} -+ - static void multifd_tls_channel_connect(MultiFDSendParams *p, - QIOChannel *ioc, - Error **errp) -@@ -1235,12 +1248,10 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, - - trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); - qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); -- qio_channel_tls_handshake(tioc, -- multifd_tls_outgoing_handshake, -- p, -- NULL, -- NULL); -- -+ p->c = QIO_CHANNEL(tioc); -+ qemu_thread_create(&p->thread, "multifd-tls-handshake-worker", -+ multifd_tls_handshake_thread, p, -+ QEMU_THREAD_JOINABLE); - } - - static bool multifd_channel_connect(MultiFDSendParams *p, --- -2.27.0 - diff --git a/migration-multifd-fix-nullptr-access-in-multifd_send.patch b/migration-multifd-fix-nullptr-access-in-multifd_send.patch deleted file mode 100644 index f2d278a135434e3b0838be7876b1fe2a616816cd..0000000000000000000000000000000000000000 --- a/migration-multifd-fix-nullptr-access-in-multifd_send.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 6a08ee257a95d9f2514bd995e90ddf46d3f78b41 Mon Sep 17 00:00:00 2001 -From: Zheng Chuan -Date: Tue, 21 Apr 2020 19:49:26 +0800 -Subject: [PATCH 10/10] migration/multifd: fix nullptr access in - multifd_send_terminate_threads - -If the multifd_send_threads is not created when migration is failed, -multifd_save_cleanup would be called twice. 
In this senario, the -multifd_send_state is accessed after it has been released, the result -is that the source VM is crashing down. - -Here is the coredump stack: - Program received signal SIGSEGV, Segmentation fault. - 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; - #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 - #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 - #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 - #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 - #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 - #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 - #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 - #10 0x00005629334c5acf in main_loop () at vl.c:1810 - #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 - -If the multifd_send_threads is not created when migration is failed. -In this senario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. 
- -Change-Id: I7441efe2ed542054ecd2a4da8146e2652824b452 -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/ram.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index d7d2d5ec..1858d66c 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1205,7 +1205,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - - if (qio_task_propagate_error(task, &local_err)) { - migrate_set_error(migrate_get_current(), local_err); -- multifd_save_cleanup(); -+ /* Error happen, we need to tell who pay attention to me */ -+ qemu_sem_post(&multifd_send_state->channels_ready); -+ qemu_sem_post(&p->sem_sync); -+ /* -+ * Although multifd_send_thread is not created, but main migration -+ * thread neet to judge whether it is running, so we need to mark -+ * its status. -+ */ -+ p->quit = true; - } else { - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); --- -2.19.1 diff --git a/migration-multifd-fix-nullptr-access-in-terminating-m.patch b/migration-multifd-fix-nullptr-access-in-terminating-m.patch deleted file mode 100644 index d403b28f28a708a94d7799618053e53c7d75b939..0000000000000000000000000000000000000000 --- a/migration-multifd-fix-nullptr-access-in-terminating-m.patch +++ /dev/null @@ -1,75 +0,0 @@ -From d9a847f0982fcca6f63031215065c346fcc27bbc Mon Sep 17 00:00:00 2001 -From: Zheng Chuan -Date: Fri, 24 Apr 2020 11:58:33 +0800 -Subject: [PATCH 06/10] migration/multifd: fix nullptr access in terminating - multifd threads - -One multifd channel will shutdown all the other multifd's IOChannel when it -fails to receive an IOChannel. In this senario, if some multifds had not -received its IOChannel yet, it would try to shutdown its IOChannel which could -cause nullptr access at qio_channel_shutdown. 
- -Here is the coredump stack: - #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 - #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 - #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 - #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 - #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 - #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 - #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket -.c:166 - #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 - #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 - #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 - #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 - #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 - #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 - -To fix it up, let's check p->c before calling qio_channel_shutdown. 
- -Change-Id: Ib36c1b3d866a3ad92d1460512df840cfb8736ab6 -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/ram.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 51811c2d..756a525f 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1112,6 +1112,7 @@ static void *multifd_send_thread(void *opaque) - rcu_register_thread(); - - if (multifd_send_initial_packet(p, &local_err) < 0) { -+ ret = -1; - goto out; - } - /* initial packet */ -@@ -1178,9 +1179,7 @@ out: - * who pay attention to me. - */ - if (ret != 0) { -- if (flags & MULTIFD_FLAG_SYNC) { -- qemu_sem_post(&p->sem_sync); -- } -+ qemu_sem_post(&p->sem_sync); - qemu_sem_post(&multifd_send_state->channels_ready); - } - -@@ -1279,7 +1278,9 @@ static void multifd_recv_terminate_threads(Error *err) - - normal quit, i.e. everything went fine, just finished - - error quit: We close the channels so the channel threads - finish the qio_channel_read_all_eof() */ -- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ if (p->c) { -+ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ } - qemu_mutex_unlock(&p->mutex); - } - } --- -2.19.1 diff --git a/migration-multifd-fix-potential-wrong-acception-orde.patch b/migration-multifd-fix-potential-wrong-acception-orde.patch deleted file mode 100644 index 6b8f18ce71abccf8987fb9261817654b3b10d631..0000000000000000000000000000000000000000 --- a/migration-multifd-fix-potential-wrong-acception-orde.patch +++ /dev/null @@ -1,302 +0,0 @@ -From 71f3e496c128b46f803cc4776154b02a5e505cb2 Mon Sep 17 00:00:00 2001 -From: Zheng Chuan -Date: Wed, 22 Apr 2020 13:45:39 +0800 -Subject: [PATCH] migration/multifd: fix potential wrong acception order of - IOChannel - -Multifd assumes the migration thread IOChannel is always established before -the multifd IOChannels, but this assumption will be broken in many situations -like 
network packet loss. - -For example: -Step1: Source (migration thread IOChannel) --SYN--> Destination -Step2: Source (migration thread IOChannel) <--SYNACK Destination -Step3: Source (migration thread IOChannel, lost) --ACK-->X Destination -Step4: Source (multifd IOChannel) --SYN--> Destination -Step5: Source (multifd IOChannel) <--SYNACK Destination -Step6: Source (multifd IOChannel, ESTABLISHED) --ACK--> Destination -Step7: Destination accepts multifd IOChannel -Step8: Source (migration thread IOChannel, ESTABLISHED) -ACK,DATA-> Destination -Step9: Destination accepts migration thread IOChannel - -The above situation can be reproduced by creating a weak network environment, -such as "tc qdisc add dev eth0 root netem loss 50%". The wrong acception order -will cause magic check failure and thus lead to migration failure. - -This patch fixes this issue by sending a migration IOChannel initial packet with -a unique id when using multifd migration. Since the multifd IOChannels will also -send initial packets, the destination can judge whether the processing IOChannel -belongs to multifd by checking the id in the initial packet. This mechanism can -ensure that different IOChannels will go to correct branches in our test. 
- -Change-Id: I63d1c32c7b66063bd6a3c5e7d63500555bd148b9 -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang - -diff --git a/migration/channel.c b/migration/channel.c -index 20e4c8e2..74621814 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -82,6 +82,15 @@ void migration_channel_connect(MigrationState *s, - return; - } - } else { -+ if (migrate_use_multifd()) { -+ /* multifd migration cannot distinguish migration IOChannel -+ * from multifd IOChannels, so we need to send an initial packet -+ * to show it is migration IOChannel -+ */ -+ migration_send_initial_packet(ioc, -+ migrate_multifd_channels(), -+ &error); -+ } - QEMUFile *f = qemu_fopen_channel_output(ioc); - - qemu_mutex_lock(&s->qemu_file_lock); -diff --git a/migration/migration.c b/migration/migration.c -index 114c33a1..8f2fc2b4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -517,12 +517,6 @@ static void migration_incoming_setup(QEMUFile *f) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -- if (multifd_load_setup() != 0) { -- /* We haven't been able to create multifd threads -- nothing better to do */ -- exit(EXIT_FAILURE); -- } -- - if (!mis->from_src_file) { - mis->from_src_file = f; - } -@@ -580,36 +574,41 @@ void migration_fd_process_incoming(QEMUFile *f) - void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); -- bool start_migration; -- -- if (!mis->from_src_file) { -- /* The first connection (multifd may have multiple) */ -- QEMUFile *f = qemu_fopen_channel_input(ioc); -+ Error *local_err = NULL; -+ int id = 0; - -- /* If it's a recovery, we're done */ -- if (postcopy_try_recover(f)) { -- return; -- } -+ if (migrate_use_multifd()) { -+ id = migration_recv_initial_packet(ioc, &local_err); -+ } -+ if (!migrate_use_multifd() || id == migrate_multifd_channels()) { -+ if (!mis->from_src_file) { -+ /* The migration connection (multifd may have multiple) */ -+ 
QEMUFile *f = qemu_fopen_channel_input(ioc); - -- migration_incoming_setup(f); -+ /* If it's a recovery, we're done */ -+ if (postcopy_try_recover(f)) { -+ return; -+ } - -- /* -- * Common migration only needs one channel, so we can start -- * right now. Multifd needs more than one channel, we wait. -- */ -- start_migration = !migrate_use_multifd(); -- } else { -- Error *local_err = NULL; -+ migration_incoming_setup(f); -+ } -+ } else if (id >= 0) { - /* Multiple connections */ - assert(migrate_use_multifd()); -- start_migration = multifd_recv_new_channel(ioc, &local_err); -+ multifd_recv_new_channel(ioc, id, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } -+ } else { -+ /* Bad connections */ -+ multifd_recv_terminate_threads(local_err); -+ error_propagate(errp, local_err); -+ return; - } - -- if (start_migration) { -+ /* Once we have all the channels we need, we can start migration */ -+ if (migration_has_all_channels()) { - migration_incoming_process(); - } - } -diff --git a/migration/migration.h b/migration/migration.h -index 1fdd7b21..feb34430 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -339,4 +339,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); - void migration_make_urgent_request(void); - void migration_consume_urgent_request(void); - -+int migration_send_initial_packet(QIOChannel *c, uint8_t id, Error **errp); -+int migration_recv_initial_packet(QIOChannel *c, Error **errp); -+ - #endif -diff --git a/migration/ram.c b/migration/ram.c -index 756a525f..029f1cdf 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -593,7 +593,7 @@ typedef struct { - uint8_t id; - uint8_t unused1[7]; /* Reserved for future use */ - uint64_t unused2[4]; /* Reserved for future use */ --} __attribute__((packed)) MultiFDInit_t; -+} __attribute__((packed)) MigrationInit_t; - - typedef struct { - uint32_t magic; -@@ -702,26 +702,26 @@ typedef struct { - QemuSemaphore sem_sync; - } 
MultiFDRecvParams; - --static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) -+int migration_send_initial_packet(QIOChannel *c, uint8_t id, Error **errp) - { -- MultiFDInit_t msg; -+ MigrationInit_t msg; - int ret; - - msg.magic = cpu_to_be32(MULTIFD_MAGIC); - msg.version = cpu_to_be32(MULTIFD_VERSION); -- msg.id = p->id; -+ msg.id = id; - memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid)); - -- ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp); -+ ret = qio_channel_write_all(c, (char *)&msg, sizeof(msg), errp); - if (ret != 0) { - return -1; - } - return 0; - } - --static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) -+int migration_recv_initial_packet(QIOChannel *c, Error **errp) - { -- MultiFDInit_t msg; -+ MigrationInit_t msg; - int ret; - - ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp); -@@ -756,8 +756,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) - } - - if (msg.id > migrate_multifd_channels()) { -- error_setg(errp, "multifd: received channel version %d " -- "expected %d", msg.version, MULTIFD_VERSION); -+ error_setg(errp, "multifd: received channel id %d " -+ "expected [0-%d]", msg.id, migrate_multifd_channels()); - return -1; - } - -@@ -1111,7 +1111,7 @@ static void *multifd_send_thread(void *opaque) - trace_multifd_send_thread_start(p->id); - rcu_register_thread(); - -- if (multifd_send_initial_packet(p, &local_err) < 0) { -+ if (migration_send_initial_packet(p->c, p->id, &local_err) < 0) { - ret = -1; - goto out; - } -@@ -1255,7 +1255,7 @@ struct { - uint64_t packet_num; - } *multifd_recv_state; - --static void multifd_recv_terminate_threads(Error *err) -+void multifd_recv_terminate_threads(Error *err) - { - int i; - -@@ -1470,21 +1470,10 @@ bool multifd_recv_all_channels_created(void) - * - Return false and do not set @errp when correctly receiving the current one; - * - Return false and set @errp when failing to receive the current channel. 
- */ --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) -+void multifd_recv_new_channel(QIOChannel *ioc, int id, Error **errp) - { - MultiFDRecvParams *p; - Error *local_err = NULL; -- int id; -- -- id = multifd_recv_initial_packet(ioc, &local_err); -- if (id < 0) { -- multifd_recv_terminate_threads(local_err); -- error_propagate_prepend(errp, local_err, -- "failed to receive packet" -- " via multifd channel %d: ", -- atomic_read(&multifd_recv_state->count)); -- return false; -- } - - p = &multifd_recv_state->params[id]; - if (p->c != NULL) { -@@ -1492,7 +1481,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - id); - multifd_recv_terminate_threads(local_err); - error_propagate(errp, local_err); -- return false; -+ return; - } - p->c = ioc; - object_ref(OBJECT(ioc)); -@@ -1503,8 +1492,6 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, - QEMU_THREAD_JOINABLE); - atomic_inc(&multifd_recv_state->count); -- return atomic_read(&multifd_recv_state->count) == -- migrate_multifd_channels(); - } - - /** -diff --git a/migration/ram.h b/migration/ram.h -index bd0eee79..a788ff0e 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -46,7 +46,8 @@ void multifd_save_cleanup(void); - int multifd_load_setup(void); - int multifd_load_cleanup(Error **errp); - bool multifd_recv_all_channels_created(void); --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); -+void multifd_recv_new_channel(QIOChannel *ioc, int id, Error **errp); -+void multifd_recv_terminate_threads(Error *err); - - uint64_t ram_pagesize_summary(void); - int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); -diff --git a/migration/socket.c b/migration/socket.c -index 98efdc02..093b956b 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -22,6 +22,7 @@ - #include "channel.h" - #include "socket.h" - #include "migration.h" -+#include "ram.h" - #include 
"qemu-file.h" - #include "io/channel-socket.h" - #include "io/net-listener.h" -@@ -181,6 +182,12 @@ static void socket_start_incoming_migration(SocketAddress *saddr, - - qio_net_listener_set_name(listener, "migration-socket-listener"); - -+ if (multifd_load_setup() != 0) { -+ /* We haven't been able to create multifd threads -+ nothing better to do */ -+ exit(EXIT_FAILURE); -+ } -+ - if (qio_net_listener_open_sync(listener, saddr, errp) < 0) { - object_unref(OBJECT(listener)); - return; --- -2.23.0 diff --git a/migration-multifd-implement-initialization-of-qpl-co.patch b/migration-multifd-implement-initialization-of-qpl-co.patch new file mode 100644 index 0000000000000000000000000000000000000000..36176ad0c48a7079ba19746f542566cfb5841f1b --- /dev/null +++ b/migration-multifd-implement-initialization-of-qpl-co.patch @@ -0,0 +1,369 @@ +From 41fed938d3474ab517e689feeb8abf5e2876d2df Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:08 +0800 +Subject: [77/99] migration/multifd: implement initialization of qpl + compression + +commit 34e104b897da6e144a5f34e7c5eebf8a4c4d9d59 upstream. + +during initialization, a software job is allocated to each channel +for software path fallabck when the IAA hardware is unavailable or +the hardware job submission fails. If the IAA hardware is available, +multiple hardware jobs are allocated for batch processing. + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-qpl.c | 328 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 327 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +index 056a68a060..6791a204d5 100644 +--- a/migration/multifd-qpl.c ++++ b/migration/multifd-qpl.c +@@ -9,12 +9,338 @@ + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ ++ + #include "qemu/osdep.h" + #include "qemu/module.h" ++#include "qapi/error.h" ++#include "multifd.h" ++#include "qpl/qpl.h" ++ ++typedef struct { ++ /* the QPL hardware path job */ ++ qpl_job *job; ++ /* indicates if fallback to software path is required */ ++ bool fallback_sw_path; ++ /* output data from the software path */ ++ uint8_t *sw_output; ++ /* output data length from the software path */ ++ uint32_t sw_output_len; ++} QplHwJob; ++ ++typedef struct { ++ /* array of hardware jobs, the number of jobs equals the number pages */ ++ QplHwJob *hw_jobs; ++ /* the QPL software job for the slow path and software fallback */ ++ qpl_job *sw_job; ++ /* the number of pages that the QPL needs to process at one time */ ++ uint32_t page_num; ++ /* array of compressed page buffers */ ++ uint8_t *zbuf; ++ /* array of compressed page lengths */ ++ uint32_t *zlen; ++ /* the status of the hardware device */ ++ bool hw_avail; ++} QplData; ++ ++/** ++ * check_hw_avail: check if IAA hardware is available ++ * ++ * If the IAA hardware does not exist or is unavailable, ++ * the QPL hardware job initialization will fail. ++ * ++ * Returns true if IAA hardware is available, otherwise false. ++ * ++ * @job_size: indicates the hardware job size if hardware is available ++ */ ++static bool check_hw_avail(uint32_t *job_size) ++{ ++ qpl_path_t path = qpl_path_hardware; ++ uint32_t size = 0; ++ qpl_job *job; ++ ++ if (qpl_get_job_size(path, &size) != QPL_STS_OK) { ++ return false; ++ } ++ assert(size > 0); ++ job = g_malloc0(size); ++ if (qpl_init_job(path, job) != QPL_STS_OK) { ++ g_free(job); ++ return false; ++ } ++ g_free(job); ++ *job_size = size; ++ return true; ++} ++ ++/** ++ * multifd_qpl_free_sw_job: clean up software job ++ * ++ * Free the software job resources. 
++ * ++ * @qpl: pointer to the QplData structure ++ */ ++static void multifd_qpl_free_sw_job(QplData *qpl) ++{ ++ assert(qpl); ++ if (qpl->sw_job) { ++ qpl_fini_job(qpl->sw_job); ++ g_free(qpl->sw_job); ++ qpl->sw_job = NULL; ++ } ++} ++ ++/** ++ * multifd_qpl_free_jobs: clean up hardware jobs ++ * ++ * Free all hardware job resources. ++ * ++ * @qpl: pointer to the QplData structure ++ */ ++static void multifd_qpl_free_hw_job(QplData *qpl) ++{ ++ assert(qpl); ++ if (qpl->hw_jobs) { ++ for (int i = 0; i < qpl->page_num; i++) { ++ qpl_fini_job(qpl->hw_jobs[i].job); ++ g_free(qpl->hw_jobs[i].job); ++ qpl->hw_jobs[i].job = NULL; ++ } ++ g_free(qpl->hw_jobs); ++ qpl->hw_jobs = NULL; ++ } ++} ++ ++/** ++ * multifd_qpl_init_sw_job: initialize a software job ++ * ++ * Use the QPL software path to initialize a job ++ * ++ * @qpl: pointer to the QplData structure ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_init_sw_job(QplData *qpl, Error **errp) ++{ ++ qpl_path_t path = qpl_path_software; ++ uint32_t size = 0; ++ qpl_job *job = NULL; ++ qpl_status status; ++ ++ status = qpl_get_job_size(path, &size); ++ if (status != QPL_STS_OK) { ++ error_setg(errp, "qpl_get_job_size failed with error %d", status); ++ return -1; ++ } ++ job = g_malloc0(size); ++ status = qpl_init_job(path, job); ++ if (status != QPL_STS_OK) { ++ error_setg(errp, "qpl_init_job failed with error %d", status); ++ g_free(job); ++ return -1; ++ } ++ qpl->sw_job = job; ++ return 0; ++} ++ ++/** ++ * multifd_qpl_init_jobs: initialize hardware jobs ++ * ++ * Use the QPL hardware path to initialize jobs ++ * ++ * @qpl: pointer to the QplData structure ++ * @size: the size of QPL hardware path job ++ * @errp: pointer to an error ++ */ ++static void multifd_qpl_init_hw_job(QplData *qpl, uint32_t size, Error **errp) ++{ ++ qpl_path_t path = qpl_path_hardware; ++ qpl_job *job = NULL; ++ qpl_status status; ++ ++ qpl->hw_jobs = g_new0(QplHwJob, qpl->page_num); ++ for (int i = 0; i < qpl->page_num; i++) 
{ ++ job = g_malloc0(size); ++ status = qpl_init_job(path, job); ++ /* the job initialization should succeed after check_hw_avail */ ++ assert(status == QPL_STS_OK); ++ qpl->hw_jobs[i].job = job; ++ } ++} ++ ++/** ++ * multifd_qpl_init: initialize QplData structure ++ * ++ * Allocate and initialize a QplData structure ++ * ++ * Returns a QplData pointer on success or NULL on error ++ * ++ * @num: the number of pages ++ * @size: the page size ++ * @errp: pointer to an error ++ */ ++static QplData *multifd_qpl_init(uint32_t num, uint32_t size, Error **errp) ++{ ++ uint32_t job_size = 0; ++ QplData *qpl; ++ ++ qpl = g_new0(QplData, 1); ++ qpl->page_num = num; ++ if (multifd_qpl_init_sw_job(qpl, errp) != 0) { ++ g_free(qpl); ++ return NULL; ++ } ++ qpl->hw_avail = check_hw_avail(&job_size); ++ if (qpl->hw_avail) { ++ multifd_qpl_init_hw_job(qpl, job_size, errp); ++ } ++ qpl->zbuf = g_malloc0(size * num); ++ qpl->zlen = g_new0(uint32_t, num); ++ return qpl; ++} ++ ++/** ++ * multifd_qpl_deinit: clean up QplData structure ++ * ++ * Free jobs, buffers and the QplData structure ++ * ++ * @qpl: pointer to the QplData structure ++ */ ++static void multifd_qpl_deinit(QplData *qpl) ++{ ++ if (qpl) { ++ multifd_qpl_free_sw_job(qpl); ++ multifd_qpl_free_hw_job(qpl); ++ g_free(qpl->zbuf); ++ g_free(qpl->zlen); ++ g_free(qpl); ++ } ++} ++ ++/** ++ * multifd_qpl_send_setup: set up send side ++ * ++ * Set up the channel with QPL compression. ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_send_setup(MultiFDSendParams *p, Error **errp) ++{ ++ QplData *qpl; ++ ++ qpl = multifd_qpl_init(p->page_count, p->page_size, errp); ++ if (!qpl) { ++ return -1; ++ } ++ p->compress_data = qpl; ++ ++ /* ++ * the page will be compressed independently and sent using an IOV. 
The ++ * additional two IOVs are used to store packet header and compressed data ++ * length ++ */ ++ p->iov = g_new0(struct iovec, p->page_count + 2); ++ return 0; ++} ++ ++/** ++ * multifd_qpl_send_cleanup: clean up send side ++ * ++ * Close the channel and free memory. ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static void multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp) ++{ ++ multifd_qpl_deinit(p->compress_data); ++ p->compress_data = NULL; ++ g_free(p->iov); ++ p->iov = NULL; ++} ++ ++/** ++ * multifd_qpl_send_prepare: prepare data to be able to send ++ * ++ * Create a compressed buffer with all the pages that we are going to ++ * send. ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_send_prepare(MultiFDSendParams *p, Error **errp) ++{ ++ /* Implement in next patch */ ++ return -1; ++} ++ ++/** ++ * multifd_qpl_recv_setup: set up receive side ++ * ++ * Create the compressed channel and buffer. ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_recv_setup(MultiFDRecvParams *p, Error **errp) ++{ ++ QplData *qpl; ++ ++ qpl = multifd_qpl_init(p->page_count, p->page_size, errp); ++ if (!qpl) { ++ return -1; ++ } ++ p->compress_data = qpl; ++ return 0; ++} ++ ++/** ++ * multifd_qpl_recv_cleanup: set up receive side ++ * ++ * Close the channel and free memory. ++ * ++ * @p: Params for the channel being used ++ */ ++static void multifd_qpl_recv_cleanup(MultiFDRecvParams *p) ++{ ++ multifd_qpl_deinit(p->compress_data); ++ p->compress_data = NULL; ++} ++ ++/** ++ * multifd_qpl_recv: read the data from the channel into actual pages ++ * ++ * Read the compressed buffer, and uncompress it into the actual ++ * pages. 
++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp) ++{ ++ /* Implement in next patch */ ++ return -1; ++} ++ ++static MultiFDMethods multifd_qpl_ops = { ++ .send_setup = multifd_qpl_send_setup, ++ .send_cleanup = multifd_qpl_send_cleanup, ++ .send_prepare = multifd_qpl_send_prepare, ++ .recv_setup = multifd_qpl_recv_setup, ++ .recv_cleanup = multifd_qpl_recv_cleanup, ++ .recv = multifd_qpl_recv, ++}; + + static void multifd_qpl_register(void) + { +- /* noop */ ++ multifd_register_ops(MULTIFD_COMPRESSION_QPL, &multifd_qpl_ops); + } + + migration_init(multifd_qpl_register); +-- +2.33.0 + diff --git a/migration-multifd-implement-qpl-compression-and-deco.patch b/migration-multifd-implement-qpl-compression-and-deco.patch new file mode 100644 index 0000000000000000000000000000000000000000..43d807f719829fac172d0bbdfb76aa2aed96ac8f --- /dev/null +++ b/migration-multifd-implement-qpl-compression-and-deco.patch @@ -0,0 +1,510 @@ +From 9c0666808448c393ffff4b44e3e5bb0f62e48a8f Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:09 +0800 +Subject: [78/99] migration/multifd: implement qpl compression and + decompression + +commit f6fe9fea995249ecc2cd72975d803fbf4d512c02 upstream. + +QPL compression and decompression will use IAA hardware path if the IAA +hardware is available. Otherwise the QPL library software path is used. + +The hardware path will automatically fall back to QPL software path if +the IAA queues are busy. In some scenarios, this may happen frequently, +such as configuring 4 channels but only one IAA device is available. In +the case of insufficient IAA hardware resources, retry and fallback can +help optimize performance: + + 1. Retry + SW fallback: + total time: 14649 ms + downtime: 25 ms + throughput: 17666.57 mbps + pages-per-second: 1509647 + + 2. 
No fallback, always wait for work queues to become available + total time: 18381 ms + downtime: 25 ms + throughput: 13698.65 mbps + pages-per-second: 859607 + +If both the hardware and software paths fail, the uncompressed page is +sent directly. + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-qpl.c | 424 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 420 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +index 6791a204d5..9265098ee7 100644 +--- a/migration/multifd-qpl.c ++++ b/migration/multifd-qpl.c +@@ -13,9 +13,14 @@ + #include "qemu/osdep.h" + #include "qemu/module.h" + #include "qapi/error.h" ++#include "qapi/qapi-types-migration.h" ++#include "exec/ramblock.h" + #include "multifd.h" + #include "qpl/qpl.h" + ++/* Maximum number of retries to resubmit a job if IAA work queues are full */ ++#define MAX_SUBMIT_RETRY_NUM (3) ++ + typedef struct { + /* the QPL hardware path job */ + qpl_job *job; +@@ -260,6 +265,225 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp) + p->iov = NULL; + } + ++/** ++ * multifd_qpl_prepare_job: prepare the job ++ * ++ * Set the QPL job parameters and properties. ++ * ++ * @job: pointer to the qpl_job structure ++ * @is_compression: indicates compression and decompression ++ * @input: pointer to the input data buffer ++ * @input_len: the length of the input data ++ * @output: pointer to the output data buffer ++ * @output_len: the length of the output data ++ */ ++static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression, ++ uint8_t *input, uint32_t input_len, ++ uint8_t *output, uint32_t output_len) ++{ ++ job->op = is_compression ? 
qpl_op_compress : qpl_op_decompress; ++ job->next_in_ptr = input; ++ job->next_out_ptr = output; ++ job->available_in = input_len; ++ job->available_out = output_len; ++ job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; ++ /* only supports compression level 1 */ ++ job->level = 1; ++} ++ ++/** ++ * multifd_qpl_prepare_comp_job: prepare the compression job ++ * ++ * Set the compression job parameters and properties. ++ * ++ * @job: pointer to the qpl_job structure ++ * @input: pointer to the input data buffer ++ * @output: pointer to the output data buffer ++ * @size: the page size ++ */ ++static void multifd_qpl_prepare_comp_job(qpl_job *job, uint8_t *input, ++ uint8_t *output, uint32_t size) ++{ ++ /* ++ * Set output length to less than the page size to force the job to ++ * fail in case it compresses to a larger size. We'll send that page ++ * without compression and skip the decompression operation on the ++ * destination. ++ */ ++ multifd_qpl_prepare_job(job, true, input, size, output, size - 1); ++} ++ ++/** ++ * multifd_qpl_prepare_decomp_job: prepare the decompression job ++ * ++ * Set the decompression job parameters and properties. 
++ * ++ * @job: pointer to the qpl_job structure ++ * @input: pointer to the input data buffer ++ * @len: the length of the input data ++ * @output: pointer to the output data buffer ++ * @size: the page size ++ */ ++static void multifd_qpl_prepare_decomp_job(qpl_job *job, uint8_t *input, ++ uint32_t len, uint8_t *output, ++ uint32_t size) ++{ ++ multifd_qpl_prepare_job(job, false, input, len, output, size); ++} ++ ++/** ++ * multifd_qpl_fill_iov: fill in the IOV ++ * ++ * Fill in the QPL packet IOV ++ * ++ * @p: Params for the channel being used ++ * @data: pointer to the IOV data ++ * @len: The length of the IOV data ++ */ ++static void multifd_qpl_fill_iov(MultiFDSendParams *p, uint8_t *data, ++ uint32_t len) ++{ ++ p->iov[p->iovs_num].iov_base = data; ++ p->iov[p->iovs_num].iov_len = len; ++ p->iovs_num++; ++ p->next_packet_size += len; ++} ++ ++/** ++ * multifd_qpl_fill_packet: fill the compressed page into the QPL packet ++ * ++ * Fill the compressed page length and IOV into the QPL packet ++ * ++ * @idx: The index of the compressed length array ++ * @p: Params for the channel being used ++ * @data: pointer to the compressed page buffer ++ * @len: The length of the compressed page ++ */ ++static void multifd_qpl_fill_packet(uint32_t idx, MultiFDSendParams *p, ++ uint8_t *data, uint32_t len) ++{ ++ QplData *qpl = p->compress_data; ++ ++ qpl->zlen[idx] = cpu_to_be32(len); ++ multifd_qpl_fill_iov(p, data, len); ++} ++ ++/** ++ * multifd_qpl_submit_job: submit a job to the hardware ++ * ++ * Submit a QPL hardware job to the IAA device ++ * ++ * Returns true if the job is submitted successfully, otherwise false. 
++ * ++ * @job: pointer to the qpl_job structure ++ */ ++static bool multifd_qpl_submit_job(qpl_job *job) ++{ ++ qpl_status status; ++ uint32_t num = 0; ++ ++retry: ++ status = qpl_submit_job(job); ++ if (status == QPL_STS_QUEUES_ARE_BUSY_ERR) { ++ if (num < MAX_SUBMIT_RETRY_NUM) { ++ num++; ++ goto retry; ++ } ++ } ++ return (status == QPL_STS_OK); ++} ++ ++/** ++ * multifd_qpl_compress_pages_slow_path: compress pages using slow path ++ * ++ * Compress the pages using software. If compression fails, the uncompressed ++ * page will be sent. ++ * ++ * @p: Params for the channel being used ++ */ ++static void multifd_qpl_compress_pages_slow_path(MultiFDSendParams *p) ++{ ++ QplData *qpl = p->compress_data; ++ uint32_t size = p->page_size; ++ qpl_job *job = qpl->sw_job; ++ uint8_t *zbuf = qpl->zbuf; ++ uint8_t *buf; ++ ++ for (int i = 0; i < p->pages->normal_num; i++) { ++ buf = p->pages->block->host + p->pages->offset[i]; ++ multifd_qpl_prepare_comp_job(job, buf, zbuf, size); ++ if (qpl_execute_job(job) == QPL_STS_OK) { ++ multifd_qpl_fill_packet(i, p, zbuf, job->total_out); ++ } else { ++ /* send the uncompressed page */ ++ multifd_qpl_fill_packet(i, p, buf, size); ++ } ++ zbuf += size; ++ } ++} ++ ++/** ++ * multifd_qpl_compress_pages: compress pages ++ * ++ * Submit the pages to the IAA hardware for compression. If hardware ++ * compression fails, it falls back to software compression. If software ++ * compression also fails, the uncompressed page is sent. 
++ * ++ * @p: Params for the channel being used ++ */ ++static void multifd_qpl_compress_pages(MultiFDSendParams *p) ++{ ++ QplData *qpl = p->compress_data; ++ MultiFDPages_t *pages = p->pages; ++ uint32_t size = p->page_size; ++ QplHwJob *hw_job; ++ uint8_t *buf; ++ uint8_t *zbuf; ++ ++ for (int i = 0; i < pages->normal_num; i++) { ++ buf = pages->block->host + pages->offset[i]; ++ zbuf = qpl->zbuf + (size * i); ++ hw_job = &qpl->hw_jobs[i]; ++ multifd_qpl_prepare_comp_job(hw_job->job, buf, zbuf, size); ++ if (multifd_qpl_submit_job(hw_job->job)) { ++ hw_job->fallback_sw_path = false; ++ } else { ++ /* ++ * The IAA work queue is full, any immediate subsequent job ++ * submission is likely to fail, sending the page via the QPL ++ * software path at this point gives us a better chance of ++ * finding the queue open for the next pages. ++ */ ++ hw_job->fallback_sw_path = true; ++ multifd_qpl_prepare_comp_job(qpl->sw_job, buf, zbuf, size); ++ if (qpl_execute_job(qpl->sw_job) == QPL_STS_OK) { ++ hw_job->sw_output = zbuf; ++ hw_job->sw_output_len = qpl->sw_job->total_out; ++ } else { ++ hw_job->sw_output = buf; ++ hw_job->sw_output_len = size; ++ } ++ } ++ } ++ ++ for (int i = 0; i < pages->normal_num; i++) { ++ buf = pages->block->host + pages->offset[i]; ++ zbuf = qpl->zbuf + (size * i); ++ hw_job = &qpl->hw_jobs[i]; ++ if (hw_job->fallback_sw_path) { ++ multifd_qpl_fill_packet(i, p, hw_job->sw_output, ++ hw_job->sw_output_len); ++ continue; ++ } ++ if (qpl_wait_job(hw_job->job) == QPL_STS_OK) { ++ multifd_qpl_fill_packet(i, p, zbuf, hw_job->job->total_out); ++ } else { ++ /* send the uncompressed page */ ++ multifd_qpl_fill_packet(i, p, buf, size); ++ } ++ } ++} ++ + /** + * multifd_qpl_send_prepare: prepare data to be able to send + * +@@ -273,8 +497,26 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int multifd_qpl_send_prepare(MultiFDSendParams *p, Error **errp) + { +- /* Implement in next patch */ +- return -1; ++ QplData 
*qpl = p->compress_data; ++ uint32_t len = 0; ++ ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } ++ ++ /* The first IOV is used to store the compressed page lengths */ ++ len = p->pages->normal_num * sizeof(uint32_t); ++ multifd_qpl_fill_iov(p, (uint8_t *) qpl->zlen, len); ++ if (qpl->hw_avail) { ++ multifd_qpl_compress_pages(p); ++ } else { ++ multifd_qpl_compress_pages_slow_path(p); ++ } ++ ++out: ++ p->flags |= MULTIFD_FLAG_QPL; ++ multifd_send_fill_packet(p); ++ return 0; + } + + /** +@@ -312,6 +554,140 @@ static void multifd_qpl_recv_cleanup(MultiFDRecvParams *p) + p->compress_data = NULL; + } + ++/** ++ * multifd_qpl_process_and_check_job: process and check a QPL job ++ * ++ * Process the job and check whether the job output length is the ++ * same as the specified length ++ * ++ * Returns true if the job execution succeeded and the output length ++ * is equal to the specified length, otherwise false. ++ * ++ * @job: pointer to the qpl_job structure ++ * @is_hardware: indicates whether the job is a hardware job ++ * @len: Specified output length ++ * @errp: pointer to an error ++ */ ++static bool multifd_qpl_process_and_check_job(qpl_job *job, bool is_hardware, ++ uint32_t len, Error **errp) ++{ ++ qpl_status status; ++ ++ status = (is_hardware ? 
qpl_wait_job(job) : qpl_execute_job(job)); ++ if (status != QPL_STS_OK) { ++ error_setg(errp, "qpl job failed with error %d", status); ++ return false; ++ } ++ if (job->total_out != len) { ++ error_setg(errp, "qpl decompressed len %u, expected len %u", ++ job->total_out, len); ++ return false; ++ } ++ return true; ++} ++ ++/** ++ * multifd_qpl_decompress_pages_slow_path: decompress pages using slow path ++ * ++ * Decompress the pages using software ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_decompress_pages_slow_path(MultiFDRecvParams *p, ++ Error **errp) ++{ ++ QplData *qpl = p->compress_data; ++ uint32_t size = p->page_size; ++ qpl_job *job = qpl->sw_job; ++ uint8_t *zbuf = qpl->zbuf; ++ uint8_t *addr; ++ uint32_t len; ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ len = qpl->zlen[i]; ++ addr = p->host + p->normal[i]; ++ /* the page is uncompressed, load it */ ++ if (len == size) { ++ memcpy(addr, zbuf, size); ++ zbuf += size; ++ continue; ++ } ++ multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); ++ if (!multifd_qpl_process_and_check_job(job, false, size, errp)) { ++ return -1; ++ } ++ zbuf += len; ++ } ++ return 0; ++} ++ ++/** ++ * multifd_qpl_decompress_pages: decompress pages ++ * ++ * Decompress the pages using the IAA hardware. If hardware ++ * decompression fails, it falls back to software decompression. 
++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_decompress_pages(MultiFDRecvParams *p, Error **errp) ++{ ++ QplData *qpl = p->compress_data; ++ uint32_t size = p->page_size; ++ uint8_t *zbuf = qpl->zbuf; ++ uint8_t *addr; ++ uint32_t len; ++ qpl_job *job; ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ addr = p->host + p->normal[i]; ++ len = qpl->zlen[i]; ++ /* the page is uncompressed if received length equals the page size */ ++ if (len == size) { ++ memcpy(addr, zbuf, size); ++ zbuf += size; ++ continue; ++ } ++ ++ job = qpl->hw_jobs[i].job; ++ multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); ++ if (multifd_qpl_submit_job(job)) { ++ qpl->hw_jobs[i].fallback_sw_path = false; ++ } else { ++ /* ++ * The IAA work queue is full, any immediate subsequent job ++ * submission is likely to fail, sending the page via the QPL ++ * software path at this point gives us a better chance of ++ * finding the queue open for the next pages. 
++ */ ++ qpl->hw_jobs[i].fallback_sw_path = true; ++ job = qpl->sw_job; ++ multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); ++ if (!multifd_qpl_process_and_check_job(job, false, size, errp)) { ++ return -1; ++ } ++ } ++ zbuf += len; ++ } ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ /* ignore pages that have already been processed */ ++ if (qpl->zlen[i] == size || qpl->hw_jobs[i].fallback_sw_path) { ++ continue; ++ } ++ ++ job = qpl->hw_jobs[i].job; ++ if (!multifd_qpl_process_and_check_job(job, true, size, errp)) { ++ return -1; ++ } ++ } ++ return 0; ++} + /** + * multifd_qpl_recv: read the data from the channel into actual pages + * +@@ -325,8 +701,48 @@ static void multifd_qpl_recv_cleanup(MultiFDRecvParams *p) + */ + static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp) + { +- /* Implement in next patch */ +- return -1; ++ QplData *qpl = p->compress_data; ++ uint32_t in_size = p->next_packet_size; ++ uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ uint32_t len = 0; ++ uint32_t zbuf_len = 0; ++ int ret; ++ ++ if (flags != MULTIFD_FLAG_QPL) { ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", ++ p->id, flags, MULTIFD_FLAG_QPL); ++ return -1; ++ } ++ multifd_recv_zero_page_process(p); ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ ++ /* read compressed page lengths */ ++ len = p->normal_num * sizeof(uint32_t); ++ assert(len < in_size); ++ ret = qio_channel_read_all(p->c, (void *) qpl->zlen, len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ for (int i = 0; i < p->normal_num; i++) { ++ qpl->zlen[i] = be32_to_cpu(qpl->zlen[i]); ++ assert(qpl->zlen[i] <= p->page_size); ++ zbuf_len += qpl->zlen[i]; ++ } ++ ++ /* read compressed pages */ ++ assert(in_size == len + zbuf_len); ++ ret = qio_channel_read_all(p->c, (void *) qpl->zbuf, zbuf_len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ if (qpl->hw_avail) { ++ return multifd_qpl_decompress_pages(p, errp); ++ } ++ return 
multifd_qpl_decompress_pages_slow_path(p, errp); + } + + static MultiFDMethods multifd_qpl_ops = { +-- +2.33.0 + diff --git a/migration-multifd-include-ram.h-in-multifd.h.patch b/migration-multifd-include-ram.h-in-multifd.h.patch new file mode 100644 index 0000000000000000000000000000000000000000..55a131ca8cedc7d544c00941aeecf85169ee8d7c --- /dev/null +++ b/migration-multifd-include-ram.h-in-multifd.h.patch @@ -0,0 +1,31 @@ +From 8b069af63b1dc70ffdcc2662289164b3fd6e29f3 Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Wed, 2 Apr 2025 18:09:21 +0800 +Subject: [76/99] migration/multifd: include ram.h in multifd.h + +Header file ram.h was included by multifd.h when mapped-ram was +introduced in upstream code. This inclusion is needed by qpl when +multifd-qpl.c includes multifd.h. Add this inclusion here since +we don't backport mapped-ram + +Signed-off-by: Jason Zeng +--- + migration/multifd.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd.h b/migration/multifd.h +index 11f05dd6d5..41965df7a9 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -13,6 +13,8 @@ + #ifndef QEMU_MIGRATION_MULTIFD_H + #define QEMU_MIGRATION_MULTIFD_H + ++#include "ram.h" ++ + bool multifd_send_setup(void); + void multifd_send_shutdown(void); + int multifd_recv_setup(Error **errp); +-- +2.33.0 + diff --git a/migration-multifd-multifd_send_kick_main.patch b/migration-multifd-multifd_send_kick_main.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f033c10f53a54878a911f7a72e92bbf34970dbf --- /dev/null +++ b/migration-multifd-multifd_send_kick_main.patch @@ -0,0 +1,76 @@ +From fa8d23b539d417e69cc0a02f13ca66ef2b506d8e Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:36 +0800 +Subject: [26/99] migration/multifd: multifd_send_kick_main() + +commit 48c0f5d56fd2ff0a0cda23301637b742c690f59a upstream. 
+ +When a multifd sender thread hit errors, it always needs to kick the main +thread by kicking all the semaphores that it can be waiting upon. + +Provide a helper for it and deduplicate the code. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-3-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 07e7e78029..d2da6178b0 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -373,6 +373,18 @@ struct { + MultiFDMethods *ops; + } *multifd_send_state; + ++/* ++ * The migration thread can wait on either of the two semaphores. This ++ * function can be used to kick the main thread out of waiting on either of ++ * them. Should mostly only be called when something wrong happened with ++ * the current multifd send thread. ++ */ ++static void multifd_send_kick_main(MultiFDSendParams *p) ++{ ++ qemu_sem_post(&p->sem_sync); ++ qemu_sem_post(&multifd_send_state->channels_ready); ++} ++ + /* + * How we use multifd_send_state->pages and channel->pages? + * +@@ -743,8 +755,7 @@ out: + assert(local_err); + trace_multifd_send_error(p->id); + multifd_send_terminate_threads(local_err); +- qemu_sem_post(&p->sem_sync); +- qemu_sem_post(&multifd_send_state->channels_ready); ++ multifd_send_kick_main(p); + error_free(local_err); + } + +@@ -785,8 +796,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + * is not created, and then tell who pay attention to me. 
+ */ + p->quit = true; +- qemu_sem_post(&multifd_send_state->channels_ready); +- qemu_sem_post(&p->sem_sync); ++ multifd_send_kick_main(p); + error_free(err); + } + +@@ -856,8 +866,7 @@ static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, + { + migrate_set_error(migrate_get_current(), err); + /* Error happen, we need to tell who pay attention to me */ +- qemu_sem_post(&multifd_send_state->channels_ready); +- qemu_sem_post(&p->sem_sync); ++ multifd_send_kick_main(p); + /* + * Although multifd_send_thread is not created, but main migration + * thread need to judge whether it is running, so we need to mark +-- +2.33.0 + diff --git a/migration-multifd-multifd_send_prepare_header.patch b/migration-multifd-multifd_send_prepare_header.patch new file mode 100644 index 0000000000000000000000000000000000000000..9e492d0a9e759cc0423f898c6812e26c40575c95 --- /dev/null +++ b/migration-multifd-multifd_send_prepare_header.patch @@ -0,0 +1,82 @@ +From fb749030a3151fff95a84f478ec5bcc1b5e0d07c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:46 +0800 +Subject: [36/99] migration/multifd: multifd_send_prepare_header() + +commit 452b205702335ddd45554aaf0eb37baf50bdfa00 upstream. + +Introduce a helper multifd_send_prepare_header() to setup the header packet +for multifd sender. + +It's fine to setup the IOV[0] _before_ send_prepare() because the packet +buffer is already ready, even if the content is to be filled in. + +With this helper, we can already slightly clean up the zero copy path. + +Note that I explicitly put it into multifd.h, because I want it inlined +directly into multifd*.c where necessary later. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-13-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 16 ++++++++-------- + migration/multifd.h | 8 ++++++++ + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f545faaa52..a42e152268 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -716,10 +716,14 @@ static void *multifd_send_thread(void *opaque) + if (qatomic_read(&p->pending_job)) { + MultiFDPages_t *pages = p->pages; + +- if (use_zero_copy_send) { +- p->iovs_num = 0; +- } else { +- p->iovs_num = 1; ++ p->iovs_num = 0; ++ ++ if (!use_zero_copy_send) { ++ /* ++ * Only !zerocopy needs the header in IOV; zerocopy will ++ * send it separately. ++ */ ++ multifd_send_prepare_header(p); + } + + assert(pages->num); +@@ -739,10 +743,6 @@ static void *multifd_send_thread(void *opaque) + if (ret != 0) { + break; + } +- } else { +- /* Send header using the same writev call */ +- p->iov[0].iov_len = p->packet_len; +- p->iov[0].iov_base = p->packet; + } + + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, +diff --git a/migration/multifd.h b/migration/multifd.h +index 2e4ad0dc56..4ec005f53f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -209,5 +209,13 @@ typedef struct { + + void multifd_register_ops(int method, MultiFDMethods *ops); + ++static inline void multifd_send_prepare_header(MultiFDSendParams *p) ++{ ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ p->iovs_num++; ++} ++ ++ + #endif + +-- +2.33.0 + diff --git a/migration-multifd-not-use-multifd-during-postcopy.patch b/migration-multifd-not-use-multifd-during-postcopy.patch deleted file mode 100644 index 6df61bfdd8d637854acea0e13e787db04dbdeca2..0000000000000000000000000000000000000000 --- a/migration-multifd-not-use-multifd-during-postcopy.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 
7331554bd6ab230404b20d612aed20a95c20eba6 Mon Sep 17 00:00:00 2001 -From: Wei Yang -Date: Sat, 26 Oct 2019 07:20:00 +0800 -Subject: [PATCH 4/8] migration/multifd: not use multifd during postcopy - -We don't support multifd during postcopy, but user still could enable -both multifd and postcopy. This leads to migration failure. - -Skip multifd during postcopy. - -Signed-off-by: Wei Yang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela ---- - migration/ram.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index c2eb1ed..aace3a5 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2571,10 +2571,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, - } - - /* -- * do not use multifd for compression as the first page in the new -- * block should be posted out before sending the compressed page -+ * Do not use multifd for: -+ * 1. Compression as the first page in the new block should be posted out -+ * before sending the compressed page -+ * 2. In postcopy as one whole host page should be placed - */ -- if (!save_page_use_compression(rs) && migrate_use_multifd()) { -+ if (!save_page_use_compression(rs) && migrate_use_multifd() -+ && !migration_in_postcopy()) { - return ram_save_multifd_page(rs, block, offset); - } - --- -1.8.3.1 - diff --git a/migration-multifd-put-IOV-initialization-into-compre.patch b/migration-multifd-put-IOV-initialization-into-compre.patch new file mode 100644 index 0000000000000000000000000000000000000000..652dc53b62926db72c0901d19fea2dd8f4a4ca76 --- /dev/null +++ b/migration-multifd-put-IOV-initialization-into-compre.patch @@ -0,0 +1,168 @@ +From 4e0ebb941ba15c31e7d19d44189bf47fee3181c9 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:05 +0800 +Subject: [73/99] migration/multifd: put IOV initialization into compression + method + +commit d9d3e4f243214f742425d9d8360f0794bb05c999 upstream. 
+ +Different compression methods may require different numbers of IOVs. +Based on streaming compression of zlib and zstd, all pages will be +compressed to a data block, so two IOVs are needed for packet header +and compressed data block. + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 7 +++++++ + migration/multifd-zstd.c | 8 +++++++- + migration/multifd.c | 22 ++++++++++++---------- + 3 files changed, 26 insertions(+), 11 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index b210725f6e..2df4983780 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -70,6 +70,10 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + goto err_free_zbuff; + } + p->compress_data = z; ++ ++ /* Needs 2 IOVs, one for packet header and one for compressed data */ ++ p->iov = g_new0(struct iovec, 2); ++ + return 0; + + err_free_zbuff: +@@ -101,6 +105,9 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + z->buf = NULL; + g_free(p->compress_data); + p->compress_data = NULL; ++ ++ g_free(p->iov); ++ p->iov = NULL; + } + + /** +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 256858df0a..ca17b7e310 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -52,7 +52,6 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + struct zstd_data *z = g_new0(struct zstd_data, 1); + int res; + +- p->compress_data = z; + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); +@@ -77,6 +76,10 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } ++ p->compress_data = z; ++ ++ /* Needs 2 IOVs, one for packet header and one for compressed data */ ++ p->iov = g_new0(struct iovec, 2); + return 0; + } + +@@ -98,6 +101,9 @@ static 
void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + z->zbuff = NULL; + g_free(p->compress_data); + p->compress_data = NULL; ++ ++ g_free(p->iov); ++ p->iov = NULL; + } + + /** +diff --git a/migration/multifd.c b/migration/multifd.c +index 4394952fbb..0fcecc3759 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -113,6 +113,13 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; + } + ++ if (multifd_use_packets()) { ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, p->page_count + 1); ++ } else { ++ p->iov = g_new0(struct iovec, p->page_count); ++ } ++ + return 0; + } + +@@ -126,6 +133,8 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + */ + static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + { ++ g_free(p->iov); ++ p->iov = NULL; + return; + } + +@@ -202,6 +211,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) + { ++ p->iov = g_new0(struct iovec, p->page_count); + return 0; + } + +@@ -214,6 +224,8 @@ static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) + */ + static void nocomp_recv_cleanup(MultiFDRecvParams *p) + { ++ g_free(p->iov); ++ p->iov = NULL; + } + + /** +@@ -734,8 +746,6 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; + multifd_send_state->ops->send_cleanup(p, errp); + + return *errp == NULL; +@@ -1120,11 +1130,6 @@ bool multifd_send_setup(void) + p->packet = g_malloc0(p->packet_len); + p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); + p->packet->version = cpu_to_be32(MULTIFD_VERSION); +- +- /* We need one extra place for the packet header */ +- p->iov = g_new0(struct iovec, page_count + 1); +- } else { +- p->iov = g_new0(struct iovec, page_count); + } + 
p->name = g_strdup_printf("multifdsend_%d", i); + p->page_size = qemu_target_page_size(); +@@ -1224,8 +1229,6 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; + g_free(p->normal); + p->normal = NULL; + g_free(p->zero); +@@ -1403,7 +1406,6 @@ int multifd_recv_setup(Error **errp) + p->packet = g_malloc0(p->packet_len); + } + p->name = g_strdup_printf("multifdrecv_%d", i); +- p->iov = g_new0(struct iovec, page_count); + p->normal = g_new0(ram_addr_t, page_count); + p->zero = g_new0(ram_addr_t, page_count); + p->page_count = page_count; +-- +2.33.0 + diff --git a/migration-multifd-solve-zero-page-causing-multiple-p.patch b/migration-multifd-solve-zero-page-causing-multiple-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..57dab982858b1da85c82ecea0c260a39bc216d35 --- /dev/null +++ b/migration-multifd-solve-zero-page-causing-multiple-p.patch @@ -0,0 +1,132 @@ +From 57c611db900ca4373f3a34d3d87d57bb4f0bba00 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 1 Apr 2024 23:41:10 +0800 +Subject: [71/99] migration/multifd: solve zero page causing multiple page + faults + +commit 5ef7e26bdb7eda10d6d5e1b77121be9945e5e550 upstream. + +Implemented recvbitmap tracking of received pages in multifd. + +If the zero page appears for the first time in the recvbitmap, this +page is not checked and set. + +If the zero page has already appeared in the recvbitmap, there is no +need to check the data but directly set the data to 0, because it is +unlikely that the zero page will be migrated multiple times. 
+ +Signed-off-by: Yuan Liu +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240401154110.2028453-2-yuan1.liu@intel.com +[peterx: touch up the comment, as the bitmap is used outside postcopy now] +Signed-off-by: Peter Xu + + Conflicts: + include/exec/ramblock.h +[jz: resolve context conflict due to mapped-ram which was not backported] +Signed-off-by: Jason Zeng +--- + include/exec/ramblock.h | 2 +- + migration/multifd-zero-page.c | 4 +++- + migration/multifd-zlib.c | 1 + + migration/multifd-zstd.c | 1 + + migration/multifd.c | 1 + + migration/ram.c | 4 ++++ + migration/ram.h | 1 + + 7 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h +index 69c6a53902..8f9579ed70 100644 +--- a/include/exec/ramblock.h ++++ b/include/exec/ramblock.h +@@ -44,7 +44,7 @@ struct RAMBlock { + size_t page_size; + /* dirty bitmap used during migration */ + unsigned long *bmap; +- /* bitmap of already received pages in postcopy */ ++ /* Bitmap of already received pages. Only used on destination side. 
*/ + unsigned long *receivedmap; + + /* +diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c +index 1ba38be636..e1b8370f88 100644 +--- a/migration/multifd-zero-page.c ++++ b/migration/multifd-zero-page.c +@@ -80,8 +80,10 @@ void multifd_recv_zero_page_process(MultiFDRecvParams *p) + { + for (int i = 0; i < p->zero_num; i++) { + void *page = p->host + p->zero[i]; +- if (!buffer_is_zero(page, p->page_size)) { ++ if (ramblock_recv_bitmap_test_byte_offset(p->block, p->zero[i])) { + memset(page, 0, p->page_size); ++ } else { ++ ramblock_recv_bitmap_set_offset(p->block, p->zero[i]); + } + } + } +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 83c0374380..b210725f6e 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -284,6 +284,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + if (i == p->normal_num - 1) { + flush = Z_SYNC_FLUSH; + } +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 02112255ad..256858df0a 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -278,6 +278,7 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) + z->in.pos = 0; + + for (i = 0; i < p->normal_num; i++) { ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + z->out.dst = p->host + p->normal[i]; + z->out.size = p->page_size; + z->out.pos = 0; +diff --git a/migration/multifd.c b/migration/multifd.c +index 6c01179858..4394952fbb 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -251,6 +251,7 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + for (int i = 0; i < p->normal_num; i++) { + p->iov[i].iov_base = p->host + p->normal[i]; + p->iov[i].iov_len = p->page_size; ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + } + return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); + } +diff 
--git a/migration/ram.c b/migration/ram.c +index fe2e4c6164..6acf518a34 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -275,6 +275,10 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, + nr); + } + ++void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset) ++{ ++ set_bit_atomic(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap); ++} + #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL) + + /* +diff --git a/migration/ram.h b/migration/ram.h +index 9b937a446b..cd263df026 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -69,6 +69,7 @@ int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); + bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset); + void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); + void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); ++void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset); + int64_t ramblock_recv_bitmap_send(QEMUFile *file, + const char *block_name); + bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); +-- +2.33.0 + diff --git a/migration-ram-Accelerate-the-loading-of-CSV-guest-s-.patch b/migration-ram-Accelerate-the-loading-of-CSV-guest-s-.patch new file mode 100644 index 0000000000000000000000000000000000000000..c915c3e3a0b946c92f5ed88bb42d2d7686c06982 --- /dev/null +++ b/migration-ram-Accelerate-the-loading-of-CSV-guest-s-.patch @@ -0,0 +1,37 @@ +From eac3cab8dcd005b33365b5196801268d696a11bc Mon Sep 17 00:00:00 2001 +From: fangbaoshun +Date: Mon, 2 Aug 2021 14:49:45 +0800 +Subject: [PATCH] migration/ram: Accelerate the loading of CSV guest's + encrypted pages + +When memory encryption is enabled, the guest memory will be encrypted with +the guest specific key. The patch introduces an accelerate solution which +queued the pages into list and load them togather by COMMAND_BATCH. 
+ +Signed-off-by: hanliyang +--- + migration/ram.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 7747f5af3a..790c0413c1 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1297,6 +1297,14 @@ static int load_encrypted_data(QEMUFile *f, uint8_t *ptr) + return ops->load_incoming_page(f, ptr); + } else if (flag == RAM_SAVE_SHARED_REGIONS_LIST) { + return ops->load_incoming_shared_regions_list(f); ++ } else if (flag == RAM_SAVE_ENCRYPTED_PAGE_BATCH) { ++ return ops->queue_incoming_page(f, ptr); ++ } else if (flag == RAM_SAVE_ENCRYPTED_PAGE_BATCH_END) { ++ if (ops->queue_incoming_page(f, ptr)) { ++ error_report("Failed to queue incoming data"); ++ return -EINVAL; ++ } ++ return ops->load_queued_incoming_pages(f); + } else { + error_report("unknown encrypted flag %x", flag); + return 1; +-- +2.41.0.windows.1 + diff --git a/migration-ram-Accelerate-the-transmission-of-CSV-gue.patch b/migration-ram-Accelerate-the-transmission-of-CSV-gue.patch new file mode 100644 index 0000000000000000000000000000000000000000..75773d3edd19dc1da9f3a54601a04a89ce6054ae --- /dev/null +++ b/migration-ram-Accelerate-the-transmission-of-CSV-gue.patch @@ -0,0 +1,208 @@ +From e2b3943bf75d34f5e913e05fbdf8116179812866 Mon Sep 17 00:00:00 2001 +From: fangbaoshun +Date: Mon, 2 Aug 2021 14:35:51 +0800 +Subject: [PATCH] migration/ram: Accelerate the transmission of CSV guest's + encrypted pages + +When memory encryption is enabled, the guest memory will be encrypted with +the guest specific key. The patch introduces an accelerate solution which +queued the pages into list and send them togather by COMMAND_BATCH. 
+ +Signed-off-by: hanliyang +--- + configs/devices/i386-softmmu/default.mak | 1 + + hw/i386/Kconfig | 5 + + migration/ram.c | 119 +++++++++++++++++++++++ + target/i386/csv.h | 2 + + 4 files changed, 127 insertions(+) + +diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak +index db83ffcab9..e948e54e4e 100644 +--- a/configs/devices/i386-softmmu/default.mak ++++ b/configs/devices/i386-softmmu/default.mak +@@ -24,6 +24,7 @@ + #CONFIG_VTD=n + #CONFIG_SGX=n + #CONFIG_CSV=n ++#CONFIG_HYGON_CSV_MIG_ACCEL=n + + # Boards: + # +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index 08f3ae43f8..682e324f1c 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -12,8 +12,13 @@ config SGX + + config CSV + bool ++ select HYGON_CSV_MIG_ACCEL + depends on SEV + ++config HYGON_CSV_MIG_ACCEL ++ bool ++ depends on CSV ++ + config PC + bool + imply APPLESMC +diff --git a/migration/ram.c b/migration/ram.c +index 1abe8476f7..7747f5af3a 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -67,6 +67,7 @@ + + /* Defines RAM_SAVE_ENCRYPTED_PAGE and RAM_SAVE_SHARED_REGION_LIST */ + #include "target/i386/sev.h" ++#include "target/i386/csv.h" + #include "sysemu/kvm.h" + + #include "hw/boards.h" /* for machine_dump_guest_core() */ +@@ -2336,6 +2337,112 @@ out: + return ret; + } + ++#ifdef CONFIG_HYGON_CSV_MIG_ACCEL ++/** ++ * ram_save_encrypted_pages_in_batch: send the given encrypted pages to ++ * the stream. ++ * ++ * Sending pages of 4K size in batch. The saving stops at the end of ++ * the block. ++ * ++ * The caller must be with ram_state.bitmap_mutex held to call this ++ * function. 
++ * ++ * Returns the number of pages written or negative on error ++ * ++ * @rs: current RAM state ++ * @pss: data about the page we want to send ++ */ ++static int ++ram_save_encrypted_pages_in_batch(RAMState *rs, PageSearchStatus *pss) ++{ ++ bool page_dirty; ++ int ret; ++ int tmppages, pages = 0; ++ uint8_t *p; ++ uint32_t host_len = 0; ++ uint64_t bytes_xmit = 0; ++ ram_addr_t offset, start_offset = 0; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *)object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ ++ do { ++ page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page); ++ ++ /* Check the pages is dirty and if it is send it */ ++ if (page_dirty) { ++ /* Process the unencrypted page */ ++ if (!encrypted_test_list(rs, pss->block, pss->page)) { ++ tmppages = migration_ops->ram_save_target_page(rs, pss); ++ } else { ++ /* Caculate the offset and host virtual address of the page */ ++ offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; ++ p = pss->block->host + offset; ++ ++ /* Record the offset and host virtual address of the first ++ * page in this loop which will be used below. ++ */ ++ if (host_len == 0) { ++ start_offset = offset | RAM_SAVE_FLAG_ENCRYPTED_DATA; ++ } else { ++ offset |= (RAM_SAVE_FLAG_ENCRYPTED_DATA | RAM_SAVE_FLAG_CONTINUE); ++ } ++ ++ /* Queue the outgoing page if the page is not zero page. ++ * If the queued pages are up to the outgoing page window size, ++ * process them below. 
++ */ ++ if (ops->queue_outgoing_page(p, TARGET_PAGE_SIZE, offset)) ++ return -1; ++ ++ tmppages = 1; ++ host_len += TARGET_PAGE_SIZE; ++ ++ stat64_add(&mig_stats.normal_pages, 1); ++ } ++ } else { ++ tmppages = 0; ++ } ++ ++ if (tmppages >= 0) { ++ pages += tmppages; ++ } else { ++ return tmppages; ++ } ++ ++ pss_find_next_dirty(pss); ++ } while (offset_in_ramblock(pss->block, ++ ((ram_addr_t)pss->page) << TARGET_PAGE_BITS) && ++ host_len < CSV_OUTGOING_PAGE_WINDOW_SIZE); ++ ++ /* Check if there are any queued pages */ ++ if (host_len != 0) { ++ ram_transferred_add(save_page_header(pss, pss->pss_channel, ++ pss->block, start_offset)); ++ /* if only one page queued, flag is BATCH_END, else flag is BATCH */ ++ if (host_len > TARGET_PAGE_SIZE) ++ qemu_put_be32(pss->pss_channel, RAM_SAVE_ENCRYPTED_PAGE_BATCH); ++ else ++ qemu_put_be32(pss->pss_channel, RAM_SAVE_ENCRYPTED_PAGE_BATCH_END); ++ ram_transferred_add(4); ++ /* Process the queued pages in batch */ ++ ret = ops->save_queued_outgoing_pages(pss->pss_channel, &bytes_xmit); ++ if (ret) { ++ return -1; ++ } ++ ram_transferred_add(bytes_xmit); ++ } ++ ++ /* The offset we leave with is the last one we looked at */ ++ pss->page--; ++ ++ return pages; ++} ++#endif ++ + /** + * ram_save_host_page: save a whole host page + * +@@ -2371,6 +2478,18 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) + return 0; + } + ++#ifdef CONFIG_HYGON_CSV_MIG_ACCEL ++ /* ++ * If command_batch function is enabled and memory encryption is enabled ++ * then use command batch APIs to accelerate the sending process ++ * to write the outgoing buffer to the wire. The encryption APIs ++ * will re-encrypt the data with transport key so that data is prototect ++ * on the wire. 
++ */ ++ if (memcrypt_enabled() && is_hygon_cpu() && !migration_in_postcopy()) ++ return ram_save_encrypted_pages_in_batch(rs, pss); ++#endif ++ + /* Update host page boundary information */ + pss_host_page_prepare(pss); + +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 977f08b982..74a54f9b9c 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -44,6 +44,8 @@ static bool __attribute__((unused)) is_hygon_cpu(void) + + #endif + ++#define CSV_OUTGOING_PAGE_WINDOW_SIZE (4094 * TARGET_PAGE_SIZE) ++ + typedef struct CsvBatchCmdList CsvBatchCmdList; + typedef void (*CsvDestroyCmdNodeFn) (void *data); + +-- +2.41.0.windows.1 + diff --git a/migration-ram-Do-error_free-after-migrate_set_error-.patch b/migration-ram-Do-error_free-after-migrate_set_error-.patch deleted file mode 100644 index 0039f43d86d5506bfca2953904a215d3f178526a..0000000000000000000000000000000000000000 --- a/migration-ram-Do-error_free-after-migrate_set_error-.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 05d1fbd2390d441e5acb606dba3d308d506a8eb1 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Tue, 5 May 2020 11:44:20 +0800 -Subject: [PATCH 1/3] migration/ram: Do error_free after migrate_set_error to - avoid memleaks - -If local_err is not NULL, it use error_copy to set migrate error in -multifd_send_terminate_threads. Thus, we should free it. - -Similarly, fix another leak in multifd_recv_thread. 
- -The leak stack: -Direct leak of 96 byte(s) in 2 object(s) allocated from: - #0 0xfffdd97fe938 in __interceptor_calloc (/lib64/libasan.so.4+0xee938) - #1 0xfffdd85a8bb0 in g_malloc0 (/lib64/libglib-2.0.so.0+0x58bb0) - #2 0xaaadfc6e41c4 in error_setv util/error.c:61 - #3 0xaaadfc6e4880 in error_setg_errno_internal util/error.c:109 - #4 0xaaadfc6192a8 in qio_channel_socket_writev io/channel-socket.c:552 - #5 0xaaadfc614604 in qio_channel_writev_all io/channel.c:171 - #6 0xaaadfc6147ec in qio_channel_write_all io/channel.c:257 - #7 0xaaadfbaec5fc in multifd_send_thread /usr/src/debug/qemu-4.1.0-4_asan.aarch64/migration/ram.c:1145 - #8 0xaaadfc6db768 in qemu_thread_start util/qemu-thread-posix.c:502 - #9 0xfffdd79a88c8 (/lib64/libpthread.so.0+0x88c8) - #10 0xfffdd78e9578 (/lib64/libc.so.6+0xd9578) - -Indirect leak of 104 byte(s) in 2 object(s) allocated from: - #0 0xfffdd97feb40 in realloc (/lib64/libasan.so.4+0xeeb40) - #1 0xfffdd78fa6e0 in __vasprintf_chk (/lib64/libc.so.6+0xea6e0) - #2 0xfffdd85ee710 in g_vasprintf (/lib64/libglib-2.0.so.0+0x9e710) - #3 0xfffdd85c45c4 in g_strdup_vprintf (/lib64/libglib-2.0.so.0+0x745c4) - #4 0xfffdd85c4674 in g_strdup_printf (/lib64/libglib-2.0.so.0+0x74674) - #5 0xaaadfc6e4214 in error_setv util/error.c:65 - #6 0xaaadfc6e4880 in error_setg_errno_internal util/error.c:109 - #7 0xaaadfc6192a8 in qio_channel_socket_writev io/channel-socket.c:552 - #8 0xaaadfc614604 in qio_channel_writev_all io/channel.c:171 - #9 0xaaadfc6147ec in qio_channel_write_all io/channel.c:257 - #10 0xaaadfbaec5fc in multifd_send_thread /usr/src/debug/qemu-4.1.0-4_asan.aarch64/migration/ram.c:1145 - #11 0xaaadfc6db768 in qemu_thread_start util/qemu-thread-posix.c:502 - #12 0xfffdd79a88c8 (/lib64/libpthread.so.0+0x88c8) - #13 0xfffdd78e9578 (/lib64/libc.so.6+0xd9578) - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan ---- - migration/ram.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 
1858d66c..6baf1412 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1176,6 +1176,7 @@ static void *multifd_send_thread(void *opaque) - out: - if (local_err) { - multifd_send_terminate_threads(local_err); -+ error_free(local_err); - } - - /* -@@ -1427,6 +1428,7 @@ static void *multifd_recv_thread(void *opaque) - - if (local_err) { - multifd_recv_terminate_threads(local_err); -+ error_free(local_err); - } - qemu_mutex_lock(&p->mutex); - p->running = false; --- -2.23.0 diff --git a/migration-ram-Fix-calculation-of-gfn-correpond-to-a-.patch b/migration-ram-Fix-calculation-of-gfn-correpond-to-a-.patch new file mode 100644 index 0000000000000000000000000000000000000000..811959046f00fd84e2eb1882341b5ecdb94ea290 --- /dev/null +++ b/migration-ram-Fix-calculation-of-gfn-correpond-to-a-.patch @@ -0,0 +1,57 @@ +From ec2518709b8d461c3a165c1722ccd2e585cec161 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Sun, 16 Jan 2022 20:05:02 -0500 +Subject: [PATCH] migration/ram: Fix calculation of gfn correpond to a page in + ramblock + +A RAMBlock contains a host memory region which may consist of many +discontiguous MemoryRegion in AddressSpace of a Guest, so we cannot +get gpa by MemoryRegion.addr. Since KVM memslot records the relationship +between gpa and hva, so we can pass the hva of page in RAMBlock to +kvm_phisical_memory_addr_from_host() to get the expected gpa. 
+ +Signed-off-by: hanliyang +--- + migration/ram.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 66a36736ad..1abe8476f7 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -67,6 +67,7 @@ + + /* Defines RAM_SAVE_ENCRYPTED_PAGE and RAM_SAVE_SHARED_REGION_LIST */ + #include "target/i386/sev.h" ++#include "sysemu/kvm.h" + + #include "hw/boards.h" /* for machine_dump_guest_core() */ + +@@ -2145,6 +2146,8 @@ static bool encrypted_test_list(RAMState *rs, RAMBlock *block, + struct ConfidentialGuestMemoryEncryptionOps *ops = + cgs_class->memory_encryption_ops; + unsigned long gfn; ++ hwaddr paddr = 0; ++ int ret; + + /* ROM devices contains the unencrypted data */ + if (memory_region_is_rom(block->mr)) { +@@ -2167,7 +2170,14 @@ static bool encrypted_test_list(RAMState *rs, RAMBlock *block, + * Translate page in ram_addr_t address space to GPA address + * space using memory region. + */ +- gfn = page + (block->mr->addr >> TARGET_PAGE_BITS); ++ if (kvm_enabled()) { ++ ret = kvm_physical_memory_addr_from_host(kvm_state, ++ block->host + (page << TARGET_PAGE_BITS), &paddr); ++ if (ret == 0) { ++ return false; ++ } ++ } ++ gfn = paddr >> TARGET_PAGE_BITS; + + return ops->is_gfn_in_unshared_region(gfn); + } +-- +2.41.0.windows.1 + diff --git a/migration-ram-Force-encrypted-status-for-VGA-vram.patch b/migration-ram-Force-encrypted-status-for-VGA-vram.patch new file mode 100644 index 0000000000000000000000000000000000000000..e33e8dfb7aa7e274818c378150f1bed1b0fc4a95 --- /dev/null +++ b/migration-ram-Force-encrypted-status-for-VGA-vram.patch @@ -0,0 +1,32 @@ +From e6a20047ca9f61d7fc544e4f0b9b26aa268ccda7 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Tue, 8 Dec 2020 22:57:46 -0500 +Subject: [PATCH] migration/ram: Force encrypted status for VGA vram + +The VGA vram memory region act as frame buffer of VM. This memory +is decrypted in the QEMU process. 
For CSV VM live migration, we +should avoid memory encryption status check on VGA vram. + +Signed-off-by: hanliyang +--- + migration/ram.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 9ecd8580c5..66a36736ad 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2159,6 +2159,10 @@ static bool encrypted_test_list(RAMState *rs, RAMBlock *block, + return false; + } + ++ if (!strcmp(memory_region_name(block->mr), "vga.vram")) { ++ return false; ++ } ++ + /* + * Translate page in ram_addr_t address space to GPA address + * space using memory region. +-- +2.41.0.windows.1 + diff --git a/migration-ram-Force-encrypted-status-for-flash0-flas.patch b/migration-ram-Force-encrypted-status-for-flash0-flas.patch new file mode 100644 index 0000000000000000000000000000000000000000..d8bd6dff718f2bf3214009d34151b5f57d88c487 --- /dev/null +++ b/migration-ram-Force-encrypted-status-for-flash0-flas.patch @@ -0,0 +1,44 @@ +From cbbac2aa57d5609c254f99bf247d16e4b9fd7de3 Mon Sep 17 00:00:00 2001 +From: Ashish Kalra +Date: Tue, 27 Jul 2021 18:05:25 +0000 +Subject: [PATCH] migration/ram: Force encrypted status for flash0 & flash1 + devices. + +cherry-picked from https://github.com/AMDESE/qemu/commit/803d6a4c8d. + +Currently OVMF clears the C-bit and marks NonExistent memory space +as decrypted in the page encryption bitmap. By marking the +NonExistent memory space as decrypted it gurantees any future MMIO adds +will work correctly, but this marks flash0 device space as decrypted. +At reset the SEV core will be in forced encrypted state, so this +decrypted marking of flash0 device space will cause VCPU reset to fail +as flash0 device pages will be migrated incorrectly. 
+ +Signed-off-by: Ashish Kalra +Signed-off-by: hanliyang +--- + migration/ram.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index beac7ea2c0..9ecd8580c5 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2151,6 +2151,14 @@ static bool encrypted_test_list(RAMState *rs, RAMBlock *block, + return false; + } + ++ if (!strcmp(memory_region_name(block->mr), "system.flash0")) { ++ return true; ++ } ++ ++ if (!strcmp(memory_region_name(block->mr), "system.flash1")) { ++ return false; ++ } ++ + /* + * Translate page in ram_addr_t address space to GPA address + * space using memory region. +-- +2.41.0.windows.1 + diff --git a/migration-ram-Optimize-ram_save_host_page.patch b/migration-ram-Optimize-ram_save_host_page.patch deleted file mode 100644 index c58a6dcb6a5f3dc85be056f1c6ffd3a0bf3ba972..0000000000000000000000000000000000000000 --- a/migration-ram-Optimize-ram_save_host_page.patch +++ /dev/null @@ -1,95 +0,0 @@ -From ae1a8506aa45266f2bf77a8d428f5ccd970a9b13 Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Tue, 16 Mar 2021 20:57:16 +0800 -Subject: [PATCH] migration/ram: Optimize ram_save_host_page() - -Starting from pss->page, ram_save_host_page() will check every page -and send the dirty pages up to the end of the current host page or -the boundary of used_length of the block. If the host page size is -a huge page, the step "check" will take a lot of time. - -It will improve performance to use migration_bitmap_find_dirty(). - -Tested on Kunpeng 920; VM parameters: 1U 4G (page size 1G) -The time of ram_save_host_page() in the last round of ram saving: -before optimize: 9250us after optimize: 34us - -Signed-off-by: Keqian Zhu -Signed-off-by: Kunkun Jiang -Reviewed-by: Peter Xu -Message-Id: <20210316125716.1243-3-jiangkunkun@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 43 +++++++++++++++++++++---------------------- - 1 file changed, 21 insertions(+), 22 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 22063e00b4..1bd99ff9e5 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3052,6 +3052,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - int tmppages, pages = 0; - size_t pagesize_bits = - qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; -+ unsigned long hostpage_boundary = -+ QEMU_ALIGN_UP(pss->page + 1, pagesize_bits); - - if (ramblock_is_ignored(pss->block)) { - error_report("block %s should not be migrated !", pss->block->idstr); -@@ -3060,34 +3062,31 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - - do { - /* Check the pages is dirty and if it is send it */ -- if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { -- pss->page++; -- continue; -- } -- -- tmppages = ram_save_target_page(rs, pss, last_stage); -- if (tmppages < 0) { -- return tmppages; -- } -+ if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { -+ tmppages = ram_save_target_page(rs, pss, last_stage); -+ if (tmppages < 0) { -+ return tmppages; -+ } - -- pages += tmppages; -- if (pss->block->unsentmap) { -- clear_bit(pss->page, pss->block->unsentmap); -- } -+ pages += tmppages; -+ if (pss->block->unsentmap) { -+ clear_bit(pss->page, pss->block->unsentmap); -+ } - -- pss->page++; -- /* -- * Allow rate limiting to happen in the middle of huge pages if -- * something is sent in the current iteration. -- */ -- if (pagesize_bits > 1 && tmppages > 0) { -- migration_rate_limit(); -+ /* -+ * Allow rate limiting to happen in the middle of huge pages if -+ * something is sent in the current iteration. 
-+ */ -+ if (pagesize_bits > 1 && tmppages > 0) { -+ migration_rate_limit(); -+ } - } -+ pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); - } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); - -- /* The offset we leave with is the last one we looked at */ -- pss->page--; -+ /* The offset we leave with is the min boundary of host page and block */ -+ pss->page = MIN(pss->page, hostpage_boundary) - 1; - return pages; - } - --- -2.27.0 - diff --git a/migration-ram-Reduce-unnecessary-rate-limiting.patch b/migration-ram-Reduce-unnecessary-rate-limiting.patch deleted file mode 100644 index 64374dd3e255224e650c8de3e93669db04a6c413..0000000000000000000000000000000000000000 --- a/migration-ram-Reduce-unnecessary-rate-limiting.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 338d691c985ad5b3624ef36e4beaac82982c8f0a Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Tue, 16 Mar 2021 20:57:15 +0800 -Subject: [PATCH] migration/ram: Reduce unnecessary rate limiting - -When the host page is a huge page and something is sent in the -current iteration, migration_rate_limit() should be executed. -If not, it can be omitted. - -Signed-off-by: Keqian Zhu -Signed-off-by: Kunkun Jiang -Reviewed-by: David Edmondson -Reviewed-by: Dr. David Alan Gilbert -Message-Id: <20210316125716.1243-2-jiangkunkun@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/ram.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 2077ba5be4..22063e00b4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3076,8 +3076,13 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - } - - pss->page++; -- /* Allow rate limiting to happen in the middle of huge pages */ -- migration_rate_limit(); -+ /* -+ * Allow rate limiting to happen in the middle of huge pages if -+ * something is sent in the current iteration. 
-+ */ -+ if (pagesize_bits > 1 && tmppages > 0) { -+ migration_rate_limit(); -+ } - } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); - --- -2.27.0 - diff --git a/migration-ram-add-support-to-send-encrypted-pages.patch b/migration-ram-add-support-to-send-encrypted-pages.patch new file mode 100644 index 0000000000000000000000000000000000000000..ef5fa89f3b86e41bb77c1de9794cac42acd965b9 --- /dev/null +++ b/migration-ram-add-support-to-send-encrypted-pages.patch @@ -0,0 +1,343 @@ +From af3077a2f19f0604c4e7f8b94eb0338b7f1f85d6 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 16:53:19 +0000 +Subject: [PATCH] migration/ram: add support to send encrypted pages + +cherry-picked from https://github.com/AMDESE/qemu/commit/2d6bda0d4cf. + +When memory encryption is enabled, the guest memory will be encrypted with +the guest specific key. The patch introduces RAM_SAVE_FLAG_ENCRYPTED_PAGE +flag to distinguish the encrypted data from plaintext. Encrypted pages +may need special handling. The sev_save_outgoing_page() is used +by the sender to write the encrypted pages onto the socket, similarly the +sev_load_incoming_page() is used by the target to read the +encrypted pages from the socket and load into the guest memory. + +Signed-off-by: Brijesh Singh +Co-developed-by: Ashish Kalra +Signed-off-by: Ashish Kalra +[ Fix conflicts. 
] +Signed-off-by: hanliyang +--- + migration/migration.h | 2 + + migration/ram.c | 174 +++++++++++++++++++++++++++++++++++++++++- + target/i386/sev.c | 14 ++++ + target/i386/sev.h | 4 + + 4 files changed, 192 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.h b/migration/migration.h +index 2f26c9509b..eeddb7c0bd 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -553,4 +553,6 @@ int migration_stop_vm(RunState state); + + void migrate_fd_cancel(MigrationState *s); + ++bool memcrypt_enabled(void); ++ + #endif +diff --git a/migration/ram.c b/migration/ram.c +index f9b2b9b985..beac7ea2c0 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -63,6 +63,10 @@ + #include "options.h" + #include "sysemu/dirtylimit.h" + #include "sysemu/kvm.h" ++#include "exec/confidential-guest-support.h" ++ ++/* Defines RAM_SAVE_ENCRYPTED_PAGE and RAM_SAVE_SHARED_REGION_LIST */ ++#include "target/i386/sev.h" + + #include "hw/boards.h" /* for machine_dump_guest_core() */ + +@@ -92,7 +96,16 @@ + /* 0x80 is reserved in rdma.h for RAM_SAVE_FLAG_HOOK */ + #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 + #define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 +-/* We can't use any flag that is bigger than 0x200 */ ++#define RAM_SAVE_FLAG_ENCRYPTED_DATA 0x400 ++ ++bool memcrypt_enabled(void) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ if(ms->cgs) ++ return ms->cgs->ready; ++ else ++ return false; ++} + + XBZRLECacheStats xbzrle_counters; + +@@ -1206,6 +1219,88 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + return 1; + } + ++/** ++ * ram_save_encrypted_page - send the given encrypted page to the stream ++ */ ++static int ram_save_encrypted_page(RAMState *rs, PageSearchStatus *pss) ++{ ++ QEMUFile *file = pss->pss_channel; ++ int ret; ++ uint8_t *p; ++ RAMBlock *block = pss->block; ++ ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; ++ uint64_t bytes_xmit = 0; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ 
ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ ++ p = block->host + offset; ++ trace_ram_save_page(block->idstr, (uint64_t)offset, p); ++ ++ ram_transferred_add(save_page_header(pss, file, block, ++ offset | RAM_SAVE_FLAG_ENCRYPTED_DATA)); ++ qemu_put_be32(file, RAM_SAVE_ENCRYPTED_PAGE); ++ ret = ops->save_outgoing_page(file, p, TARGET_PAGE_SIZE, &bytes_xmit); ++ if (ret) { ++ return -1; ++ } ++ ram_transferred_add(4 + bytes_xmit); ++ stat64_add(&mig_stats.normal_pages, 1); ++ ++ return 1; ++} ++ ++/** ++ * ram_save_shared_region_list: send the shared region list ++ */ ++static int ram_save_shared_region_list(RAMState *rs, QEMUFile *f) ++{ ++ int ret; ++ uint64_t bytes_xmit = 0; ++ PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ ++ ram_transferred_add(save_page_header(pss, f, ++ pss->last_sent_block, ++ RAM_SAVE_FLAG_ENCRYPTED_DATA)); ++ qemu_put_be32(f, RAM_SAVE_SHARED_REGIONS_LIST); ++ ret = ops->save_outgoing_shared_regions_list(f, &bytes_xmit); ++ if (ret < 0) { ++ return ret; ++ } ++ ram_transferred_add(4 + bytes_xmit); ++ ++ return 0; ++} ++ ++static int load_encrypted_data(QEMUFile *f, uint8_t *ptr) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ ++ int flag; ++ ++ flag = qemu_get_be32(f); ++ ++ if (flag == RAM_SAVE_ENCRYPTED_PAGE) { ++ return ops->load_incoming_page(f, ptr); ++ } else if (flag == 
RAM_SAVE_SHARED_REGIONS_LIST) { ++ return ops->load_incoming_shared_regions_list(f); ++ } else { ++ error_report("unknown encrypted flag %x", flag); ++ return 1; ++ } ++} ++ + /** + * ram_save_page: send the given page to the stream + * +@@ -2036,6 +2131,35 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss, + compress_send_queued_data); + } + ++/** ++ * encrypted_test_list: check if the page is encrypted ++ * ++ * Returns a bool indicating whether the page is encrypted. ++ */ ++static bool encrypted_test_list(RAMState *rs, RAMBlock *block, ++ unsigned long page) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ unsigned long gfn; ++ ++ /* ROM devices contains the unencrypted data */ ++ if (memory_region_is_rom(block->mr)) { ++ return false; ++ } ++ ++ /* ++ * Translate page in ram_addr_t address space to GPA address ++ * space using memory region. ++ */ ++ gfn = page + (block->mr->addr >> TARGET_PAGE_BITS); ++ ++ return ops->is_gfn_in_unshared_region(gfn); ++} ++ + /** + * ram_save_target_page_legacy: save one target page + * +@@ -2054,6 +2178,17 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + return res; + } + ++ /* ++ * If memory encryption is enabled then use memory encryption APIs ++ * to write the outgoing buffer to the wire. The encryption APIs ++ * will take care of accessing the guest memory and re-encrypt it ++ * for the transport purposes. 
++ */ ++ if (memcrypt_enabled() && ++ encrypted_test_list(rs, pss->block, pss->page)) { ++ return ram_save_encrypted_page(rs, pss); ++ } ++ + if (save_compress_page(rs, pss, offset)) { + return 1; + } +@@ -2919,6 +3054,18 @@ void qemu_guest_free_page_hint(void *addr, size_t len) + } + } + ++static int ram_encrypted_save_setup(void) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ MigrationParameters *p = &migrate_get_current()->parameters; ++ ++ return ops->save_setup(p->sev_pdh, p->sev_plat_cert, p->sev_amd_cert); ++} ++ + /* + * Each of ram_save_setup, ram_save_iterate and ram_save_complete has + * long-running RCU critical section. When rcu-reclaims in the code +@@ -2954,6 +3101,13 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f; + + WITH_RCU_READ_LOCK_GUARD() { ++ ++ if (memcrypt_enabled()) { ++ if (ram_encrypted_save_setup()) { ++ return -1; ++ } ++ } ++ + qemu_put_be64(f, ram_bytes_total_with_ignored() + | RAM_SAVE_FLAG_MEM_SIZE); + +@@ -3183,6 +3337,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + qemu_file_set_error(f, ret); + return ret; + } ++ ++ /* send the shared regions list */ ++ if (memcrypt_enabled()) { ++ ret = ram_save_shared_region_list(rs, f); ++ if (ret < 0) { ++ qemu_file_set_error(f, ret); ++ return ret; ++ } ++ } + } + + ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); +@@ -3920,7 +4083,8 @@ static int ram_load_precopy(QEMUFile *f) + } + + if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | +- RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { ++ RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE | ++ RAM_SAVE_FLAG_ENCRYPTED_DATA)) { + RAMBlock *block = ram_block_from_stream(mis, f, flags, + RAM_CHANNEL_PRECOPY); + +@@ 
-4013,6 +4177,12 @@ static int ram_load_precopy(QEMUFile *f) + qemu_file_set_error(f, ret); + } + break; ++ case RAM_SAVE_FLAG_ENCRYPTED_DATA: ++ if (load_encrypted_data(f, host)) { ++ error_report("Failed to load encrypted data"); ++ ret = -EINVAL; ++ } ++ break; + default: + error_report("Unknown combination of migration flags: 0x%x", flags); + ret = -EINVAL; +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 92aedf0503..47f41aefe7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -183,6 +183,7 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_setup = sev_save_setup, + .save_outgoing_page = sev_save_outgoing_page, + .load_incoming_page = sev_load_incoming_page, ++ .is_gfn_in_unshared_region = sev_is_gfn_in_unshared_region, + .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list, + .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, + }; +@@ -1822,6 +1823,19 @@ int sev_load_incoming_shared_regions_list(QEMUFile *f) + return 0; + } + ++bool sev_is_gfn_in_unshared_region(unsigned long gfn) ++{ ++ SevGuestState *s = sev_guest; ++ struct shared_region *pos; ++ ++ QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { ++ if (gfn >= pos->gfn_start && gfn < pos->gfn_end) { ++ return false; ++ } ++ } ++ return true; ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 5b4231c859..b9c2afb799 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -38,6 +38,9 @@ typedef struct SevKernelLoaderContext { + size_t cmdline_size; + } SevKernelLoaderContext; + ++#define RAM_SAVE_ENCRYPTED_PAGE 0x1 ++#define RAM_SAVE_SHARED_REGIONS_LIST 0x2 ++ + #ifdef CONFIG_SEV + bool sev_enabled(void); + bool sev_es_enabled(void); +@@ -66,6 +69,7 @@ int sev_remove_shared_regions_list(unsigned long gfn_start, + int 
sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end); + int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent); + int sev_load_incoming_shared_regions_list(QEMUFile *f); ++bool sev_is_gfn_in_unshared_region(unsigned long gfn); + + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); + +-- +2.41.0.windows.1 + diff --git a/migration-ram-fix-memleaks-in-multifd_new_send_chann.patch b/migration-ram-fix-memleaks-in-multifd_new_send_chann.patch deleted file mode 100644 index f9cb2bf652b90968144c673fd6c59655acfd785f..0000000000000000000000000000000000000000 --- a/migration-ram-fix-memleaks-in-multifd_new_send_chann.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 4d456b243a41a8e91535b2820fd6ed4f6fb4a194 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Tue, 5 May 2020 15:50:54 +0800 -Subject: [PATCH 2/3] migration/ram: fix memleaks in - multifd_new_send_channel_async - -When error happen in multifd_new_send_channel_async, 'sioc' will not be used -to create the multifd_send_thread. Let's free it to avoid a memleak. And also -do error_free after migrate_set_error() to avoid another leak in the same place. 
- -The leak stack: -Direct leak of 2160 byte(s) in 6 object(s) allocated from: - #0 0xfffdd97fe754 in malloc (/lib64/libasan.so.4+0xee754) - #1 0xfffdd85a8b48 in g_malloc (/lib64/libglib-2.0.so.0+0x58b48) - #2 0xaaadfc4e2b10 in object_new_with_type qom/object.c:634 - #3 0xaaadfc619468 in qio_channel_socket_new io/channel-socket.c:56 - #4 0xaaadfc3d3e74 in socket_send_channel_create migration/socket.c:37 - #5 0xaaadfbaed6f4 in multifd_save_setup /usr/src/debug/qemu-4.1.0-4_asan.aarch64/migration/ram.c:1255 - #6 0xaaadfc3d2f78 in migrate_fd_connect migration/migration.c:3359 - #7 0xaaadfc3d6240 in migration_channel_connect migration/channel.c:101 - #8 0xaaadfc3d3590 in socket_outgoing_migration migration/socket.c:108 - #9 0xaaadfc625a64 in qio_task_complete io/task.c:195 - #10 0xaaadfc625ed0 in qio_task_thread_result io/task.c:111 - #11 0xfffdd859edec (/lib64/libglib-2.0.so.0+0x4edec) - #12 0xfffdd85a2a78 in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x52a78) - #13 0xaaadfc6d3b84 in glib_pollfds_poll util/main-loop.c:218 - #14 0xaaadfc6d3b84 in os_host_main_loop_wait util/main-loop.c:241 - #15 0xaaadfc6d3b84 in main_loop_wait util/main-loop.c:517 - #16 0xaaadfbf9206c in main_loop /usr/src/debug/qemu-4.1.0-4_asan.aarch64/vl.c:1791 - #17 0xaaadfba1b124 in main /usr/src/debug/qemu-4.1.0-4_asan.aarch64/vl.c:4473 - #18 0xfffdd7833f5c in __libc_start_main (/lib64/libc.so.6+0x23f5c) - #19 0xaaadfba26360 (/usr/libexec/qemu-kvm+0x886360) - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan ---- - migration/ram.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 6baf1412..840e3548 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1215,6 +1215,8 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - * its status. 
- */ - p->quit = true; -+ object_unref(OBJECT(sioc)); -+ error_free(local_err); - } else { - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); --- -2.23.0 diff --git a/migration-ram-fix-use-after-free-of-local_err.patch b/migration-ram-fix-use-after-free-of-local_err.patch deleted file mode 100644 index f74e3b18df98ae0e5a88ff9224fa06c8ea24197a..0000000000000000000000000000000000000000 --- a/migration-ram-fix-use-after-free-of-local_err.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 019526f7f7b42a7d1b8a74e1db6a8050adf9e1fb Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 24 Mar 2020 18:36:29 +0300 -Subject: [PATCH 08/14] migration/ram: fix use after free of local_err - -local_err is used again in migration_bitmap_sync_precopy() after -precopy_notify(), so we must zero it. Otherwise try to set -non-NULL local_err will crash. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200324153630.11882-6-vsementsov@virtuozzo.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -Signed-off-by: Peng Liang ---- - migration/ram.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 840e35480b04..5d1ae7570018 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1912,6 +1912,7 @@ static void migration_bitmap_sync_precopy(RAMState *rs) - */ - if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) { - error_report_err(local_err); -+ local_err = NULL; - } - - migration_bitmap_sync(rs); --- -2.26.2 - diff --git a/migration-rdma-cleanup-rdma-context-before-g_free-to.patch b/migration-rdma-cleanup-rdma-context-before-g_free-to.patch deleted file mode 100644 index a39894ada540a713645b0735b719eb4d5a3edbff..0000000000000000000000000000000000000000 --- a/migration-rdma-cleanup-rdma-context-before-g_free-to.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 9867dc6fc3f131324b73664b9617376270d8d013 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Fri, 8 May 2020 06:07:55 -0400 -Subject: [PATCH 4/5] migration/rdma: cleanup rdma context before g_free to - avoid memleaks - -When error happen in initializing 'rdma_return_path', we should cleanup rdma context -before g_free(rdma) to avoid some memleaks. This patch fix that. - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Message-Id: <20200508100755.7875-3-pannengyuan@huawei.com> -Reviewed-by: Juan Quintela -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/rdma.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/migration/rdma.c b/migration/rdma.c -index 3036221e..bb24dac5 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -4103,20 +4103,20 @@ void rdma_start_outgoing_migration(void *opaque, - rdma_return_path = qemu_rdma_data_init(host_port, errp); - - if (rdma_return_path == NULL) { -- goto err; -+ goto return_path_err; - } - - ret = qemu_rdma_source_init(rdma_return_path, - s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); - - if (ret) { -- goto err; -+ goto return_path_err; - } - - ret = qemu_rdma_connect(rdma_return_path, errp); - - if (ret) { -- goto err; -+ goto return_path_err; - } - - rdma->return_path = rdma_return_path; -@@ -4129,6 +4129,8 @@ void rdma_start_outgoing_migration(void *opaque, - s->to_dst_file = qemu_fopen_rdma(rdma, "wb"); - migrate_fd_connect(s, NULL); - return; -+return_path_err: -+ qemu_rdma_cleanup(rdma); - err: - g_free(rdma); - g_free(rdma_return_path); --- -2.23.0 - diff --git a/migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch b/migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch deleted file mode 100644 index 5e0fb101d827377551a7858f225cf365367e12b7..0000000000000000000000000000000000000000 --- a/migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 8ae2e3b8be812bcbdeb6151c685026bcaedd4a4b Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Sat, 9 May 2020 15:25:42 +0800 -Subject: [PATCH 3/3] migration/rdma: fix a memleak on error path in - rdma_start_incoming_migration - -'rdma->host' is malloced in qemu_rdma_data_init, but forgot to free on the error -path in rdma_start_incoming_migration(), this patch fix that. 
- -Direct leak of 2 byte(s) in 1 object(s) allocated from: - #0 0xfffce56d34fb in __interceptor_malloc (/lib64/libasan.so.4+0xd34fb) - #1 0xfffce5158aa3 in g_malloc (/lib64/libglib-2.0.so.0+0x58aa3) - #2 0xfffce5174213 in g_strdup (/lib64/libglib-2.0.so.0+0x74213) - #3 0xaaad7c569ddf in qemu_rdma_data_init /Images/qemu/migration/rdma.c:2647 - #4 0xaaad7c57c99f in rdma_start_incoming_migration /Images/qemu/migration/rdma.c:4020 - #5 0xaaad7c52b35f in qemu_start_incoming_migration /Images/qemu/migration/migration.c:371 - #6 0xaaad7be173bf in qemu_init /Images/qemu/softmmu/vl.c:4464 - #7 0xaaad7bb29843 in main /Images/qemu/softmmu/main.c:48 - #8 0xfffce3713f5f in __libc_start_main (/lib64/libc.so.6+0x23f5f) - #9 0xaaad7bb2bf73 (/Images/qemu/build/aarch64-softmmu/qemu-system-aarch64+0x8fbf73) - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan ---- - migration/rdma.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/migration/rdma.c b/migration/rdma.c -index 3036221e..b5fdb6a7 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -4068,6 +4068,9 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) - return; - err: - error_propagate(errp, local_err); -+ if (rdma) { -+ g_free(rdma->host); -+ } - g_free(rdma); - g_free(rdma_return_path); - } --- -2.23.0 diff --git a/migration-register_savevm_live-doesn-t-need-dev.patch b/migration-register_savevm_live-doesn-t-need-dev.patch deleted file mode 100644 index a980deccbcac98d709a35e62f41c7d52e39b0d11..0000000000000000000000000000000000000000 --- a/migration-register_savevm_live-doesn-t-need-dev.patch +++ /dev/null @@ -1,201 +0,0 @@ -From 0f7cde69416f85ec3d3f57769ae38db3d72fda8c Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Thu, 22 Aug 2019 12:54:33 +0100 -Subject: [PATCH] migration: register_savevm_live doesn't need dev - -Commit 78dd48df3 removed the last caller of register_savevm_live for an -instantiable device (rather than a single system wide device); -so trim out the parameter. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20190822115433.12070-1-dgilbert@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Cornelia Huck -Signed-off-by: Dr. David Alan Gilbert ---- - docs/devel/migration.rst | 3 +-- - hw/ppc/spapr.c | 2 +- - hw/s390x/s390-skeys.c | 2 +- - hw/s390x/s390-stattrib.c | 2 +- - hw/s390x/tod.c | 2 +- - include/migration/register.h | 3 +-- - migration/block-dirty-bitmap.c | 2 +- - migration/block.c | 2 +- - migration/ram.c | 2 +- - migration/savevm.c | 23 +---------------------- - net/slirp.c | 2 +- - 11 files changed, 11 insertions(+), 34 deletions(-) - -diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst -index 220059679a..cc6f839fce 100644 ---- a/docs/devel/migration.rst -+++ b/docs/devel/migration.rst -@@ -183,8 +183,7 @@ another to load the state back. - - .. 
code:: c - -- int register_savevm_live(DeviceState *dev, -- const char *idstr, -+ int register_savevm_live(const char *idstr, - int instance_id, - int version_id, - SaveVMHandlers *ops, -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index b0f37c34a4..289967c3de 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3069,7 +3069,7 @@ static void spapr_machine_init(MachineState *machine) - * interface, this is a legacy from the sPAPREnvironment structure - * which predated MachineState but had a similar function */ - vmstate_register(NULL, 0, &vmstate_spapr, spapr); -- register_savevm_live(NULL, "spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, -+ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_htab_handlers, spapr); - - qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), -diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c -index e5bd92c0c7..fb7d57865d 100644 ---- a/hw/s390x/s390-skeys.c -+++ b/hw/s390x/s390-skeys.c -@@ -388,7 +388,7 @@ static inline void s390_skeys_set_migration_enabled(Object *obj, bool value, - ss->migration_enabled = value; - - if (ss->migration_enabled) { -- register_savevm_live(NULL, TYPE_S390_SKEYS, 0, 1, -+ register_savevm_live(TYPE_S390_SKEYS, 0, 1, - &savevm_s390_storage_keys, ss); - } else { - unregister_savevm(DEVICE(ss), TYPE_S390_SKEYS, ss); -diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c -index 766f2015a4..5ee15d5e82 100644 ---- a/hw/s390x/s390-stattrib.c -+++ b/hw/s390x/s390-stattrib.c -@@ -382,7 +382,7 @@ static void s390_stattrib_instance_init(Object *obj) - { - S390StAttribState *sas = S390_STATTRIB(obj); - -- register_savevm_live(NULL, TYPE_S390_STATTRIB, 0, 0, -+ register_savevm_live(TYPE_S390_STATTRIB, 0, 0, - &savevm_s390_stattrib_handlers, sas); - - object_property_add_bool(obj, "migration-enabled", -diff --git a/hw/s390x/tod.c b/hw/s390x/tod.c -index a9fca8eb0b..d6b22bb966 100644 ---- a/hw/s390x/tod.c -+++ b/hw/s390x/tod.c -@@ -100,7 +100,7 @@ static void 
s390_tod_realize(DeviceState *dev, Error **errp) - S390TODState *td = S390_TOD(dev); - - /* Legacy migration interface */ -- register_savevm_live(NULL, "todclock", 0, 1, &savevm_tod, td); -+ register_savevm_live("todclock", 0, 1, &savevm_tod, td); - } - - static void s390_tod_class_init(ObjectClass *oc, void *data) -diff --git a/include/migration/register.h b/include/migration/register.h -index 8b2bc5b129..f3ba10b6ef 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -68,8 +68,7 @@ typedef struct SaveVMHandlers { - int (*resume_prepare)(MigrationState *s, void *opaque); - } SaveVMHandlers; - --int register_savevm_live(DeviceState *dev, -- const char *idstr, -+int register_savevm_live(const char *idstr, - uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index 4a896a09eb..11e8feb595 100644 ---- a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -733,7 +733,7 @@ void dirty_bitmap_mig_init(void) - { - QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list); - -- register_savevm_live(NULL, "dirty-bitmap", 0, 1, -+ register_savevm_live("dirty-bitmap", 0, 1, - &savevm_dirty_bitmap_handlers, - &dirty_bitmap_mig_state); - } -diff --git a/migration/block.c b/migration/block.c -index 91f98ef44a..ec15d1d6b3 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -1030,6 +1030,6 @@ void blk_mig_init(void) - QSIMPLEQ_INIT(&block_mig_state.blk_list); - qemu_mutex_init(&block_mig_state.lock); - -- register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers, -+ register_savevm_live("block", 0, 1, &savevm_block_handlers, - &block_mig_state); - } -diff --git a/migration/ram.c b/migration/ram.c -index d6657a8093..2077ba5be4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -5125,5 +5125,5 @@ static SaveVMHandlers savevm_ram_handlers = { - void ram_mig_init(void) - { - qemu_mutex_init(&XBZRLE.lock); -- 
register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); -+ register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state); - } -diff --git a/migration/savevm.c b/migration/savevm.c -index f0974380e5..cdb79222a4 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -683,8 +683,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) - of the system, so instance_id should be removed/replaced. - Meanwhile pass -1 as instance_id if you do not already have a clearly - distinguishing id for all instances of your device class. */ --int register_savevm_live(DeviceState *dev, -- const char *idstr, -+int register_savevm_live(const char *idstr, - uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, -@@ -703,26 +702,6 @@ int register_savevm_live(DeviceState *dev, - se->is_ram = 1; - } - -- if (dev) { -- char *id = qdev_get_dev_path(dev); -- if (id) { -- if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >= -- sizeof(se->idstr)) { -- error_report("Path too long for VMState (%s)", id); -- g_free(id); -- g_free(se); -- -- return -1; -- } -- g_free(id); -- -- se->compat = g_new0(CompatEntry, 1); -- pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr); -- se->compat->instance_id = instance_id == -1 ? -- calculate_compat_instance_id(idstr) : instance_id; -- instance_id = -1; -- } -- } - pstrcat(se->idstr, sizeof(se->idstr), idstr); - - if (instance_id == VMSTATE_INSTANCE_ID_ANY) { -diff --git a/net/slirp.c b/net/slirp.c -index b34cb29276..f42f496641 100644 ---- a/net/slirp.c -+++ b/net/slirp.c -@@ -576,7 +576,7 @@ static int net_slirp_init(NetClientState *peer, const char *model, - * specific version? 
- */ - g_assert(slirp_state_version() == 4); -- register_savevm_live(NULL, "slirp", 0, slirp_state_version(), -+ register_savevm_live("slirp", 0, slirp_state_version(), - &savevm_slirp_state, s->slirp); - - s->poll_notifier.notify = net_slirp_poll_notify; --- -2.27.0 - diff --git a/migration-report-migration-related-thread-pid-to-lib.patch b/migration-report-migration-related-thread-pid-to-lib.patch new file mode 100644 index 0000000000000000000000000000000000000000..9fd6fbb9a543dac44c6f9921620bb1112df267f4 --- /dev/null +++ b/migration-report-migration-related-thread-pid-to-lib.patch @@ -0,0 +1,54 @@ +From 7caa5d818e0fa0e1cee2513f2fde4e81f8b5cc13 Mon Sep 17 00:00:00 2001 +From: zhengchuan +Date: Mon, 5 Dec 2022 20:52:25 +0800 +Subject: [PATCH] migration: report migration related thread pid to libvirt + +in order to control migration thread cgroup, +we need to report migration related thread pid to libvirt + +Signed-off-by:zhengchuan +--- + migration/migration.c | 3 +++ + qapi/migration.json | 12 ++++++++++++ + 2 files changed, 15 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 3ce04b2aaf..7c2fdde26b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3299,6 +3299,9 @@ static void *migration_thread(void *opaque) + MigThrError thr_error; + bool urgent = false; + ++ /* report migration thread pid to libvirt */ ++ qapi_event_send_migration_pid(qemu_get_thread_id()); ++ + thread = migration_threads_add("live_migration", qemu_get_thread_id()); + + rcu_register_thread(); +diff --git a/qapi/migration.json b/qapi/migration.json +index 29af841f4e..b442d0d878 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1447,6 +1447,18 @@ + { 'event': 'MIGRATION_PASS', + 'data': { 'pass': 'int' } } + ++## ++# @MIGRATION_PID: ++# ++# Emitted when migration thread appear ++# ++# @pid: pid of migration thread ++# ++# Since: EulerOS Virtual ++## ++{ 'event': 'MIGRATION_PID', ++ 'data': { 'pid': 'int' } } ++ + ## + # 
@COLOMessage: + # +-- +2.27.0 + diff --git a/migration-report-multiFd-related-thread-pid-to-libvi.patch b/migration-report-multiFd-related-thread-pid-to-libvi.patch new file mode 100644 index 0000000000000000000000000000000000000000..77730286d76ec64da9e5f1dd78a62ca88b2e852b --- /dev/null +++ b/migration-report-multiFd-related-thread-pid-to-libvi.patch @@ -0,0 +1,62 @@ +From e387eaeef8845993a437ad19eaf988fb101d3fdd Mon Sep 17 00:00:00 2001 +From: zhengchuan +Date: Mon, 5 Dec 2022 20:56:35 +0800 +Subject: [PATCH] migration: report multiFd related thread pid to libvirt + +report multiFd related thread pid to libvirt in order to +pin multiFd thread to different cpu. + +Signed-off-by:zhengchuan +--- + migration/multifd.c | 4 ++++ + qapi/migration.json | 12 ++++++++++++ + 2 files changed, 16 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 409460684f..7d373a245e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -17,6 +17,7 @@ + #include "exec/ramblock.h" + #include "qemu/error-report.h" + #include "qapi/error.h" ++#include "qapi/qapi-events-migration.h" + #include "ram.h" + #include "migration.h" + #include "migration-stats.h" +@@ -657,6 +658,9 @@ static void *multifd_send_thread(void *opaque) + + thread = migration_threads_add(p->name, qemu_get_thread_id()); + ++ /* report multifd thread pid to libvirt */ ++ qapi_event_send_migration_multifd_pid(qemu_get_thread_id()); ++ + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); + +diff --git a/qapi/migration.json b/qapi/migration.json +index b442d0d878..5d0855a1d8 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1447,6 +1447,18 @@ + { 'event': 'MIGRATION_PASS', + 'data': { 'pass': 'int' } } + ++## ++# @MIGRATION_MULTIFD_PID: ++# ++# Emitted when multifd thread appear ++# ++# @pid: pid of multifd thread ++# ++# Since: EulerOS Virtual ++## ++{ 'event': 'MIGRATION_MULTIFD_PID', ++ 'data': { 'pid': 'int' } } ++ + ## + # @MIGRATION_PID: + # +-- +2.27.0 + 
diff --git a/migration-savevm-release-gslist-after-dump_vmstate_j.patch b/migration-savevm-release-gslist-after-dump_vmstate_j.patch deleted file mode 100644 index d5ec9b881005dc21ec927a9f4b37f57999c89c1f..0000000000000000000000000000000000000000 --- a/migration-savevm-release-gslist-after-dump_vmstate_j.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 0d8c145e986d4f500f065d2d8645e95175324e62 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Wed, 19 Feb 2020 17:47:05 +0800 -Subject: [PATCH 8/9] migration/savevm: release gslist after dump_vmstate_json - -'list' forgot to free at the end of dump_vmstate_json_to_file(), although it's called only once, but seems like a clean code. - -Fix the leak as follow: -Direct leak of 16 byte(s) in 1 object(s) allocated from: - #0 0x7fb946abd768 in __interceptor_malloc (/lib64/libasan.so.5+0xef768) - #1 0x7fb945eca445 in g_malloc (/lib64/libglib-2.0.so.0+0x52445) - #2 0x7fb945ee2066 in g_slice_alloc (/lib64/libglib-2.0.so.0+0x6a066) - #3 0x7fb945ee3139 in g_slist_prepend (/lib64/libglib-2.0.so.0+0x6b139) - #4 0x5585db591581 in object_class_get_list_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1084 - #5 0x5585db590f66 in object_class_foreach_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1028 - #6 0x7fb945eb35f7 in g_hash_table_foreach (/lib64/libglib-2.0.so.0+0x3b5f7) - #7 0x5585db59110c in object_class_foreach /mnt/sdb/qemu-new/qemu/qom/object.c:1038 - #8 0x5585db5916b6 in object_class_get_list /mnt/sdb/qemu-new/qemu/qom/object.c:1092 - #9 0x5585db335ca0 in dump_vmstate_json_to_file /mnt/sdb/qemu-new/qemu/migration/savevm.c:638 - #10 0x5585daa5bcbf in main /mnt/sdb/qemu-new/qemu/vl.c:4420 - #11 0x7fb941204812 in __libc_start_main ../csu/libc-start.c:308 - #12 0x5585da29420d in _start (/mnt/sdb/qemu-new/qemu/build/x86_64-softmmu/qemu-system-x86_64+0x27f020d) - -Indirect leak of 7472 byte(s) in 467 object(s) allocated from: - #0 0x7fb946abd768 in __interceptor_malloc (/lib64/libasan.so.5+0xef768) - #1 0x7fb945eca445 in g_malloc 
(/lib64/libglib-2.0.so.0+0x52445) - #2 0x7fb945ee2066 in g_slice_alloc (/lib64/libglib-2.0.so.0+0x6a066) - #3 0x7fb945ee3139 in g_slist_prepend (/lib64/libglib-2.0.so.0+0x6b139) - #4 0x5585db591581 in object_class_get_list_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1084 - #5 0x5585db590f66 in object_class_foreach_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1028 - #6 0x7fb945eb35f7 in g_hash_table_foreach (/lib64/libglib-2.0.so.0+0x3b5f7) - #7 0x5585db59110c in object_class_foreach /mnt/sdb/qemu-new/qemu/qom/object.c:1038 - #8 0x5585db5916b6 in object_class_get_list /mnt/sdb/qemu-new/qemu/qom/object.c:1092 - #9 0x5585db335ca0 in dump_vmstate_json_to_file /mnt/sdb/qemu-new/qemu/migration/savevm.c:638 - #10 0x5585daa5bcbf in main /mnt/sdb/qemu-new/qemu/vl.c:4420 - #11 0x7fb941204812 in __libc_start_main ../csu/libc-start.c:308 - #12 0x5585da29420d in _start (/mnt/sdb/qemu-new/qemu/build/x86_64-softmmu/qemu-system-x86_64+0x27f020d) - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Reviewed-by: Juan Quintela -Reviewed-by: Dr. 
David Alan Gilbert -Signed-off-by: Juan Quintela -Signed-off-by: AlexChen ---- - migration/savevm.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/migration/savevm.c b/migration/savevm.c -index 7d89c57..8163de7 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -614,6 +614,7 @@ void dump_vmstate_json_to_file(FILE *out_file) - } - fprintf(out_file, "\n}\n"); - fclose(out_file); -+ g_slist_free(list); - } - - static uint32_t calculate_new_instance_id(const char *idstr) --- -1.8.3.1 - diff --git a/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch b/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ee3f088a239688d68c7aaa807781fbd2664dc72 --- /dev/null +++ b/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch @@ -0,0 +1,47 @@ +From dfb9372702b2fb994392b8a6e8a39964c2656ae6 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 08:49:41 +0800 +Subject: [PATCH] migration: skip cache_drop for bios bootloader and nvram + template + +Qemu enabled page cache dropping for raw device on the destionation host +during shared storage migration. +However, fsync may take 300ms to multiple seconds to return in multiple-migration +scene, because all domains in a host share bios bootloader file, skip cache_drop +for bios bootloader and nvram template to avoid downtime increase. 
+--- + block.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index b7cb963929..3bfd4be6b4 100644 +--- a/block.c ++++ b/block.c +@@ -68,6 +68,9 @@ + + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + ++#define DEFAULT_BIOS_BOOT_LOADER_DIR "/usr/share/edk2" ++#define DEFAULT_NVRAM_TEMPLATE_DIR "/var/lib/libvirt/qemu/nvram" ++ + /* Protected by BQL */ + static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = + QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); +@@ -7017,7 +7020,13 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert_bdrv_graph_readable(); + +- if (bs->drv->bdrv_co_invalidate_cache) { ++ /* ++ * It's not necessary for bios bootloader and nvram template to drop cache ++ * when migration, skip this step for them to avoid dowtime increase. ++ */ ++ if (bs->drv->bdrv_co_invalidate_cache && ++ !strstr(bs->filename, DEFAULT_BIOS_BOOT_LOADER_DIR) && ++ !strstr(bs->filename, DEFAULT_NVRAM_TEMPLATE_DIR)) { + bs->drv->bdrv_co_invalidate_cache(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); +-- +2.27.0 + diff --git a/migration-tls-add-error-handling-in-multifd_tls_hand.patch b/migration-tls-add-error-handling-in-multifd_tls_hand.patch deleted file mode 100644 index de444af35d9713e092f98a89485a8a8c590a2203..0000000000000000000000000000000000000000 --- a/migration-tls-add-error-handling-in-multifd_tls_hand.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 4bf84b63bf1b2fba031fc6c3f4948785d534df3b Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Fri, 5 Mar 2021 16:10:57 +0800 -Subject: [PATCH] migration/tls: add error handling in - multifd_tls_handshake_thread - -If any error happens during multifd send thread creating (e.g. 
channel broke -because new domain is destroyed by the dst), multifd_tls_handshake_thread -may exit silently, leaving main migration thread hanging (ram_save_setup -> -multifd_send_sync_main -> qemu_sem_wait(&p->sem_sync)). -Fix that by adding error handling in multifd_tls_handshake_thread. - -Signed-off-by: Hao Wang ---- - migration/ram.c | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 3338363e9d..d4ac696899 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1516,7 +1516,16 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, - } else { - trace_multifd_tls_outgoing_handshake_complete(ioc); - } -- multifd_channel_connect(p, ioc, err); -+ -+ if (!multifd_channel_connect(p, ioc, err)) { -+ /* -+ * Error happen, mark multifd_send_thread status as 'quit' although it -+ * is not created, and then tell who pay attention to me. -+ */ -+ p->quit = true; -+ qemu_sem_post(&multifd_send_state->channels_ready); -+ qemu_sem_post(&p->sem_sync); -+ } - } - - static void *multifd_tls_handshake_thread(void *opaque) --- -2.27.0 - diff --git a/migration-tls-add-support-for-multifd-tls-handshake.patch b/migration-tls-add-support-for-multifd-tls-handshake.patch deleted file mode 100644 index f81bb6194cdd98b8a83046cc6cfc831d108d5aae..0000000000000000000000000000000000000000 --- a/migration-tls-add-support-for-multifd-tls-handshake.patch +++ /dev/null @@ -1,125 +0,0 @@ -From e283c7dab15fed5af2904480230f86cf81b67aed Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 11:38:37 +0800 -Subject: [PATCH] migration/tls: add support for multifd tls-handshake -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Similar like migration main thread, we need to do handshake -for each multifd thread. - -Signed-off-by: Chuan Zheng -Signed-off-by: Yan Jin -Reviewed-by: Daniel P. 
Berrangé -Message-Id: <1600139042-104593-6-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/ram.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 75 insertions(+), 2 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 2b9d00745c..b82c0e6562 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -38,6 +38,7 @@ - #include "ram.h" - #include "migration.h" - #include "socket.h" -+#include "tls.h" - #include "migration/register.h" - #include "migration/misc.h" - #include "qemu-file.h" -@@ -1200,6 +1201,77 @@ out: - return NULL; - } - -+static bool multifd_channel_connect(MultiFDSendParams *p, -+ QIOChannel *ioc, -+ Error *error); -+ -+static void multifd_tls_outgoing_handshake(QIOTask *task, -+ gpointer opaque) -+{ -+ MultiFDSendParams *p = opaque; -+ QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); -+ Error *err = NULL; -+ -+ qio_task_propagate_error(task, &err); -+ multifd_channel_connect(p, ioc, err); -+} -+ -+static void multifd_tls_channel_connect(MultiFDSendParams *p, -+ QIOChannel *ioc, -+ Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ const char *hostname = p->tls_hostname; -+ QIOChannelTLS *tioc; -+ -+ tioc = migration_tls_client_create(s, ioc, hostname, errp); -+ if (!tioc) { -+ return; -+ } -+ -+ qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); -+ qio_channel_tls_handshake(tioc, -+ multifd_tls_outgoing_handshake, -+ p, -+ NULL, -+ NULL); -+ -+} -+ -+static bool multifd_channel_connect(MultiFDSendParams *p, -+ QIOChannel *ioc, -+ Error *error) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ if (!error) { -+ if (s->parameters.tls_creds && -+ *s->parameters.tls_creds && -+ !object_dynamic_cast(OBJECT(ioc), -+ TYPE_QIO_CHANNEL_TLS)) { -+ multifd_tls_channel_connect(p, ioc, &error); -+ if (!error) { -+ /* -+ * tls_channel_connect will call back to this -+ * function after the TLS handshake, -+ * so we mustn't call 
multifd_send_thread until then -+ */ -+ return false; -+ } else { -+ return true; -+ } -+ } else { -+ /* update for tls qio channel */ -+ p->c = ioc; -+ qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, -+ QEMU_THREAD_JOINABLE); -+ } -+ return false; -+ } -+ -+ return true; -+} -+ - static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, - QIOChannel *ioc, Error *err) - { -@@ -1229,8 +1301,9 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); - p->running = true; -- qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, -- QEMU_THREAD_JOINABLE); -+ if (multifd_channel_connect(p, sioc, local_err)) { -+ goto cleanup; -+ } - return; - } - --- -2.27.0 - diff --git a/migration-tls-add-tls_hostname-into-MultiFDSendParam.patch b/migration-tls-add-tls_hostname-into-MultiFDSendParam.patch deleted file mode 100644 index 3b06a42fad428d20035072601afbcd139b77c291..0000000000000000000000000000000000000000 --- a/migration-tls-add-tls_hostname-into-MultiFDSendParam.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 0aff29297923b32e919ce944030a043e0826d9aa Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 11:25:44 +0800 -Subject: [PATCH] migration/tls: add tls_hostname into MultiFDSendParams -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since multifd creation is async with migration_channel_connect, we should -pass the hostname from MigrationState to MultiFDSendParams. - -Signed-off-by: Chuan Zheng -Signed-off-by: Yan Jin -Message-Id: <1600139042-104593-4-git-send-email-zhengchuan@huawei.com> -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 1a33c7b3e2..bb8f383c3b 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -621,6 +621,8 @@ typedef struct { - uint8_t id; - /* channel thread name */ - char *name; -+ /* tls hostname */ -+ char *tls_hostname; - /* channel thread id */ - QemuThread thread; - /* communication channel */ -@@ -1041,6 +1043,8 @@ void multifd_save_cleanup(void) - qemu_sem_destroy(&p->sem_sync); - g_free(p->name); - p->name = NULL; -+ g_free(p->tls_hostname); -+ p->tls_hostname = NULL; - multifd_pages_clear(p->pages); - p->pages = NULL; - p->packet_len = 0; -@@ -1229,10 +1233,12 @@ int multifd_save_setup(void) - int thread_count; - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); - uint8_t i; -+ MigrationState *s; - - if (!migrate_use_multifd()) { - return 0; - } -+ s = migrate_get_current(); - thread_count = migrate_multifd_channels(); - multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); - multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); -@@ -1253,6 +1259,7 @@ int multifd_save_setup(void) - + sizeof(ram_addr_t) * page_count; - p->packet = g_malloc0(p->packet_len); - p->name = g_strdup_printf("multifdsend_%d", i); -+ p->tls_hostname = g_strdup(s->hostname); - socket_send_channel_create(multifd_new_send_channel_async, p); - } - return 0; --- -2.27.0 - diff --git a/migration-tls-add-trace-points-for-multifd-tls.patch b/migration-tls-add-trace-points-for-multifd-tls.patch deleted file mode 100644 index a49ef1faad30e725b45c01e10812df9b7b72b7b3..0000000000000000000000000000000000000000 --- a/migration-tls-add-trace-points-for-multifd-tls.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 83cbd3a645e9376a25cd359e8f12f8db025bf071 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 13:56:11 +0800 -Subject: [PATCH] migration/tls: add trace points for multifd-tls -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -add trace points for multifd-tls for debug. - -Signed-off-by: Chuan Zheng -Signed-off-by: Yan Jin -Reviewed-by: Daniel P. Berrangé -Message-Id: <1600139042-104593-7-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. David Alan Gilbert ---- - migration/ram.c | 10 +++++++++- - migration/trace-events | 4 ++++ - 2 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index b82c0e6562..3ded38c0be 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1212,7 +1212,11 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, - QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); - Error *err = NULL; - -- qio_task_propagate_error(task, &err); -+ if (qio_task_propagate_error(task, &err)) { -+ trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); -+ } else { -+ trace_multifd_tls_outgoing_handshake_complete(ioc); -+ } - multifd_channel_connect(p, ioc, err); - } - -@@ -1229,6 +1233,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, - return; - } - -+ trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); - qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); - qio_channel_tls_handshake(tioc, - multifd_tls_outgoing_handshake, -@@ -1244,6 +1249,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p, - { - MigrationState *s = migrate_get_current(); - -+ trace_multifd_set_outgoing_channel( -+ ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); -+ - if (!error) { - if (s->parameters.tls_creds && - *s->parameters.tls_creds && -diff --git a/migration/trace-events b/migration/trace-events -index 69620c43c2..c0640cd424 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -93,6 +93,10 @@ multifd_send_sync_main_signal(uint8_t id) "channel %d" - multifd_send_sync_main_wait(uint8_t id) "channel %d" - multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t 
pages) "channel %d packets %" PRIu64 " pages %" PRIu64 - multifd_send_thread_start(uint8_t id) "%d" -+multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" -+multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" -+multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" -+multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname, void *err) "ioc=%p ioctype=%s hostname=%s err=%p" - ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" - ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p" - ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" --- -2.27.0 - diff --git a/migration-tls-extract-cleanup-function-for-common-us.patch b/migration-tls-extract-cleanup-function-for-common-us.patch deleted file mode 100644 index 5ac83e9200020300060317065bfbfeca0ebf84e2..0000000000000000000000000000000000000000 --- a/migration-tls-extract-cleanup-function-for-common-us.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 29914b97b20a6415476095c913607412a3f7572f Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 11:32:44 +0800 -Subject: [PATCH] migration/tls: extract cleanup function for common-use -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -multifd channel cleanup is need if multifd handshake failed, -let's extract it. - -Signed-off-by: Chuan Zheng -Signed-off-by: Yan Jin -Reviewed-by: Daniel P. Berrangé -Message-Id: <1600139042-104593-5-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 34 ++++++++++++++++++++++------------ - 1 file changed, 22 insertions(+), 12 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index bb8f383c3b..2b9d00745c 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1200,6 +1200,23 @@ out: - return NULL; - } - -+static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, -+ QIOChannel *ioc, Error *err) -+{ -+ migrate_set_error(migrate_get_current(), err); -+ /* Error happen, we need to tell who pay attention to me */ -+ qemu_sem_post(&multifd_send_state->channels_ready); -+ qemu_sem_post(&p->sem_sync); -+ /* -+ * Although multifd_send_thread is not created, but main migration -+ * thread neet to judge whether it is running, so we need to mark -+ * its status. -+ */ -+ p->quit = true; -+ object_unref(OBJECT(ioc)); -+ error_free(err); -+} -+ - static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - { - MultiFDSendParams *p = opaque; -@@ -1207,25 +1224,18 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - Error *local_err = NULL; - - if (qio_task_propagate_error(task, &local_err)) { -- migrate_set_error(migrate_get_current(), local_err); -- /* Error happen, we need to tell who pay attention to me */ -- qemu_sem_post(&multifd_send_state->channels_ready); -- qemu_sem_post(&p->sem_sync); -- /* -- * Although multifd_send_thread is not created, but main migration -- * thread neet to judge whether it is running, so we need to mark -- * its status. 
-- */ -- p->quit = true; -- object_unref(OBJECT(sioc)); -- error_free(local_err); -+ goto cleanup; - } else { - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); - p->running = true; - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, - QEMU_THREAD_JOINABLE); -+ return; - } -+ -+cleanup: -+ multifd_new_send_channel_cleanup(p, sioc, local_err); - } - - int multifd_save_setup(void) --- -2.27.0 - diff --git a/migration-tls-extract-migration_tls_client_create-fo.patch b/migration-tls-extract-migration_tls_client_create-fo.patch deleted file mode 100644 index 7f538332241d60313f87533cff62785c11e98f39..0000000000000000000000000000000000000000 --- a/migration-tls-extract-migration_tls_client_create-fo.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 4ffa2ea3749066a0444b69ef16ec4e4d6cdad0e1 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Tue, 15 Sep 2020 11:03:58 +0800 -Subject: [PATCH] migration/tls: extract migration_tls_client_create for - common-use -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -migration_tls_client_create will be used in multifd-tls, let's -extract it. - -Signed-off-by: Chuan Zheng -Signed-off-by: Yan Jin -Reviewed-by: Daniel P. Berrangé -Message-Id: <1600139042-104593-3-git-send-email-zhengchuan@huawei.com> -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/tls.c | 26 ++++++++++++++++++-------- - migration/tls.h | 6 ++++++ - 2 files changed, 24 insertions(+), 8 deletions(-) - -diff --git a/migration/tls.c b/migration/tls.c -index a0eb553e14..1d5b571d8e 100644 ---- a/migration/tls.c -+++ b/migration/tls.c -@@ -22,7 +22,6 @@ - #include "channel.h" - #include "migration.h" - #include "tls.h" --#include "io/channel-tls.h" - #include "crypto/tlscreds.h" - #include "qemu/error-report.h" - #include "qapi/error.h" -@@ -126,11 +125,10 @@ static void migration_tls_outgoing_handshake(QIOTask *task, - object_unref(OBJECT(ioc)); - } - -- --void migration_tls_channel_connect(MigrationState *s, -- QIOChannel *ioc, -- const char *hostname, -- Error **errp) -+QIOChannelTLS *migration_tls_client_create(MigrationState *s, -+ QIOChannel *ioc, -+ const char *hostname, -+ Error **errp) - { - QCryptoTLSCreds *creds; - QIOChannelTLS *tioc; -@@ -138,7 +136,7 @@ void migration_tls_channel_connect(MigrationState *s, - creds = migration_tls_get_creds( - s, QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, errp); - if (!creds) { -- return; -+ return NULL; - } - - if (s->parameters.tls_hostname && *s->parameters.tls_hostname) { -@@ -146,11 +144,23 @@ void migration_tls_channel_connect(MigrationState *s, - } - if (!hostname) { - error_setg(errp, "No hostname available for TLS"); -- return; -+ return NULL; - } - - tioc = qio_channel_tls_new_client( - ioc, creds, hostname, errp); -+ -+ return tioc; -+} -+ -+void migration_tls_channel_connect(MigrationState *s, -+ QIOChannel *ioc, -+ const char *hostname, -+ Error **errp) -+{ -+ QIOChannelTLS *tioc; -+ -+ tioc = migration_tls_client_create(s, ioc, hostname, errp); - if (!tioc) { - return; - } -diff --git a/migration/tls.h b/migration/tls.h -index cdd70001ed..0cfbe368ba 100644 ---- a/migration/tls.h -+++ b/migration/tls.h -@@ -22,11 +22,17 @@ - #define QEMU_MIGRATION_TLS_H - - #include "io/channel.h" -+#include "io/channel-tls.h" - - void 
migration_tls_channel_process_incoming(MigrationState *s, - QIOChannel *ioc, - Error **errp); - -+QIOChannelTLS *migration_tls_client_create(MigrationState *s, -+ QIOChannel *ioc, -+ const char *hostname, -+ Error **errp); -+ - void migration_tls_channel_connect(MigrationState *s, - QIOChannel *ioc, - const char *hostname, --- -2.27.0 - diff --git a/migration-tls-fix-inverted-semantics-in-multifd_chan.patch b/migration-tls-fix-inverted-semantics-in-multifd_chan.patch deleted file mode 100644 index 3f5a52aa6db6d5ecd034a1fc2f98c994b84ade64..0000000000000000000000000000000000000000 --- a/migration-tls-fix-inverted-semantics-in-multifd_chan.patch +++ /dev/null @@ -1,55 +0,0 @@ -From ee0d1b508a144ab390fb7bc8b7a4fe3161aebecf Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Fri, 5 Mar 2021 16:09:29 +0800 -Subject: [PATCH] migration/tls: fix inverted semantics in - multifd_channel_connect - -Function multifd_channel_connect() return "true" to indicate failure, -which is rather confusing. Fix that. 
- -Signed-off-by: Hao Wang ---- - migration/ram.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index ba1e729c39..3338363e9d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1575,9 +1575,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p, - * function after the TLS handshake, - * so we mustn't call multifd_send_thread until then - */ -- return false; -- } else { - return true; -+ } else { -+ return false; - } - } else { - /* update for tls qio channel */ -@@ -1585,10 +1585,10 @@ static bool multifd_channel_connect(MultiFDSendParams *p, - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, - QEMU_THREAD_JOINABLE); - } -- return false; -+ return true; - } - -- return true; -+ return false; - } - - static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, -@@ -1620,7 +1620,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); - p->running = true; -- if (multifd_channel_connect(p, sioc, local_err)) { -+ if (!multifd_channel_connect(p, sioc, local_err)) { - goto cleanup; - } - return; --- -2.27.0 - diff --git a/migration-tls-save-hostname-into-MigrationState.patch b/migration-tls-save-hostname-into-MigrationState.patch deleted file mode 100644 index 538a8f69179d536f2f5bc307a66d4c900c5fd790..0000000000000000000000000000000000000000 --- a/migration-tls-save-hostname-into-MigrationState.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 08ae1eda02ff08b3431b227ed702ea0fc5f8a4a2 Mon Sep 17 00:00:00 2001 -From: Chuan Zheng -Date: Tue, 15 Sep 2020 11:03:57 +0800 -Subject: [PATCH] migration/tls: save hostname into MigrationState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -hostname is need in multifd-tls, save hostname into MigrationState. 
- -Signed-off-by: Chuan Zheng -Signed-off-by: Yan Jin -Message-Id: <1600139042-104593-2-git-send-email-zhengchuan@huawei.com> -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert ---- - migration/channel.c | 1 + - migration/migration.c | 1 + - migration/migration.h | 5 +++++ - migration/tls.c | 2 ++ - 4 files changed, 9 insertions(+) - -diff --git a/migration/channel.c b/migration/channel.c -index 7462181484..46ed40b89c 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -99,5 +99,6 @@ void migration_channel_connect(MigrationState *s, - } - } - migrate_fd_connect(s, error); -+ g_free(s->hostname); - error_free(error); - } -diff --git a/migration/migration.c b/migration/migration.c -index 7949f2a40b..993d77b7d6 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1710,6 +1710,7 @@ void migrate_init(MigrationState *s) - s->migration_thread_running = false; - error_free(s->error); - s->error = NULL; -+ s->hostname = NULL; - - migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); - -diff --git a/migration/migration.h b/migration/migration.h -index feb344306a..e5aaf2ef70 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -259,6 +259,11 @@ struct MigrationState - * (which is in 4M chunk). 
- */ - uint8_t clear_bitmap_shift; -+ -+ /* -+ * This save hostname when out-going migration starts -+ */ -+ char *hostname; - }; - - void migrate_set_state(int *state, int old_state, int new_state); -diff --git a/migration/tls.c b/migration/tls.c -index 5171afc6c4..a0eb553e14 100644 ---- a/migration/tls.c -+++ b/migration/tls.c -@@ -155,6 +155,8 @@ void migration_tls_channel_connect(MigrationState *s, - return; - } - -+ /* Save hostname into MigrationState for handshake */ -+ s->hostname = g_strdup(hostname); - trace_migration_tls_outgoing_handshake_start(hostname); - qio_channel_set_name(QIO_CHANNEL(tioc), "migration-tls-outgoing"); - qio_channel_tls_handshake(tioc, --- -2.27.0 - diff --git a/migration-update-ram_counters-for-multifd-sync-packe.patch b/migration-update-ram_counters-for-multifd-sync-packe.patch deleted file mode 100644 index 838380403f1ac31df0f2befd62a14711cee71e58..0000000000000000000000000000000000000000 --- a/migration-update-ram_counters-for-multifd-sync-packe.patch +++ /dev/null @@ -1,35 +0,0 @@ -From e93040851d683f1f7750acfa0e862b4405678f24 Mon Sep 17 00:00:00 2001 -From: Zheng Chuan -Date: Fri, 24 Apr 2020 11:50:41 +0800 -Subject: [PATCH 04/10] migration: update ram_counters for multifd sync packet - -Multifd sync will send MULTIFD_FLAG_SYNC flag info to destination, add -these bytes to ram_counters record. - -Change-Id: I885166f412f58e74de40ea6ffec1c35e82ae4619 -Signed-off-by: Ivan Ren -Suggested-by: Wei Yang -Message-Id: <1564464816-21804-4-git-send-email-ivanren@tencent.com> -Reviewed-by: Juan Quintela -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 88ddd2bb..c75716bb 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1085,6 +1085,10 @@ static void multifd_send_sync_main(RAMState *rs) - p->flags |= MULTIFD_FLAG_SYNC; - p->pending_job++; - qemu_file_update_transfer(rs->f, p->packet_len); -+ ram_counters.multifd_bytes += p->packet_len; -+ ram_counters.transferred += p->packet_len; -+ ram_counters.multifd_bytes += p->packet_len; -+ ram_counters.transferred += p->packet_len; - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - } --- -2.19.1 diff --git a/migration-use-migration_is_active-to-represent-activ.patch b/migration-use-migration_is_active-to-represent-activ.patch deleted file mode 100644 index c9e926ad8e16dcfe9c931fae96cc9b8ad8e0cb93..0000000000000000000000000000000000000000 --- a/migration-use-migration_is_active-to-represent-activ.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 9662d44633dd4582dc47d58f63ee63b2c8f60a4f Mon Sep 17 00:00:00 2001 -From: Wei Yang -Date: Wed, 17 Jul 2019 08:53:41 +0800 -Subject: [PATCH] migration: use migration_is_active to represent active state - -Wrap the check into a function to make it easy to read. - -Signed-off-by: Wei Yang -Message-Id: <20190717005341.14140-1-richardw.yang@linux.intel.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert ---- - include/migration/misc.h | 1 + - migration/migration.c | 12 ++++++++---- - 2 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/include/migration/misc.h b/include/migration/misc.h -index 5cdbabd094..42d6abc920 100644 ---- a/include/migration/misc.h -+++ b/include/migration/misc.h -@@ -61,6 +61,7 @@ void migration_object_init(void); - void migration_shutdown(void); - void qemu_start_incoming_migration(const char *uri, Error **errp); - bool migration_is_idle(void); -+bool migration_is_active(MigrationState *); - void add_migration_state_change_notifier(Notifier *notify); - void remove_migration_state_change_notifier(Notifier *notify); - bool migration_in_setup(MigrationState *); -diff --git a/migration/migration.c b/migration/migration.c -index 9b40380d7c..fd7d81d4b6 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1578,8 +1578,7 @@ static void migrate_fd_cleanup(MigrationState *s) - qemu_fclose(tmp); - } - -- assert((s->state != MIGRATION_STATUS_ACTIVE) && -- (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE)); -+ assert(!migration_is_active(s)); - - if (s->state == MIGRATION_STATUS_CANCELLING) { - migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, -@@ -1741,6 +1740,12 @@ bool migration_is_idle(void) - return false; - } - -+bool migration_is_active(MigrationState *s) -+{ -+ return (s->state == MIGRATION_STATUS_ACTIVE || -+ s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); -+} -+ - void migrate_init(MigrationState *s) - { - /* -@@ -3307,8 +3312,7 @@ static void *migration_thread(void *opaque) - - trace_migration_thread_setup_complete(); - -- while (s->state == MIGRATION_STATUS_ACTIVE || -- s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { -+ while (migration_is_active(s)) { - int64_t current_time; - - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { --- -2.27.0 - diff --git a/migration.json-add-AMD-SEV-specific-migration-parame.patch b/migration.json-add-AMD-SEV-specific-migration-parame.patch new file 
mode 100644 index 0000000000000000000000000000000000000000..bc1d6655b6f51fbe1853ce835e4fa4bddccfb83b --- /dev/null +++ b/migration.json-add-AMD-SEV-specific-migration-parame.patch @@ -0,0 +1,265 @@ +From 5ff59a5649385672da42097b24a2428bc2348d9b Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 11:27:00 +0000 +Subject: [PATCH] migration.json: add AMD SEV specific migration parameters + +cherry-picked from https://github.com/AMDESE/qemu/commit/d6a23bde6b6e. + +AMD SEV migration flow requires that target machine's public Diffie-Hellman +key (PDH) and certificate chain must be passed before initiating the guest +migration. User can use QMP 'migrate-set-parameters' to pass the certificate +chain. The certificate chain will be used while creating the outgoing +encryption context. + +Signed-off-by: Brijesh Singh +Signed-off-by: Ashish Kalra +[ Fix conflicts and qapi errors. ] +Signed-off-by: hanliyang +--- + migration/migration-hmp-cmds.c | 28 ++++++++++++++++ + migration/options.c | 60 ++++++++++++++++++++++++++++++++++ + qapi/migration.json | 41 +++++++++++++++++++++-- + 3 files changed, 126 insertions(+), 3 deletions(-) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 1fa6a5f478..7ce0446d46 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -395,6 +395,19 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_MODE), + qapi_enum_lookup(&MigMode_lookup, params->mode)); ++ ++ assert(params->sev_pdh); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_SEV_PDH), ++ params->sev_pdh); ++ assert(params->sev_plat_cert); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_SEV_PLAT_CERT), ++ params->sev_plat_cert); ++ assert(params->sev_amd_cert); ++ monitor_printf(mon, "%s: %s\n", ++ 
MigrationParameter_str(MIGRATION_PARAMETER_SEV_AMD_CERT), ++ params->sev_amd_cert); + } + + qapi_free_MigrationParameters(params); +@@ -691,6 +704,21 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_mode = true; + visit_type_MigMode(v, param, &p->mode, &err); + break; ++ case MIGRATION_PARAMETER_SEV_PDH: ++ p->sev_pdh = g_new0(StrOrNull, 1); ++ p->sev_pdh->type = QTYPE_QSTRING; ++ visit_type_str(v, param, &p->sev_pdh->u.s, &err); ++ break; ++ case MIGRATION_PARAMETER_SEV_PLAT_CERT: ++ p->sev_plat_cert = g_new0(StrOrNull, 1); ++ p->sev_plat_cert->type = QTYPE_QSTRING; ++ visit_type_str(v, param, &p->sev_plat_cert->u.s, &err); ++ break; ++ case MIGRATION_PARAMETER_SEV_AMD_CERT: ++ p->sev_amd_cert = g_new0(StrOrNull, 1); ++ p->sev_amd_cert->type = QTYPE_QSTRING; ++ visit_type_str(v, param, &p->sev_amd_cert->u.s, &err); ++ break; + default: + assert(0); + } +diff --git a/migration/options.c b/migration/options.c +index 9b68962a65..71e71ea801 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -183,6 +183,9 @@ Property migration_properties[] = { + DEFINE_PROP_MIG_MODE("mode", MigrationState, + parameters.mode, + MIG_MODE_NORMAL), ++ DEFINE_PROP_STRING("sev-pdh", MigrationState, parameters.sev_pdh), ++ DEFINE_PROP_STRING("sev-plat-cert", MigrationState, parameters.sev_plat_cert), ++ DEFINE_PROP_STRING("sev-amd-cert", MigrationState, parameters.sev_amd_cert), + + /* Migration capabilities */ + DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), +@@ -1012,6 +1015,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->announce_rounds = s->parameters.announce_rounds; + params->has_announce_step = true; + params->announce_step = s->parameters.announce_step; ++ params->sev_pdh = g_strdup(s->parameters.sev_pdh); ++ params->sev_plat_cert = g_strdup(s->parameters.sev_plat_cert); ++ params->sev_amd_cert = g_strdup(s->parameters.sev_amd_cert); + + if (s->parameters.has_block_bitmap_mapping) { + 
params->has_block_bitmap_mapping = true; +@@ -1063,6 +1069,10 @@ void migrate_params_init(MigrationParameters *params) + params->has_x_vcpu_dirty_limit_period = true; + params->has_vcpu_dirty_limit = true; + params->has_mode = true; ++ ++ params->sev_pdh = g_strdup(""); ++ params->sev_plat_cert = g_strdup(""); ++ params->sev_amd_cert = g_strdup(""); + } + + static bool compress_level_check(MigrationParameters *params, Error **errp) +@@ -1392,6 +1402,19 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_mode) { + dest->mode = params->mode; + } ++ ++ if (params->sev_pdh) { ++ assert(params->sev_pdh->type == QTYPE_QSTRING); ++ dest->sev_pdh = params->sev_pdh->u.s; ++ } ++ if (params->sev_plat_cert) { ++ assert(params->sev_plat_cert->type == QTYPE_QSTRING); ++ dest->sev_plat_cert = params->sev_plat_cert->u.s; ++ } ++ if (params->sev_amd_cert) { ++ assert(params->sev_amd_cert->type == QTYPE_QSTRING); ++ dest->sev_amd_cert = params->sev_amd_cert->u.s; ++ } + } + + static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +@@ -1540,6 +1563,22 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_mode) { + s->parameters.mode = params->mode; + } ++ ++ if (params->sev_pdh) { ++ g_free(s->parameters.sev_pdh); ++ assert(params->sev_pdh->type == QTYPE_QSTRING); ++ s->parameters.sev_pdh = g_strdup(params->sev_pdh->u.s); ++ } ++ if (params->sev_plat_cert) { ++ g_free(s->parameters.sev_plat_cert); ++ assert(params->sev_plat_cert->type == QTYPE_QSTRING); ++ s->parameters.sev_plat_cert = g_strdup(params->sev_plat_cert->u.s); ++ } ++ if (params->sev_amd_cert) { ++ g_free(s->parameters.sev_amd_cert); ++ assert(params->sev_amd_cert->type == QTYPE_QSTRING); ++ s->parameters.sev_amd_cert = g_strdup(params->sev_amd_cert->u.s); ++ } + } + + void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) +@@ -1565,6 +1604,27 @@ void qmp_migrate_set_parameters(MigrateSetParameters 
*params, Error **errp) + params->tls_authz->type = QTYPE_QSTRING; + params->tls_authz->u.s = strdup(""); + } ++ /* TODO Rewrite "" to null instead */ ++ if (params->sev_pdh ++ && params->sev_pdh->type == QTYPE_QNULL) { ++ qobject_unref(params->sev_pdh->u.n); ++ params->sev_pdh->type = QTYPE_QSTRING; ++ params->sev_pdh->u.s = strdup(""); ++ } ++ /* TODO Rewrite "" to null instead */ ++ if (params->sev_plat_cert ++ && params->sev_plat_cert->type == QTYPE_QNULL) { ++ qobject_unref(params->sev_plat_cert->u.n); ++ params->sev_plat_cert->type = QTYPE_QSTRING; ++ params->sev_plat_cert->u.s = strdup(""); ++ } ++ /* TODO Rewrite "" to null instead */ ++ if (params->sev_amd_cert ++ && params->sev_amd_cert->type == QTYPE_QNULL) { ++ qobject_unref(params->sev_amd_cert->u.n); ++ params->sev_amd_cert->type = QTYPE_QSTRING; ++ params->sev_amd_cert->u.s = strdup(""); ++ } + + migrate_params_test_apply(params, &tmp); + +diff --git a/qapi/migration.json b/qapi/migration.json +index 5d0855a1d8..038e99cba3 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -891,6 +891,15 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # ++# @sev-pdh: The target host platform diffie-hellman key encoded in base64 ++# (Since 4.2) ++# ++# @sev-plat-cert: The target host platform certificate chain encoded in base64 ++# (Since 4.2) ++# ++# @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in ++# base64 (Since 4.2) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. Use +@@ -925,7 +934,8 @@ + 'block-bitmap-mapping', + { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, + 'vcpu-dirty-limit', +- 'mode'] } ++ 'mode', ++ 'sev-pdh', 'sev-plat-cert', 'sev-amd-cert'] } + + ## + # @MigrateSetParameters: +@@ -1083,6 +1093,15 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. 
+ # (Since 8.2) + # ++# @sev-pdh: The target host platform diffie-hellman key encoded in base64 ++# (Since 4.2) ++# ++# @sev-plat-cert: The target host platform certificate chain encoded in base64 ++# (Since 4.2) ++# ++# @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in ++# base64 (Since 4.2) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. Use +@@ -1139,7 +1158,11 @@ + '*x-vcpu-dirty-limit-period': { 'type': 'uint64', + 'features': [ 'unstable' ] }, + '*vcpu-dirty-limit': 'uint64', +- '*mode': 'MigMode'} } ++ '*mode': 'MigMode', ++ '*sev-pdh': 'StrOrNull', ++ '*sev-plat-cert': 'StrOrNull', ++ '*sev-amd-cert' : 'StrOrNull' } } ++ + + ## + # @migrate-set-parameters: +@@ -1317,6 +1340,15 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # ++# @sev-pdh: The target host platform diffie-hellman key encoded in base64 ++# (Since 4.2) ++# ++# @sev-plat-cert: The target host platform certificate chain encoded in base64 ++# (Since 4.2) ++# ++# @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in ++# base64 (Since 4.2) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. 
Use +@@ -1369,7 +1401,10 @@ + '*x-vcpu-dirty-limit-period': { 'type': 'uint64', + 'features': [ 'unstable' ] }, + '*vcpu-dirty-limit': 'uint64', +- '*mode': 'MigMode'} } ++ '*mode': 'MigMode', ++ '*sev-pdh': 'str', ++ '*sev-plat-cert': 'str', ++ '*sev-amd-cert' : 'str'} } + + ## + # @query-migrate-parameters: +-- +2.41.0.windows.1 + diff --git a/mirror-Do-not-dereference-invalid-pointers.patch b/mirror-Do-not-dereference-invalid-pointers.patch deleted file mode 100644 index b83e0695e5ff96a429e26f65b2c206eebb9b3f86..0000000000000000000000000000000000000000 --- a/mirror-Do-not-dereference-invalid-pointers.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 609aad11051c6f2053cc32b4881f5581c92435f3 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 14 Oct 2019 17:39:28 +0200 -Subject: [PATCH] mirror: Do not dereference invalid pointers - -mirror_exit_common() may be called twice (if it is called from -mirror_prepare() and fails, it will be called from mirror_abort() -again). - -In such a case, many of the pointers in the MirrorBlockJob object will -already be freed. This can be seen most reliably for s->target, which -is set to NULL (and then dereferenced by blk_bs()). 
- -Cc: qemu-stable@nongnu.org -Fixes: 737efc1eda23b904fbe0e66b37715fb0e5c3e58b -Signed-off-by: Max Reitz -Reviewed-by: John Snow -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-id: 20191014153931.20699-2-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit f93c3add3a773e0e3f6277e5517583c4ad3a43c2) -Signed-off-by: Michael Roth ---- - block/mirror.c | 13 +++++++++---- - 1 file changed, 9 insertions(+), 4 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 062dc42..408486c 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -617,11 +617,11 @@ static int mirror_exit_common(Job *job) - { - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - BlockJob *bjob = &s->common; -- MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque; -+ MirrorBDSOpaque *bs_opaque; - AioContext *replace_aio_context = NULL; -- BlockDriverState *src = s->mirror_top_bs->backing->bs; -- BlockDriverState *target_bs = blk_bs(s->target); -- BlockDriverState *mirror_top_bs = s->mirror_top_bs; -+ BlockDriverState *src; -+ BlockDriverState *target_bs; -+ BlockDriverState *mirror_top_bs; - Error *local_err = NULL; - bool abort = job->ret < 0; - int ret = 0; -@@ -631,6 +631,11 @@ static int mirror_exit_common(Job *job) - } - s->prepared = true; - -+ mirror_top_bs = s->mirror_top_bs; -+ bs_opaque = mirror_top_bs->opaque; -+ src = mirror_top_bs->backing->bs; -+ target_bs = blk_bs(s->target); -+ - if (bdrv_chain_contains(src, target_bs)) { - bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs); - } --- -1.8.3.1 - diff --git a/mirror-Fix-bdrv_has_zero_init-use.patch b/mirror-Fix-bdrv_has_zero_init-use.patch deleted file mode 100644 index 54fde6927f378bdddbe92495f8b5616dfd3f6953..0000000000000000000000000000000000000000 --- a/mirror-Fix-bdrv_has_zero_init-use.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 7fcb1c1a956a8cad5c2e8585e53878edc4fd9eca Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Wed, 24 Jul 2019 19:12:30 +0200 -Subject: [PATCH] mirror: Fix 
bdrv_has_zero_init() use - -bdrv_has_zero_init() only has meaning for newly created images or image -areas. If the mirror job itself did not create the image, it cannot -rely on bdrv_has_zero_init()'s result to carry any meaning. - -This is the case for drive-mirror with mode=existing and always for -blockdev-mirror. - -Note that we only have to zero-initialize the target with sync=full, -because other modes actually do not promise that the target will contain -the same data as the source after the job -- sync=top only promises to -copy anything allocated in the top layer, and sync=none will only copy -new I/O. (Which is how mirror has always handled it.) - -Signed-off-by: Max Reitz -Message-id: 20190724171239.8764-3-mreitz@redhat.com -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz ---- - block/mirror.c | 11 ++++++++--- - blockdev.c | 16 +++++++++++++--- - include/block/block_int.h | 2 ++ - tests/test-block-iothread.c | 2 +- - 4 files changed, 24 insertions(+), 7 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index ccae49a28e..89a053b265 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -51,6 +51,8 @@ typedef struct MirrorBlockJob { - Error *replace_blocker; - bool is_none_mode; - BlockMirrorBackingMode backing_mode; -+ /* Whether the target image requires explicit zero-initialization */ -+ bool zero_target; - MirrorCopyMode copy_mode; - BlockdevOnError on_source_error, on_target_error; - bool synced; -@@ -779,7 +781,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) - int ret; - int64_t count; - -- if (base == NULL && !bdrv_has_zero_init(target_bs)) { -+ if (s->zero_target) { - if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); - return 0; -@@ -1531,6 +1533,7 @@ static BlockJob *mirror_start_job( - const char *replaces, int64_t speed, - uint32_t granularity, int64_t buf_size, - BlockMirrorBackingMode backing_mode, -+ bool zero_target, - BlockdevOnError 
on_source_error, - BlockdevOnError on_target_error, - bool unmap, -@@ -1658,6 +1661,7 @@ static BlockJob *mirror_start_job( - s->on_target_error = on_target_error; - s->is_none_mode = is_none_mode; - s->backing_mode = backing_mode; -+ s->zero_target = zero_target; - s->copy_mode = copy_mode; - s->base = base; - s->granularity = granularity; -@@ -1762,6 +1766,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, -+ bool zero_target, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, -@@ -1779,7 +1784,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - is_none_mode = mode == MIRROR_SYNC_MODE_NONE; - base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; - mirror_start_job(job_id, bs, creation_flags, target, replaces, -- speed, granularity, buf_size, backing_mode, -+ speed, granularity, buf_size, backing_mode, zero_target, - on_source_error, on_target_error, unmap, NULL, NULL, - &mirror_job_driver, is_none_mode, base, false, - filter_node_name, true, copy_mode, errp); -@@ -1806,7 +1811,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - - ret = mirror_start_job( - job_id, bs, creation_flags, base, NULL, speed, 0, 0, -- MIRROR_LEAVE_BACKING_CHAIN, -+ MIRROR_LEAVE_BACKING_CHAIN, false, - on_error, on_error, true, cb, opaque, - &commit_active_job_driver, false, base, auto_complete, - filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, -diff --git a/blockdev.c b/blockdev.c -index 94e5aee30b..4435795b6d 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3739,6 +3739,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - bool has_replaces, const char *replaces, - enum MirrorSyncMode sync, - BlockMirrorBackingMode backing_mode, -+ bool zero_target, - bool has_speed, int64_t speed, - bool 
has_granularity, uint32_t granularity, - bool has_buf_size, int64_t buf_size, -@@ -3847,7 +3848,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - */ - mirror_start(job_id, bs, target, - has_replaces ? replaces : NULL, job_flags, -- speed, granularity, buf_size, sync, backing_mode, -+ speed, granularity, buf_size, sync, backing_mode, zero_target, - on_source_error, on_target_error, unmap, filter_node_name, - copy_mode, errp); - } -@@ -3863,6 +3864,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - int flags; - int64_t size; - const char *format = arg->format; -+ bool zero_target; - int ret; - - bs = qmp_get_root_bs(arg->device, errp); -@@ -3964,6 +3966,10 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - goto out; - } - -+ zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL && -+ (arg->mode == NEW_IMAGE_MODE_EXISTING || -+ !bdrv_has_zero_init(target_bs))); -+ - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); - if (ret < 0) { - bdrv_unref(target_bs); -@@ -3972,7 +3978,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - - blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, - arg->has_replaces, arg->replaces, arg->sync, -- backing_mode, arg->has_speed, arg->speed, -+ backing_mode, zero_target, -+ arg->has_speed, arg->speed, - arg->has_granularity, arg->granularity, - arg->has_buf_size, arg->buf_size, - arg->has_on_source_error, arg->on_source_error, -@@ -4012,6 +4019,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - AioContext *aio_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - Error *local_err = NULL; -+ bool zero_target; - int ret; - - bs = qmp_get_root_bs(device, errp); -@@ -4024,6 +4032,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - return; - } - -+ zero_target = (sync == MIRROR_SYNC_MODE_FULL); -+ - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - -@@ -4034,7 +4044,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - - blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs, - has_replaces, replaces, sync, backing_mode, -- has_speed, speed, -+ zero_target, has_speed, speed, - has_granularity, granularity, - has_buf_size, buf_size, - has_on_source_error, on_source_error, -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 76117a761a..154b9b5501 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1120,6 +1120,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - * @buf_size: The amount of data that can be in flight at one time. - * @mode: Whether to collapse all images in the chain to the target. - * @backing_mode: How to establish the target's backing chain after completion. -+ * @zero_target: Whether the target should be explicitly zero-initialized - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. - * @unmap: Whether to unmap target where source sectors only contain zeroes. 
-@@ -1139,6 +1140,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, -+ bool zero_target, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, -diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c -index 1949d5e61a..debfb69bfb 100644 ---- a/tests/test-block-iothread.c -+++ b/tests/test-block-iothread.c -@@ -611,7 +611,7 @@ static void test_propagate_mirror(void) - - /* Start a mirror job */ - mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, -- MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, -+ MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, - BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, - false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, - &error_abort); --- -2.27.0 - diff --git a/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch b/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch deleted file mode 100644 index 52f07f951aef929e9fe58740955f97af25e5ba85..0000000000000000000000000000000000000000 --- a/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch +++ /dev/null @@ -1,52 +0,0 @@ -From e092a17d3825a8f2c93cb429aaa5d857b579b64c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 22 Jul 2019 17:44:27 +0200 -Subject: [PATCH] mirror: Keep mirror_top_bs drained after dropping permissions - -mirror_top_bs is currently implicitly drained through its connection to -the source or the target node. 
However, the drain section for target_bs -ends early after moving mirror_top_bs from src to target_bs, so that -requests can already be restarted while mirror_top_bs is still present -in the chain, but has dropped all permissions and therefore runs into an -assertion failure like this: - - qemu-system-x86_64: block/io.c:1634: bdrv_co_write_req_prepare: - Assertion `child->perm & BLK_PERM_WRITE' failed. - -Keep mirror_top_bs drained until all graph changes have completed. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit d2da5e288a2e71e82866c8fdefd41b5727300124) -Signed-off-by: Michael Roth ---- - block/mirror.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 0e3f7923cf..681b305de6 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -661,7 +661,10 @@ static int mirror_exit_common(Job *job) - s->target = NULL; - - /* We don't access the source any more. Dropping any WRITE/RESIZE is -- * required before it could become a backing file of target_bs. */ -+ * required before it could become a backing file of target_bs. Not having -+ * these permissions any more means that we can't allow any new requests on -+ * mirror_top_bs from now on, so keep it drained. 
*/ -+ bdrv_drained_begin(mirror_top_bs); - bs_opaque->stop = true; - bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, - &error_abort); -@@ -729,6 +732,7 @@ static int mirror_exit_common(Job *job) - bs_opaque->job = NULL; - - bdrv_drained_end(src); -+ bdrv_drained_end(mirror_top_bs); - s->in_drain = false; - bdrv_unref(mirror_top_bs); - bdrv_unref(src); --- -2.23.0 diff --git a/mirror-Make-sure-that-source-and-target-size-match.patch b/mirror-Make-sure-that-source-and-target-size-match.patch deleted file mode 100644 index 5e4edd26ce42f4507240fe806ed7a2ce57364a41..0000000000000000000000000000000000000000 --- a/mirror-Make-sure-that-source-and-target-size-match.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 9f57569d541acaa4a76513d09ede7d2b19aa69ea Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:24 +0100 -Subject: [PATCH] mirror: Make sure that source and target size match - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-11-kwolf@redhat.com> -Patchwork-id: 97110 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 10/11] mirror: Make sure that source and target size match -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -If the target is shorter than the source, mirror would copy data until -it reaches the end of the target and then fail with an I/O error when -trying to write past the end. - -If the target is longer than the source, the mirror job would complete -successfully, but the target wouldn't actually be an accurate copy of -the source image (it would contain some additional garbage at the end). - -Fix this by checking that both images have the same size when the job -starts. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Message-Id: <20200511135825.219437-4-kwolf@redhat.com> -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit e83dd6808c6e0975970f37b49b27cc37bb54eea8) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index ef6c958ff9..8f0d4544d8 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -853,6 +853,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - BlockDriverState *target_bs = blk_bs(s->target); - bool need_drain = true; - int64_t length; -+ int64_t target_length; - BlockDriverInfo bdi; - char backing_filename[2]; /* we only need 2 characters because we are only - checking for a NULL string */ -@@ -868,24 +869,26 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - goto immediate_exit; - } - -+ target_length = blk_getlength(s->target); -+ if (target_length < 0) { -+ ret = target_length; -+ goto immediate_exit; -+ } -+ - /* Active commit must resize the base image if its size differs from the - * active layer. 
*/ - if (s->base == blk_bs(s->target)) { -- int64_t base_length; -- -- base_length = blk_getlength(s->target); -- if (base_length < 0) { -- ret = base_length; -- goto immediate_exit; -- } -- -- if (s->bdev_length > base_length) { -+ if (s->bdev_length > target_length) { - ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF, - NULL); - if (ret < 0) { - goto immediate_exit; - } - } -+ } else if (s->bdev_length != target_length) { -+ error_setg(errp, "Source and target image have different sizes"); -+ ret = -EINVAL; -+ goto immediate_exit; - } - - if (s->bdev_length == 0) { --- -2.27.0 - diff --git a/mirror-Wait-only-for-in-flight-operations.patch b/mirror-Wait-only-for-in-flight-operations.patch deleted file mode 100644 index d1b00c059a7afc7d25a7ec53f18cae8dbcdad85f..0000000000000000000000000000000000000000 --- a/mirror-Wait-only-for-in-flight-operations.patch +++ /dev/null @@ -1,83 +0,0 @@ -From b4e1ea1c59e4dd8cc95b97ccc4eb1d3957fe5489 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 26 Mar 2020 16:36:28 +0100 -Subject: [PATCH] mirror: Wait only for in-flight operations - -mirror_wait_for_free_in_flight_slot() just picks a random operation to -wait for. However, a MirrorOp is already in s->ops_in_flight when -mirror_co_read() waits for free slots, so if not enough slots are -immediately available, an operation can end up waiting for itself, or -two or more operations can wait for each other to complete, which -results in a hang. - -Fix this by adding a flag to MirrorOp that tells us if the request is -already in flight (and therefore occupies slots that it will later -free), and picking only such operations for waiting. 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 -Signed-off-by: Kevin Wolf -Message-Id: <20200326153628.4869-3-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf ---- - block/mirror.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 8f0d4544d8..abcf60a961 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -100,6 +100,7 @@ struct MirrorOp { - - bool is_pseudo_op; - bool is_active_write; -+ bool is_in_flight; - CoQueue waiting_requests; - - QTAILQ_ENTRY(MirrorOp) next; -@@ -290,7 +291,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - * caller of this function. Since there is only one pseudo op - * at any given time, we will always find some real operation - * to wait on. */ -- if (!op->is_pseudo_op && op->is_active_write == active) { -+ if (!op->is_pseudo_op && op->is_in_flight && -+ op->is_active_write == active) -+ { - qemu_co_queue_wait(&op->waiting_requests, NULL); - return; - } -@@ -364,6 +367,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - /* Copy the dirty cluster. */ - s->in_flight++; - s->bytes_in_flight += op->bytes; -+ op->is_in_flight = true; - trace_mirror_one_iteration(s, op->offset, op->bytes); - - ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, -@@ -379,6 +383,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, - op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); -@@ -393,6 +398,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); - mirror_write_complete(op, ret); -@@ -1305,6 +1311,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, - .offset = offset, - .bytes = bytes, - .is_active_write = true, -+ .is_in_flight = true, - }; - qemu_co_queue_init(&op->waiting_requests); - QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); --- -2.27.0 - diff --git a/module-Prevent-crash-by-resetting-local_err-in-modul.patch b/module-Prevent-crash-by-resetting-local_err-in-modul.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ad7270c27e109553303635f252d05c49d4ee1f2 --- /dev/null +++ b/module-Prevent-crash-by-resetting-local_err-in-modul.patch @@ -0,0 +1,45 @@ +From 7187ed9e2010adfe937d6444eb79d8025c118c2c Mon Sep 17 00:00:00 2001 +From: Alexander Ivanov +Date: Fri, 9 Aug 2024 14:13:40 +0200 +Subject: [PATCH] module: Prevent crash by resetting local_err in + module_load_qom_all() + +Set local_err to NULL after it has been freed in error_report_err(). This +avoids triggering assert(*errp == NULL) failure in error_setv() when +local_err is reused in the loop. + +Signed-off-by: Alexander Ivanov +Reviewed-by: Claudio Fontana +Reviewed-by: Denis V. Lunev +Link: https://lore.kernel.org/r/20240809121340.992049-2-alexander.ivanov@virtuozzo.com +[Do the same by moving the declaration instead. 
- Paolo] +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry picked from commit 940d802b24e63650e0eacad3714e2ce171cba17c) +Signed-off-by: zhujun2 +--- + util/module.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/util/module.c b/util/module.c +index 32e263163c..3eb0f06df1 100644 +--- a/util/module.c ++++ b/util/module.c +@@ -354,13 +354,13 @@ int module_load_qom(const char *type, Error **errp) + void module_load_qom_all(void) + { + const QemuModinfo *modinfo; +- Error *local_err = NULL; + + if (module_loaded_qom_all) { + return; + } + + for (modinfo = module_info; modinfo->name != NULL; modinfo++) { ++ Error *local_err = NULL; + if (!modinfo->objs) { + continue; + } +-- +2.41.0.windows.1 + diff --git a/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch b/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch new file mode 100644 index 0000000000000000000000000000000000000000..6bda51123bb31527b02f67c633a8adcb3e210118 --- /dev/null +++ b/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch @@ -0,0 +1,96 @@ +From a344d8636168ba5f034a908d3394ef88d36133dd Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Thu, 10 Feb 2022 11:18:13 +0800 +Subject: [PATCH] monitor: Discard BLOCK_IO_ERROR event when VM rebooted + +Throttled event like QAPI_EVENT_BLOCK_IO_ERROR may be queued +to limit event rate. Event may be delivered when VM is rebooted +if the event was queued in the *monitor_qapi_event_state* hash table. +Which may cause VM pause and other related problems. 
+Such as seabios blocked during virtio-scsi initialization: + vring_add_buf(vq, sg, out_num, in_num, 0, 0); + vring_kick(vp, vq, 1); + ------------> VM paused here <----------- + /* Wait for reply */ + while (!vring_more_used(vq)) usleep(5); + +Signed-off-by: Yan Wang +--- + include/monitor/monitor.h | 2 ++ + monitor/monitor.c | 29 +++++++++++++++++++++++++++++ + system/runstate.c | 1 + + 3 files changed, 32 insertions(+) + +diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h +index 965f5d5450..60079086a8 100644 +--- a/include/monitor/monitor.h ++++ b/include/monitor/monitor.h +@@ -63,4 +63,6 @@ void monitor_register_hmp_info_hrt(const char *name, + int error_vprintf_unless_qmp(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0); + int error_printf_unless_qmp(const char *fmt, ...) G_GNUC_PRINTF(1, 2); + ++void monitor_qapi_event_discard_io_error(void); ++ + #endif /* MONITOR_H */ +diff --git a/monitor/monitor.c b/monitor/monitor.c +index e540c1334a..8d59a76612 100644 +--- a/monitor/monitor.c ++++ b/monitor/monitor.c +@@ -34,6 +34,8 @@ + #include "qemu/option.h" + #include "sysemu/qtest.h" + #include "trace.h" ++#include "qemu/log.h" ++#include "qapi/qmp/qobject.h" + + /* + * To prevent flooding clients, events can be throttled. 
The +@@ -787,6 +789,33 @@ int monitor_init_opts(QemuOpts *opts, Error **errp) + return ret; + } + ++void monitor_qapi_event_discard_io_error(void) ++{ ++ GHashTableIter event_iter; ++ MonitorQAPIEventState *evstate; ++ gpointer key, value; ++ GString *json; ++ ++ qemu_mutex_lock(&monitor_lock); ++ g_hash_table_iter_init(&event_iter, monitor_qapi_event_state); ++ while (g_hash_table_iter_next(&event_iter, &key, &value)) { ++ evstate = key; ++ /* Only QAPI_EVENT_BLOCK_IO_ERROR is discarded */ ++ if (evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) { ++ g_hash_table_iter_remove(&event_iter); ++ json = qobject_to_json(QOBJECT(evstate->qdict)); ++ qemu_log(" %s event discarded\n", json->str); ++ timer_del(evstate->timer); ++ timer_free(evstate->timer); ++ qobject_unref(evstate->data); ++ qobject_unref(evstate->qdict); ++ g_string_free(json, true); ++ g_free(evstate); ++ } ++ } ++ qemu_mutex_unlock(&monitor_lock); ++} ++ + QemuOptsList qemu_mon_opts = { + .name = "mon", + .implied_opt_name = "chardev", +diff --git a/system/runstate.c b/system/runstate.c +index 9d3f627fee..62e6db8d42 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -503,6 +503,7 @@ void qemu_system_reset(ShutdownCause reason) + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } + cpu_synchronize_all_post_reset(); ++ monitor_qapi_event_discard_io_error(); + } + + /* +-- +2.27.0 + diff --git a/monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch b/monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch deleted file mode 100644 index 791449b59540fd0d66cac0c367af8436abb55741..0000000000000000000000000000000000000000 --- a/monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 117082ef493e62e6e2cd972b309e0cd72682ab02 Mon Sep 17 00:00:00 2001 -From: Chen Qun -Date: Tue, 14 Apr 2020 19:50:59 +0800 -Subject: [PATCH] moniter: fix memleak in monitor_fdset_dup_fd_find_remove - -When remove dup_fd in monitor_fdset_dup_fd_find_remove function, 
-we need to free mon_fdset_fd_dup. ASAN shows memory leak stack: - -Direct leak of 96 byte(s) in 3 object(s) allocated from: - #0 0xfffd37b033b3 in __interceptor_calloc (/lib64/libasan.so.4+0xd33b3) - #1 0xfffd375c71cb in g_malloc0 (/lib64/libglib-2.0.so.0+0x571cb) - #2 0xaaae25bf1c17 in monitor_fdset_dup_fd_add /qemu/monitor.c:2576 - #3 0xaaae265cfd8f in qemu_open /qemu/util/osdep.c:315 - #4 0xaaae264e2b2b in qmp_chardev_open_file_source /qemu/chardev/char-fd.c:122 - #5 0xaaae264e47cf in qmp_chardev_open_file /qemu/chardev/char-file.c:81 - #6 0xaaae264e118b in qemu_char_open /qemu/chardev/char.c:237 - #7 0xaaae264e118b in qemu_chardev_new /qemu/chardev/char.c:964 - #8 0xaaae264e1543 in qemu_chr_new_from_opts /qemu/chardev/char.c:680 - #9 0xaaae25e12e0f in chardev_init_func /qemu/vl.c:2083 - #10 0xaaae26603823 in qemu_opts_foreach /qemu/util/qemu-option.c:1170 - #11 0xaaae258c9787 in main /qemu/vl.c:4089 - #12 0xfffd35b80b9f in __libc_start_main (/lib64/libc.so.6+0x20b9f) - -Reported-by: Euler Robot -Signed-off-by: Chen Qun -(cherry picked from commit a661614de18c89f58cad3fc1bb8aab44e820183a) ---- - monitor/misc.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/monitor/misc.c b/monitor/misc.c -index 00338c00..0d6369ba 100644 ---- a/monitor/misc.c -+++ b/monitor/misc.c -@@ -1746,6 +1746,7 @@ static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove) - if (mon_fdset_fd_dup->fd == dup_fd) { - if (remove) { - QLIST_REMOVE(mon_fdset_fd_dup, next); -+ g_free(mon_fdset_fd_dup); - if (QLIST_EMPTY(&mon_fdset->dup_fds)) { - monitor_fdset_cleanup(mon_fdset); - } --- -2.23.0 diff --git a/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch b/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch new file mode 100644 index 0000000000000000000000000000000000000000..8dc8fc1219659e8010693e153e50fbbb4d13301c --- /dev/null +++ b/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch @@ -0,0 +1,111 @@ +From c6b183a4c3c63454dea39be26b0fb773ec04887e Mon Sep 
17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 14:13:05 +0800 +Subject: [PATCH] monitor/qmp: drop inflight rsp if qmp client broken + +If libvirt restarts while qemu is handling a qmp message, libvirt will +reconnect to the qemu monitor socket and query the status of qemu by qmp. +But qemu may send the previous qmp response to the new connection, so libvirt +receives an unexpected response, concludes that qemu is abnormal, and +kills qemu. + +This patch adds a qmp client id, which changes on reconnect. When +responding, check whether the id is unchanged; if not, drop the response. +--- + monitor/monitor-internal.h | 1 + + monitor/qmp.c | 19 +++++++++++-------- + 2 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h +index 252de85681..d7842fa464 100644 +--- a/monitor/monitor-internal.h ++++ b/monitor/monitor-internal.h +@@ -144,6 +144,7 @@ typedef struct { + const QmpCommandList *commands; + bool capab_offered[QMP_CAPABILITY__MAX]; /* capabilities offered */ + bool capab[QMP_CAPABILITY__MAX]; /* offered and accepted */ ++ uint64_t qmp_client_id; /*qmp client id, update if peer disconnect */ + /* + * Protects qmp request/response queue. + * Take monitor_lock first when you need both. +diff --git a/monitor/qmp.c b/monitor/qmp.c +index 6eee450fe4..8f7671c5f1 100644 +--- a/monitor/qmp.c ++++ b/monitor/qmp.c +@@ -149,18 +149,19 @@ void qmp_send_response(MonitorQMP *mon, const QDict *rsp) + * Null @rsp can only happen for commands with QCO_NO_SUCCESS_RESP. + * Nothing is emitted then. + */ +-static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp) ++static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp, uint64_t req_client_id) + { +- if (rsp) { +- qmp_send_response(mon, rsp); ++ if (!rsp || (mon->qmp_client_id != req_client_id)) { ++ return; + } ++ qmp_send_response(mon, rsp); + } + + /* + * Runs outside of coroutine context for OOB commands, but in + * coroutine context for everything else. 
+ */ +-static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) ++static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req, uint64_t req_client_id) + { + QDict *rsp; + QDict *error; +@@ -180,7 +181,7 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) + } + } + +- monitor_qmp_respond(mon, rsp); ++ monitor_qmp_respond(mon, rsp, req_client_id); + qobject_unref(rsp); + } + +@@ -340,13 +341,13 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) + trace_monitor_qmp_cmd_in_band(id_json->str); + g_string_free(id_json, true); + } +- monitor_qmp_dispatch(mon, req_obj->req); ++ monitor_qmp_dispatch(mon, req_obj->req, mon->qmp_client_id); + } else { + assert(req_obj->err); + trace_monitor_qmp_err_in_band(error_get_pretty(req_obj->err)); + rsp = qmp_error_response(req_obj->err); + req_obj->err = NULL; +- monitor_qmp_respond(mon, rsp); ++ monitor_qmp_respond(mon, rsp, mon->qmp_client_id); + qobject_unref(rsp); + } + +@@ -402,7 +403,7 @@ static void handle_qmp_command(void *opaque, QObject *req, Error *err) + trace_monitor_qmp_cmd_out_of_band(id_json->str); + g_string_free(id_json, true); + } +- monitor_qmp_dispatch(mon, req); ++ monitor_qmp_dispatch(mon, req, mon->qmp_client_id); + qobject_unref(req); + return; + } +@@ -486,6 +487,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event) + mon_refcount++; + break; + case CHR_EVENT_CLOSED: ++ mon->qmp_client_id++; + /* + * Note: this is only useful when the output of the chardev + * backend is still open. 
For example, when the backend is +@@ -539,6 +541,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp) + } + qemu_chr_fe_set_echo(&mon->common.chr, true); + ++ mon->qmp_client_id = 1; + /* Note: we run QMP monitor in I/O thread when @chr supports that */ + monitor_data_init(&mon->common, true, false, + qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_GCONTEXT)); +-- +2.27.0 + diff --git a/msix-add-valid.accepts-methods-to-check-address.patch b/msix-add-valid.accepts-methods-to-check-address.patch deleted file mode 100644 index 67397549683ec1e44d29c1f858f4154bc3b6a024..0000000000000000000000000000000000000000 --- a/msix-add-valid.accepts-methods-to-check-address.patch +++ /dev/null @@ -1,78 +0,0 @@ -From e9cc24b1737f745b23c408b183dd34fda5abc30c Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Fri, 19 Feb 2021 16:28:00 +0800 -Subject: [PATCH] msix: add valid.accepts methods to check address - -Fix CVE-2020-13754 - -While doing msi-x mmio operations, a guest may send an address -that leads to an OOB access issue. Add valid.accepts methods to -ensure that ensuing mmio r/w operation don't go beyond regions. 
- -Reported-by: Ren Ding -Reported-by: Hanqing Zhao -Reported-by: Anatoly Trosinenko -Reported-by: Alexander Bulekov -Signed-off-by: Prasad J Pandit - -patch link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00004.html -Signed-off-by: Jiajie Li ---- - hw/pci/msix.c | 20 ++++++++++++++++++++ - 1 file changed, 20 insertions(+) - -diff --git a/hw/pci/msix.c b/hw/pci/msix.c -index d39dcf32e8..ec43f16875 100644 ---- a/hw/pci/msix.c -+++ b/hw/pci/msix.c -@@ -192,6 +192,15 @@ static void msix_table_mmio_write(void *opaque, hwaddr addr, - msix_handle_mask_update(dev, vector, was_masked); - } - -+static bool msix_table_accepts(void *opaque, hwaddr addr, unsigned size, -+ bool is_write, MemTxAttrs attrs) -+{ -+ PCIDevice *dev = opaque; -+ uint16_t tbl_size = dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE; -+ -+ return dev->msix_table + addr + 4 <= dev->msix_table + tbl_size; -+} -+ - static const MemoryRegionOps msix_table_mmio_ops = { - .read = msix_table_mmio_read, - .write = msix_table_mmio_write, -@@ -199,6 +208,7 @@ static const MemoryRegionOps msix_table_mmio_ops = { - .valid = { - .min_access_size = 4, - .max_access_size = 4, -+ .accepts = msix_table_accepts - }, - }; - -@@ -220,6 +230,15 @@ static void msix_pba_mmio_write(void *opaque, hwaddr addr, - { - } - -+static bool msix_pba_accepts(void *opaque, hwaddr addr, unsigned size, -+ bool is_write, MemTxAttrs attrs) -+{ -+ PCIDevice *dev = opaque; -+ uint16_t pba_size = QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8; -+ -+ return dev->msix_pba + addr + 4 <= dev->msix_pba + pba_size; -+} -+ - static const MemoryRegionOps msix_pba_mmio_ops = { - .read = msix_pba_mmio_read, - .write = msix_pba_mmio_write, -@@ -227,6 +246,7 @@ static const MemoryRegionOps msix_pba_mmio_ops = { - .valid = { - .min_access_size = 4, - .max_access_size = 4, -+ .accepts = msix_pba_accepts - }, - }; - --- -2.27.0 - diff --git a/multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch 
b/multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch deleted file mode 100644 index ef380e66655d0faf290b1d61072165a64bc0861e..0000000000000000000000000000000000000000 --- a/multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 3db288bbddb730960430fb4907e100f19001ca0a Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 14:31:07 +0800 -Subject: [PATCH] multifd: Make sure that we don't do any IO after an error - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert ---- - migration/ram.c | 22 +++++++++++++--------- - 1 file changed, 13 insertions(+), 9 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 3ded38c0be..b74929542d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3617,7 +3617,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - { - RAMState **temp = opaque; - RAMState *rs = *temp; -- int ret; -+ int ret = 0; - int i; - int64_t t0; - int done = 0; -@@ -3686,12 +3686,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -- ram_counters.transferred += 8; -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ ram_counters.transferred += 8; - -- ret = qemu_file_get_error(f); -+ ret = qemu_file_get_error(f); -+ } - if (ret < 0) { - return ret; - } -@@ -3745,9 +3747,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - - rcu_read_unlock(); - -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ } - - return ret; - } --- -2.27.0 - diff --git a/multifd-bugfix-for-incorrect-migration-data-with-QPL.patch b/multifd-bugfix-for-incorrect-migration-data-with-QPL.patch new file mode 100644 index 
0000000000000000000000000000000000000000..850327ad3a98fd39e10071b79452b1ae35ad4188 --- /dev/null +++ b/multifd-bugfix-for-incorrect-migration-data-with-QPL.patch @@ -0,0 +1,47 @@ +From 1b0fb2f08c76bc727e52ff763ed5bb7ee1bda820 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Wed, 18 Dec 2024 17:14:12 +0800 +Subject: [98/99] multifd: bugfix for incorrect migration data with QPL + compression + +commit 2588a5f99b0c3493b4690e3ff01ed36f80e830cc upstream. + +When QPL compression is enabled on the migration channel and the same +dirty page changes from a normal page to a zero page in the iterative +memory copy, the dirty page will not be updated to a zero page again +on the target side, resulting in incorrect memory data on the source +and target sides. + +The root cause is that the target side does not record the normal pages +to the receivedmap. + +The solution is to add ramblock_recv_bitmap_set_offset in target side +to record the normal pages. + +Intel-SIG: commit 2588a5f99b0c multifd: bugfix for incorrect migration data with QPL compression + +Signed-off-by: Yuan Liu +Reviewed-by: Jason Zeng +Reviewed-by: Peter Xu +Message-Id: <20241218091413.140396-3-yuan1.liu@intel.com> +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-qpl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +index 9265098ee7..fea60e3937 100644 +--- a/migration/multifd-qpl.c ++++ b/migration/multifd-qpl.c +@@ -730,6 +730,7 @@ static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp) + qpl->zlen[i] = be32_to_cpu(qpl->zlen[i]); + assert(qpl->zlen[i] <= p->page_size); + zbuf_len += qpl->zlen[i]; ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + } + + /* read compressed pages */ +-- +2.33.0 + diff --git a/multifd-bugfix-for-incorrect-migration-data-with-qat.patch b/multifd-bugfix-for-incorrect-migration-data-with-qat.patch new file mode 100644 index 
0000000000000000000000000000000000000000..52b71b6218de53773a6606b48a2bfada897c054b --- /dev/null +++ b/multifd-bugfix-for-incorrect-migration-data-with-qat.patch @@ -0,0 +1,51 @@ +From 7541385c82f3c85fc8727080bb224dd8761fe719 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Wed, 18 Dec 2024 17:14:13 +0800 +Subject: [99/99] multifd: bugfix for incorrect migration data with qatzip + compression + +commit a523bc52166c80d8a04d46584f9f3868bd53ef69 upstream. + +When QPL compression is enabled on the migration channel and the same +dirty page changes from a normal page to a zero page in the iterative +memory copy, the dirty page will not be updated to a zero page again +on the target side, resulting in incorrect memory data on the source +and target sides. + +The root cause is that the target side does not record the normal pages +to the receivedmap. + +The solution is to add ramblock_recv_bitmap_set_offset in target side +to record the normal pages. + +Intel-SIG: commit a523bc52166c multifd: bugfix for incorrect migration data with qatzip compression + +Signed-off-by: Yuan Liu +Reviewed-by: Jason Zeng +Reviewed-by: Peter Xu +Message-Id: <20241218091413.140396-4-yuan1.liu@intel.com> +Signed-off-by: Fabiano Rosas + + Conflicts: + migration/multifd-qatzip.c +[jz: resolve context conflict] +Signed-off-by: Jason Zeng +--- + migration/multifd-qatzip.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c +index 3c787ed879..88b6fb44ad 100644 +--- a/migration/multifd-qatzip.c ++++ b/migration/multifd-qatzip.c +@@ -373,6 +373,7 @@ static int qatzip_recv(MultiFDRecvParams *p, Error **errp) + memcpy(p->host + p->normal[i], + q->out_buf + p->page_size * i, + p->page_size); ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + } + return 0; + } +-- +2.33.0 + diff --git a/multifd-bugfix-for-migration-using-compression-metho.patch b/multifd-bugfix-for-migration-using-compression-metho.patch new file mode 100644 index 
0000000000000000000000000000000000000000..9760860e7b2b65fd5468639b27caba0805b1ffe8 --- /dev/null +++ b/multifd-bugfix-for-migration-using-compression-metho.patch @@ -0,0 +1,63 @@ +From 123e52e1dc6629fec922dad4f7c97e23a82ec157 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Wed, 18 Dec 2024 17:14:11 +0800 +Subject: [97/99] multifd: bugfix for migration using compression methods + +commit cdc3970f8597ebdc1a4c2090cfb4d11e297329ed upstream. + +When compression is enabled on the migration channel and +the pages processed are all zero pages, these pages will +not be sent and updated on the target side, resulting in +incorrect memory data on the source and target sides. + +The root cause is that all compression methods call +multifd_send_prepare_common to determine whether to compress +dirty pages, but multifd_send_prepare_common does not update +the IOV of MultiFDPacket_t when all dirty pages are zero pages. + +The solution is to always update the IOV of MultiFDPacket_t +regardless of whether the dirty pages are all zero pages. + +Intel-SIG: commit cdc3970f8597 multifd: bugfix for migration using compression methods + +Fixes: 303e6f54f9 ("migration/multifd: Implement zero page transmission on the multifd thread.") +Cc: qemu-stable@nongnu.org #9.0+ +Signed-off-by: Yuan Liu +Reviewed-by: Jason Zeng +Reviewed-by: Peter Xu +Message-Id: <20241218091413.140396-2-yuan1.liu@intel.com> +Signed-off-by: Fabiano Rosas + + Conflicts: + migration/multifd-nocomp.c +[jz: upstream has split nocomp code into multifd-nocomp.c, while + openEuler hasn't yet. 
The function that needs to be fixed + is still in multifd.c, so we fix it in multifd.c] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 36581a5631..4c310deb61 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1488,6 +1488,7 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + + bool multifd_send_prepare_common(MultiFDSendParams *p) + { ++ multifd_send_prepare_header(p); + multifd_send_zero_page_detect(p); + + if (!p->pages->normal_num) { +@@ -1495,7 +1496,5 @@ bool multifd_send_prepare_common(MultiFDSendParams *p) + return false; + } + +- multifd_send_prepare_header(p); +- + return true; + } +-- +2.33.0 + diff --git a/multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch b/multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch deleted file mode 100644 index a2209ef5776728ced1205b9b1ce3e50daeaa84d3..0000000000000000000000000000000000000000 --- a/multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch +++ /dev/null @@ -1,37 +0,0 @@ -From a4288f41b3af9f4f73f162b89007c6928509a43c Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 14:51:51 +0800 -Subject: [PATCH] multifd/tls: fix memoryleak of the QIOChannelSocket object - when cancelling migration -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When creating new tls client, the tioc->master will be referenced which results in socket -leaking after multifd_save_cleanup if we cancel migration. -Fix it by do object_unref() after tls client creation. - -Suggested-by: Daniel P. Berrangé -Signed-off-by: Chuan Zheng -Message-Id: <1605104763-118687-1-git-send-email-zhengchuan@huawei.com> -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert ---- - migration/ram.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/migration/ram.c b/migration/ram.c -index a37dbfc049..92ce1a53e7 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1246,6 +1246,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, - return; - } - -+ object_unref(OBJECT(ioc)); - trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); - qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); - p->c = QIO_CHANNEL(tioc); --- -2.27.0 - diff --git a/nbd-Minor-style-and-typo-fixes.patch b/nbd-Minor-style-and-typo-fixes.patch new file mode 100644 index 0000000000000000000000000000000000000000..93f9a22860581ff057c3a94d991d5db96b9692a3 --- /dev/null +++ b/nbd-Minor-style-and-typo-fixes.patch @@ -0,0 +1,49 @@ +From 2afaa29abe368d51bbd553e3ebacd7e310c8e5c7 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 1 Aug 2024 16:49:20 -0500 +Subject: [PATCH] nbd: Minor style and typo fixes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Touch up a comment with the wrong type name, and an over-long line, +both noticed while working on upcoming patches. + +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-10-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +--- + nbd/server.c | 2 +- + qemu-nbd.c | 3 ++- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index e8baed9705..7cf61e5aa7 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1865,7 +1865,7 @@ static void nbd_export_request_shutdown(BlockExport *blk_exp) + + blk_exp_ref(&exp->common); + /* +- * TODO: Should we expand QMP NbdServerRemoveNode enum to allow a ++ * TODO: Should we expand QMP BlockExportRemoveMode enum to allow a + * close mode that stops advertising the export to new clients but + * still permits existing clients to run to completion? 
Because of + * that possibility, nbd_export_close() can be called more than +diff --git a/qemu-nbd.c b/qemu-nbd.c +index acccf2977f..bfcc653d13 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -587,7 +587,8 @@ int main(int argc, char **argv) + pthread_t client_thread; + const char *fmt = NULL; + Error *local_err = NULL; +- BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; ++ BlockdevDetectZeroesOptions detect_zeroes = ++ BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; + QDict *options = NULL; + const char *export_name = NULL; /* defaults to "" later for server mode */ + const char *export_description = NULL; +-- +2.45.1.windows.1 + diff --git a/nbd-fix-uninitialized-variable-warning.patch b/nbd-fix-uninitialized-variable-warning.patch deleted file mode 100644 index be69252807d1a7fbd5be7e8e6d187862527c439d..0000000000000000000000000000000000000000 --- a/nbd-fix-uninitialized-variable-warning.patch +++ /dev/null @@ -1,46 +0,0 @@ -From eb5abb631196b97879a868ec75e7f70400695f7f Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Mon, 13 Jan 2020 17:03:08 +0800 -Subject: [PATCH] nbd: fix uninitialized variable warning - -Fixes: -/mnt/sdb/qemu/nbd/server.c: In function 'nbd_handle_request': -/mnt/sdb/qemu/nbd/server.c:2313:9: error: 'ret' may be used uninitialized in this function [-Werror=maybe-uninitialized] - int ret; - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan ---- - nbd/server.c | 10 +--------- - 1 file changed, 1 insertion(+), 9 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index e21bd501..aefb07d9 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -2304,20 +2304,12 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, - !client->export_meta.bitmap, - NBD_META_ID_BASE_ALLOCATION, - errp); -- if (ret < 0) { -- return ret; -- } -- } -- -- if (client->export_meta.bitmap) { -+ } else { /* client->export_meta.bitmap */ - ret = nbd_co_send_bitmap(client, request->handle, - client->exp->export_bitmap, - request->from, 
request->len, - dont_fragment, - true, NBD_META_ID_DIRTY_BITMAP, errp); -- if (ret < 0) { -- return ret; -- } - } - - return ret; --- -2.18.1 - - diff --git a/nbd-server-Avoid-long-error-message-assertions-CVE-2.patch b/nbd-server-Avoid-long-error-message-assertions-CVE-2.patch deleted file mode 100644 index 71ce6cabd3a38d5af0701f579babe793b1ea7d07..0000000000000000000000000000000000000000 --- a/nbd-server-Avoid-long-error-message-assertions-CVE-2.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 719292175d391e77487f3c55f5f97a065e44d9f8 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 10 Jun 2020 18:32:01 -0400 -Subject: [PATCH] nbd/server: Avoid long error message assertions - CVE-2020-10761 - -RH-Author: Eric Blake -Message-id: <20200610183202.3780750-2-eblake@redhat.com> -Patchwork-id: 97494 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] nbd/server: Avoid long error message assertions CVE-2020-10761 -Bugzilla: 1845384 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Ever since commit 36683283 (v2.8), the server code asserts that error -strings sent to the client are well-formed per the protocol by not -exceeding the maximum string length of 4096. At the time the server -first started sending error messages, the assertion could not be -triggered, because messages were completely under our control. 
-However, over the years, we have added latent scenarios where a client -could trigger the server to attempt an error message that would -include the client's information if it passed other checks first: - -- requesting NBD_OPT_INFO/GO on an export name that is not present - (commit 0cfae925 in v2.12 echoes the name) - -- requesting NBD_OPT_LIST/SET_META_CONTEXT on an export name that is - not present (commit e7b1948d in v2.12 echoes the name) - -At the time, those were still safe because we flagged names larger -than 256 bytes with a different message; but that changed in commit -93676c88 (v4.2) when we raised the name limit to 4096 to match the NBD -string limit. (That commit also failed to change the magic number -4096 in nbd_negotiate_send_rep_err to the just-introduced named -constant.) So with that commit, long client names appended to server -text can now trigger the assertion, and thus be used as a denial of -service attack against a server. As a mitigating factor, if the -server requires TLS, the client cannot trigger the problematic paths -unless it first supplies TLS credentials, and such trusted clients are -less likely to try to intentionally crash the server. - -We may later want to further sanitize the user-supplied strings we -place into our error messages, such as scrubbing out control -characters, but that is less important to the CVE fix, so it can be a -later patch to the new nbd_sanitize_name. - -Consideration was given to changing the assertion in -nbd_negotiate_send_rep_verr to instead merely log a server error and -truncate the message, to avoid leaving a latent path that could -trigger a future CVE DoS on any new error message. However, this -merely complicates the code for something that is already (correctly) -flagging coding errors, and now that we are aware of the long message -pitfall, we are less likely to introduce such errors in the future, -which would make such error handling dead code. 
- -Reported-by: Xueqiang Wei -CC: qemu-stable@nongnu.org -Fixes: https://bugzilla.redhat.com/1843684 CVE-2020-10761 -Fixes: 93676c88d7 -Signed-off-by: Eric Blake -Message-Id: <20200610163741.3745251-2-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5c4fe018c025740fef4a0a4421e8162db0c3eefd) -Signed-off-by: Eric Blake -Signed-off-by: Eduardo Lima (Etrunko) ---- - nbd/server.c | 21 +++++++++++++++++++-- - tests/qemu-iotests/143 | 4 ++++ - tests/qemu-iotests/143.out | 2 ++ - 3 files changed, 25 insertions(+), 2 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 2d81248967..115e8f06ed 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -229,6 +229,19 @@ out: - return ret; - } - -+/* -+ * Return a malloc'd copy of @name suitable for use in an error reply. -+ */ -+static char * -+nbd_sanitize_name(const char *name) -+{ -+ if (strnlen(name, 80) < 80) { -+ return g_strdup(name); -+ } -+ /* XXX Should we also try to sanitize any control characters? */ -+ return g_strdup_printf("%.80s...", name); -+} -+ - /* Send an error reply. - * Return -errno on error, 0 on success. 
*/ - static int GCC_FMT_ATTR(4, 5) -@@ -584,9 +597,11 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags, - - exp = nbd_export_find(name); - if (!exp) { -+ g_autofree char *sane_name = nbd_sanitize_name(name); -+ - return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN, - errp, "export '%s' not present", -- name); -+ sane_name); - } - - /* Don't bother sending NBD_INFO_NAME unless client requested it */ -@@ -975,8 +990,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client, - - meta->exp = nbd_export_find(export_name); - if (meta->exp == NULL) { -+ g_autofree char *sane_name = nbd_sanitize_name(export_name); -+ - return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, -- "export '%s' not present", export_name); -+ "export '%s' not present", sane_name); - } - - ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp); -diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 -index f649b36195..d2349903b1 100755 ---- a/tests/qemu-iotests/143 -+++ b/tests/qemu-iotests/143 -@@ -58,6 +58,10 @@ _send_qemu_cmd $QEMU_HANDLE \ - $QEMU_IO_PROG -f raw -c quit \ - "nbd+unix:///no_such_export?socket=$SOCK_DIR/nbd" 2>&1 \ - | _filter_qemu_io | _filter_nbd -+# Likewise, with longest possible name permitted in NBD protocol -+$QEMU_IO_PROG -f raw -c quit \ -+ "nbd+unix:///$(printf %4096d 1 | tr ' ' a)?socket=$SOCK_DIR/nbd" 2>&1 \ -+ | _filter_qemu_io | _filter_nbd | sed 's/aaaa*aa/aa--aa/' - - _send_qemu_cmd $QEMU_HANDLE \ - "{ 'execute': 'quit' }" \ -diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out -index 037d34a409..fc7bab3129 100644 ---- a/tests/qemu-iotests/143.out -+++ b/tests/qemu-iotests/143.out -@@ -3,6 +3,8 @@ QA output created by 143 - {"return": {}} - qemu-io: can't open device nbd+unix:///no_such_export?socket=SOCK_DIR/nbd: Requested export not available - server reported: export 'no_such_export' not present -+qemu-io: can't open device nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not 
available -+server reported: export 'aa--aa...' not present - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - *** done --- -2.27.0 - diff --git a/nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch b/nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..dfabaed546cff944085d68df3bbe8348a42f9acf --- /dev/null +++ b/nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch @@ -0,0 +1,90 @@ +From 5da793de60f37cf0daaffee3fe8300a1a20bf36b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 22 Aug 2024 09:35:29 -0500 +Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing + server + +Commit 3e7ef738 plugged the use-after-free of the global nbd_server +object, but overlooked a use-after-free of nbd_server->listener. +Although this race is harder to hit, notice that our shutdown path +first drops the reference count of nbd_server->listener, then triggers +actions that can result in a pending client reaching the +nbd_blockdev_client_closed() callback, which in turn calls +qio_net_listener_set_client_func on a potentially stale object. + +If we know we don't want any more clients to connect, and have already +told the listener socket to shut down, then we should not be trying to +update the listener socket's associated function. 
+ +Reproducer: + +> #!/usr/bin/python3 +> +> import os +> from threading import Thread +> +> def start_stop(): +> while 1: +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start", ++"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'') +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'') +> +> def nbd_list(): +> while 1: +> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock') +> +> def test(): +> sst = Thread(target=start_stop) +> sst.start() +> nlt = Thread(target=nbd_list) +> nlt.start() +> +> sst.join() +> nlt.join() +> +> test() + +Fixes: CVE-2024-7409 +Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop") +CC: qemu-stable@nongnu.org +Reported-by: Andrey Drobyshev +Signed-off-by: Eric Blake +Message-ID: <20240822143617.800419-2-eblake@redhat.com> +Reviewed-by: Stefan Hajnoczi +--- + blockdev-nbd.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index f73409ae49..b36f41b7c5 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + static void nbd_update_server_watch(NBDServerData *s) + { +- if (!s->max_connections || s->connections < s->max_connections) { +- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL); +- } else { +- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ if (s->listener) { ++ if (!s->max_connections || s->connections < s->max_connections) { ++ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, ++ NULL); ++ } else { ++ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ } + } + } + +@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server) + */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ server->listener = NULL; + QLIST_FOREACH_SAFE(conn, &server->conns, next, 
tmp) { + qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, + NULL); +-- +2.41.0.windows.1 + diff --git a/nbd-server-CVE-2024-7409-Cap-default-max-connections.patch b/nbd-server-CVE-2024-7409-Cap-default-max-connections.patch new file mode 100644 index 0000000000000000000000000000000000000000..16a6eb941fcc01bdd9214ace38dbdda4d898ea42 --- /dev/null +++ b/nbd-server-CVE-2024-7409-Cap-default-max-connections.patch @@ -0,0 +1,173 @@ +From cfbbd9903e2ea12d365105648ec8e3dfd07b6194 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 6 Aug 2024 13:53:00 -0500 +Subject: [PATCH] nbd/server: CVE-2024-7409: Cap default max-connections to 100 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Allowing an unlimited number of clients to any web service is a recipe +for a rudimentary denial of service attack: the client merely needs to +open lots of sockets without closing them, until qemu no longer has +any more fds available to allocate. + +For qemu-nbd, we default to allowing only 1 connection unless more are +explicitly asked for (-e or --shared); this was historically picked as +a nice default (without an explicit -t, a non-persistent qemu-nbd goes +away after a client disconnects, without needing any additional +follow-up commands), and we are not going to change that interface now +(besides, someday we want to point people towards qemu-storage-daemon +instead of qemu-nbd). + +But for qemu proper, and the newer qemu-storage-daemon, the QMP +nbd-server-start command has historically had a default of unlimited +number of connections, in part because unlike qemu-nbd it is +inherently persistent until nbd-server-stop. 
Allowing multiple client +sockets is particularly useful for clients that can take advantage of +MULTI_CONN (creating parallel sockets to increase throughput), +although known clients that do so (such as libnbd's nbdcopy) typically +use only 8 or 16 connections (the benefits of scaling diminish once +more sockets are competing for kernel attention). Picking a number +large enough for typical use cases, but not unlimited, makes it +slightly harder for a malicious client to perform a denial of service +merely by opening lots of connections without progressing through the +handshake. + +This change does not eliminate CVE-2024-7409 on its own, but reduces +the chance for fd exhaustion or unlimited memory usage as an attack +surface. On the other hand, by itself, it makes it more obvious that +with a finite limit, we have the problem of an unauthenticated client +holding 100 fds opened as a way to block out a legitimate client from +being able to connect; thus, later patches will further add timeouts +to reject clients that are not making progress. + +This is an INTENTIONAL change in behavior, and will break any client +of nbd-server-start that was not passing an explicit max-connections +parameter, yet expects more than 100 simultaneous connections. We are +not aware of any such client (as stated above, most clients aware of +MULTI_CONN get by just fine on 8 or 16 connections, and probably cope +with later connections failing by relying on the earlier connections; +libvirt has not yet been passing max-connections, but generally +creates NBD servers with the intent for a single client for the sake +of live storage migration; meanwhile, the KubeSAN project anticipates +a large cluster sharing multiple clients [up to 8 per node, and up to +100 nodes in a cluster], but it currently uses qemu-nbd with an +explicit --shared=0 rather than qemu-storage-daemon with +nbd-server-start).
+ +We considered using a deprecation period (declare that omitting +max-connections is deprecated, and make it mandatory in 3 releases - +then we don't need to pick an arbitrary default); that has zero risk +of breaking any apps that accidentally depended on more than 100 +connections, and where such breakage might not be noticed under unit +testing but only under the larger loads of production usage. But it +does not close the denial-of-service hole until far into the future, +and requires all apps to change to add the parameter even if 100 was +good enough. It also has a drawback that any app (like libvirt) that +is accidentally relying on an unlimited default should seriously +consider their own CVE now, at which point they are going to change to +pass explicit max-connections sooner than waiting for 3 qemu releases. +Finally, if our changed default breaks an app, that app can always +pass in an explicit max-connections with a larger value. + +It is also intentional that the HMP interface to nbd-server-start is +not changed to expose max-connections (any client needing to fine-tune +things should be using QMP). + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-12-eblake@redhat.com> +Reviewed-by: Daniel P.
Berrangé +[ericb: Expand commit message to summarize Dan's argument for why we +break corner-case back-compat behavior without a deprecation period] +Signed-off-by: Eric Blake +--- + block/monitor/block-hmp-cmds.c | 3 ++- + blockdev-nbd.c | 8 ++++++++ + include/block/nbd.h | 7 +++++++ + qapi/block-export.json | 4 ++-- + 4 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index c729cbf1eb..78a6975852 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -415,7 +415,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict) + goto exit; + } + +- nbd_server_start(addr, NULL, NULL, 0, &local_err); ++ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS, ++ &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + goto exit; +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 267a1de903..24ba5382db 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, + + void nbd_server_start_options(NbdServerOptions *arg, Error **errp) + { ++ if (!arg->has_max_connections) { ++ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz, + arg->max_connections, errp); + } +@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr, + { + SocketAddress *addr_flat = socket_address_flatten(addr); + ++ if (!has_max_connections) { ++ max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp); + qapi_free_SocketAddress(addr_flat); + } +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 1d4d65922d..d4f8b21aec 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd; + */ + #define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 + ++/* ++ * 
NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at ++ * once; must be large enough to allow a MULTI_CONN-aware client like ++ * nbdcopy to create its typical number of 8-16 sockets. ++ */ ++#define NBD_DEFAULT_MAX_CONNECTIONS 100 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 7874a49ba7..1d255d77e3 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -28,7 +28,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0) ++# default: 100) + # + # Since: 4.2 + ## +@@ -63,7 +63,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0). ++# default: 100). + # + # Returns: error if the server is already running. + # +-- +2.45.1.windows.1 + diff --git a/nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch b/nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch new file mode 100644 index 0000000000000000000000000000000000000000..56ccb14e43784dee241f6258e3e314b1738d1ed3 --- /dev/null +++ b/nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch @@ -0,0 +1,162 @@ +From fc5e00fc5ff2e7c454a576a81236131b8c74d042 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 12:23:13 -0500 +Subject: [PATCH] nbd/server: CVE-2024-7409: Close stray clients at server-stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +A malicious client can attempt to connect to an NBD server, and then +intentionally delay progress in the handshake, including if it does +not know the TLS secrets. 
Although the previous two patches reduce +this behavior by capping the default max-connections parameter and +killing slow clients, they did not eliminate the possibility of a +client waiting to close the socket until after the QMP nbd-server-stop +command is executed, at which point qemu would SEGV when trying to +dereference the NULL nbd_server global which is no longer present. +This amounts to a denial of service attack. Worse, if another NBD +server is started before the malicious client disconnects, I cannot +rule out additional adverse effects when the old client interferes +with the connection count of the new server (although the most likely +is a crash due to an assertion failure when checking +nbd_server->connections > 0). + +For environments without this patch, the CVE can be mitigated by +ensuring (such as via a firewall) that only trusted clients can +connect to an NBD server. Note that using frameworks like libvirt +that ensure that TLS is used and that nbd-server-stop is not executed +while any trusted clients are still connected will only help if there +is also no possibility for an untrusted client to open a connection +but then stall on the NBD handshake. + +Given the previous patches, it would be possible to guarantee that no +clients remain connected by having nbd-server-stop sleep for longer +than the default handshake deadline before finally freeing the global +nbd_server object, but that could make QMP non-responsive for a long +time. So instead, this patch fixes the problem by tracking all client +sockets opened while the server is running, and forcefully closing any +such sockets remaining without a completed handshake at the time of +nbd-server-stop, then waiting until the coroutines servicing those +sockets notice the state change. 
nbd-server-stop now has a second +AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the +blk_exp_close_all_type() that disconnects all clients that completed +handshakes), but forced socket shutdown is enough to progress the +coroutines and quickly tear down all clients before the server is +freed, thus finally fixing the CVE. + +This patch relies heavily on the fact that nbd/server.c guarantees +that it only calls nbd_blockdev_client_closed() from the main loop +(see the assertion in nbd_client_put() and the hoops used in +nbd_client_put_nonzero() to achieve that); if we did not have that +guarantee, we would also need a mutex protecting our accesses of the +list of connections to survive re-entrancy from independent iothreads. + +Although I did not actually try to test old builds, it looks like this +problem has existed since at least commit 862172f45c (v2.12.0, 2017) - +even back when that patch started using a QIONetListener to handle +listening on multiple sockets, nbd_server_free() was already unaware +that the nbd_blockdev_client_closed callback can be reached later by a +client thread that has not completed handshakes (and therefore the +client's socket never got added to the list closed in +nbd_export_close_all), despite that patch intentionally tearing down +the QIONetListener to prevent new clients. + +Reported-by: Alexander Ivanov +Fixes: CVE-2024-7409 +CC: qemu-stable@nongnu.org +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-14-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +--- + blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 24ba5382db..f73409ae49 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -21,12 +21,18 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + ++typedef struct NBDConn { ++ QIOChannelSocket *cioc; ++ QLIST_ENTRY(NBDConn) next; ++} NBDConn; ++ + typedef struct NBDServerData { + QIONetListener *listener; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + uint32_t max_connections; + uint32_t connections; ++ QLIST_HEAD(, NBDConn) conns; + } NBDServerData; + + static NBDServerData *nbd_server; +@@ -51,6 +57,14 @@ int nbd_server_max_connections(void) + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { ++ NBDConn *conn = nbd_client_owner(client); ++ ++ assert(qemu_in_main_thread() && nbd_server); ++ ++ object_unref(OBJECT(conn->cioc)); ++ QLIST_REMOVE(conn, next); ++ g_free(conn); ++ + nbd_client_put(client); + assert(nbd_server->connections > 0); + nbd_server->connections--; +@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) + { ++ NBDConn *conn = g_new0(NBDConn, 1); ++ ++ assert(qemu_in_main_thread() && nbd_server); + nbd_server->connections++; ++ object_ref(OBJECT(cioc)); ++ conn->cioc = cioc; ++ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next); + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); + /* TODO - expose handshake timeout as QMP option */ + nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, + nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed, NULL); ++ nbd_blockdev_client_closed, conn); + } + + static void nbd_update_server_watch(NBDServerData *s) +@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s) + + static 
void nbd_server_free(NBDServerData *server) + { ++ NBDConn *conn, *tmp; ++ + if (!server) { + return; + } + ++ /* ++ * Forcefully close the listener socket, and any clients that have ++ * not yet disconnected on their own. ++ */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { ++ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, ++ NULL); ++ } ++ ++ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0); ++ + if (server->tlscreds) { + object_unref(OBJECT(server->tlscreds)); + } +-- +2.45.1.windows.1 + diff --git a/nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch b/nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch new file mode 100644 index 0000000000000000000000000000000000000000..72e902e92e555829cbf5ac659708367962521f68 --- /dev/null +++ b/nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch @@ -0,0 +1,125 @@ +From 5f89a59b5f877d6795bd417c9193efa65fb83c3f Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 8 Aug 2024 16:05:08 -0500 +Subject: [PATCH] nbd/server: CVE-2024-7409: Drop non-negotiating clients +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +A client that opens a socket but does not negotiate is merely hogging +qemu's resources (an open fd and a small amount of memory); and a +malicious client that can access the port where NBD is listening can +attempt a denial of service attack by intentionally opening and +abandoning lots of unfinished connections. The previous patch put a +default bound on the number of such ongoing connections, but once that +limit is hit, no more clients can connect (including legitimate ones). +The solution is to insist that clients complete handshake within a +reasonable time limit, defaulting to 10 seconds. 
A client that has +not successfully completed NBD_OPT_GO by then (including the case of +where the client didn't know TLS credentials to even reach the point +of NBD_OPT_GO) is wasting our time and does not deserve to stay +connected. Later patches will allow fine-tuning the limit away from +the default value (including disabling it for doing integration +testing of the handshake process itself). + +Note that this patch in isolation actually makes it more likely to see +qemu SEGV after nbd-server-stop, as any client socket still connected +when the server shuts down will now be closed after 10 seconds rather +than at the client's whims. That will be addressed in the next patch. + +For a demo of this patch in action: +$ qemu-nbd -f raw -r -t -e 10 file & +$ nbdsh --opt-mode -c ' +H = list() +for i in range(20): + print(i) + H.insert(i, nbd.NBD()) + H[i].set_opt_mode(True) + H[i].connect_uri("nbd://localhost") +' +$ kill $! + +where later connections get to start progressing once earlier ones are +forcefully dropped for taking too long, rather than hanging. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-13-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +[eblake: rebase to changes earlier in series, reduce scope of timer] +Signed-off-by: Eric Blake +Signed-off-by: liuxiangdong +--- + nbd/server.c | 28 +++++++++++++++++++++++++++- + nbd/trace-events | 1 + + 2 files changed, 28 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index b3c4ba2c30..d1b3c35b59 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3044,22 +3044,48 @@ static void nbd_client_receive_next_request(NBDClient *client) + } + } + ++static void nbd_handshake_timer_cb(void *opaque) ++{ ++ QIOChannel *ioc = opaque; ++ ++ trace_nbd_handshake_timer_cb(); ++ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++} ++ + static coroutine_fn void nbd_co_client_start(void *opaque) + { + NBDClient *client = opaque; + Error *local_err = NULL; ++ QEMUTimer *handshake_timer = NULL; + + qemu_co_mutex_init(&client->send_lock); + +- /* TODO - utilize client->handshake_max_secs */ ++ /* ++ * Create a timer to bound the time spent in negotiation. If the ++ * timer expires, it is likely nbd_negotiate will fail because the ++ * socket was shutdown. 
++ */ ++ if (client->handshake_max_secs > 0) { ++ handshake_timer = aio_timer_new(qemu_get_aio_context(), ++ QEMU_CLOCK_REALTIME, ++ SCALE_NS, ++ nbd_handshake_timer_cb, ++ client->sioc); ++ timer_mod(handshake_timer, ++ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ++ client->handshake_max_secs * NANOSECONDS_PER_SECOND); ++ } ++ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); + } ++ timer_free(handshake_timer); + client_close(client, false); + return; + } + ++ timer_free(handshake_timer); + nbd_client_receive_next_request(client); + } + +diff --git a/nbd/trace-events b/nbd/trace-events +index 00ae3216a1..cbd0a4ab7e 100644 +--- a/nbd/trace-events ++++ b/nbd/trace-events +@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload + nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 + nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32 + nbd_trip(void) "Reading request" ++nbd_handshake_timer_cb(void) "client took too long to negotiate" + + # client-connection.c + nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64 +-- +2.45.1.windows.1 + diff --git a/nbd-server-Plumb-in-new-args-to-nbd_client_add.patch b/nbd-server-Plumb-in-new-args-to-nbd_client_add.patch new file mode 100644 index 0000000000000000000000000000000000000000..d73fa9049eed9a865dd75c7cdaf573b326f7f68d --- /dev/null +++ b/nbd-server-Plumb-in-new-args-to-nbd_client_add.patch @@ -0,0 +1,163 @@ +From da0bf4171d0b386d1e7a22ad5b78a3ad48927471 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 08:50:01 -0500 +Subject: [PATCH] nbd/server: Plumb in new args to nbd_client_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +Upcoming patches to fix a CVE need to track an opaque pointer passed +in by the owner of a client object, as well as request for a time +limit on how fast negotiation must complete. Prepare for that by +changing the signature of nbd_client_new() and adding an accessor to +get at the opaque pointer, although for now the two servers +(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though +they pass in a new default timeout value. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-11-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan] +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 6 ++++-- + include/block/nbd.h | 11 ++++++++++- + nbd/server.c | 20 +++++++++++++++++--- + qemu-nbd.c | 4 +++- + 4 files changed, 34 insertions(+), 7 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 213012435f..267a1de903 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); +- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed); ++ /* TODO - expose handshake timeout as QMP option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ nbd_server->tlscreds, nbd_server->tlsauthz, ++ nbd_blockdev_client_closed, NULL); + } + + static void nbd_update_server_watch(NBDServerData *s) +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 4e7bd6342f..1d4d65922d 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts; + + extern const BlockExportDriver blk_exp_nbd; + ++/* ++ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must ++ * succeed at NBD_OPT_GO before being forcefully dropped as too 
slow. ++ */ ++#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp); + NBDExport *nbd_export_find(const char *name); + + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)); ++ void (*close_fn)(NBDClient *, bool), ++ void *owner); ++void *nbd_client_owner(NBDClient *client); + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +diff --git a/nbd/server.c b/nbd/server.c +index 7cf61e5aa7..b3c4ba2c30 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -124,10 +124,12 @@ struct NBDMetaContexts { + struct NBDClient { + int refcount; + void (*close_fn)(NBDClient *client, bool negotiated); ++ void *owner; + + NBDExport *exp; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; ++ uint32_t handshake_max_secs; + QIOChannelSocket *sioc; /* The underlying data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + +@@ -3049,6 +3051,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + + qemu_co_mutex_init(&client->send_lock); + ++ /* TODO - utilize client->handshake_max_secs */ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); +@@ -3061,14 +3064,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + } + + /* +- * Create a new client listener using the given channel @sioc. ++ * Create a new client listener using the given channel @sioc and @owner. + * Begin servicing it in a coroutine. When the connection closes, call +- * @close_fn with an indication of whether the client completed negotiation. ++ * @close_fn with an indication of whether the client completed negotiation ++ * within @handshake_max_secs seconds (0 for unbounded). 
+ */ + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)) ++ void (*close_fn)(NBDClient *, bool), ++ void *owner) + { + NBDClient *client; + Coroutine *co; +@@ -3080,13 +3086,21 @@ void nbd_client_new(QIOChannelSocket *sioc, + object_ref(OBJECT(client->tlscreds)); + } + client->tlsauthz = g_strdup(tlsauthz); ++ client->handshake_max_secs = handshake_max_secs; + client->sioc = sioc; + qio_channel_set_delay(QIO_CHANNEL(sioc), false); + object_ref(OBJECT(client->sioc)); + client->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(client->ioc)); + client->close_fn = close_fn; ++ client->owner = owner; + + co = qemu_coroutine_create(nbd_co_client_start, client); + qemu_coroutine_enter(co); + } ++ ++void * ++nbd_client_owner(NBDClient *client) ++{ ++ return client->owner; ++} +diff --git a/qemu-nbd.c b/qemu-nbd.c +index bfcc653d13..8b09cb5e2a 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -389,7 +389,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + nb_fds++; + nbd_update_server_watch(); +- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed); ++ /* TODO - expose handshake timeout as command line option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ tlscreds, tlsauthz, nbd_client_closed, NULL); + } + + static void nbd_update_server_watch(void) +-- +2.45.1.windows.1 + diff --git a/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch b/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch new file mode 100644 index 0000000000000000000000000000000000000000..350339ecb2ab482936e5a700347cf01556c04051 --- /dev/null +++ b/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch @@ -0,0 +1,45 @@ +From 81b4091eee81fe3871d836b1a684e27828cdc2be Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:42:33 +0800 +Subject: [PATCH] nbd/server.c: fix invalid read after client was already free + +In 
the process of stress testing an NBD device, executing QEMU NBD will +lead to I/O dispatch failures and go to the out process of nbd_trip(). +If two or more I/Os go to the out process, the NBD client will be released in nbd_request_put(). +This causes a use-after-free when the client is read again in client_close(). +Save the value of client->closing before the out process in nbd_trip() +to solve the use-after-free problem. + +Signed-off-by: wangjian161 +--- + nbd/server.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 895cf0a752..e8baed9705 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -2939,6 +2939,7 @@ static coroutine_fn void nbd_trip(void *opaque) + NBDRequestData *req; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; ++ bool client_closing; + Error *local_err = NULL; + + trace_nbd_trip(); +@@ -3023,8 +3024,11 @@ disconnect: + if (local_err) { + error_reportf_err(local_err, "Disconnect client, due to: "); + } ++ client_closing = client->closing; + nbd_request_put(req); +- client_close(client, true); ++ if (!client_closing) { ++ client_close(client, true); ++ } + nbd_client_put(client); + } + +-- +2.27.0 + diff --git a/net-dump.c-Suppress-spurious-compiler-warning.patch b/net-dump.c-Suppress-spurious-compiler-warning.patch new file mode 100644 index 0000000000000000000000000000000000000000..c35ee6f95844981753fcfd9b57510d7131817685 --- /dev/null +++ b/net-dump.c-Suppress-spurious-compiler-warning.patch @@ -0,0 +1,51 @@ +From 6999f07558308ee6b7d63e46ca554a0b702948d6 Mon Sep 17 00:00:00 2001 +From: liuxiangdong +Date: Tue, 8 Feb 2022 15:10:25 +0800 +Subject: [PATCH] net/dump.c: Suppress spurious compiler warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Compiling with gcc version 11.2.0 (Ubuntu 11.2.0-13ubuntu1) results in +a (spurious) warning: + + In function ‘dump_receive_iov’, + inlined from 
‘filter_dump_receive_iov’ at ../net/dump.c:157:5: + ../net/dump.c:89:9: error: ‘writev’ specified size 18446744073709551600 +exceeds maximum object size 9223372036854775807 [-Werror=stringop-overflow=] + 89 | if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + In file included from /home/ptomsich/qemu/include/qemu/osdep.h:108, + from ../net/dump.c:25: + ../net/dump.c: In function ‘filter_dump_receive_iov’: + /usr/include/x86_64-linux-gnu/sys/uio.h:52:16: note: in a call to function +‘writev’ declared with attribute ‘read_only (2, 3)’ + 52 | extern ssize_t writev (int __fd, const struct iovec *__iovec, int +__count) + | ^~~~~~ + cc1: all warnings being treated as errors + +This change helps that version of GCC to understand what is going on +and suppresses this warning. + +Signed-off-by: Philipp Tomsich +--- + net/dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/dump.c b/net/dump.c +index 16073f2458..d880a7e299 100644 +--- a/net/dump.c ++++ b/net/dump.c +@@ -87,7 +87,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt, + dumpiov[0].iov_len = sizeof(hdr); + cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, offset, caplen); + +- if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { ++ if (writev(s->fd, &dumpiov[0], cnt + 1) != sizeof(hdr) + caplen) { + error_report("network dump write error - stopping dump"); + close(s->fd); + s->fd = -1; +-- +2.27.0 + diff --git a/net-eepro100-validate-various-address-valuesi-CVE-20.patch b/net-eepro100-validate-various-address-valuesi-CVE-20.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc259d62847299da04ed6a498ce88bdbc9896931 --- /dev/null +++ b/net-eepro100-validate-various-address-valuesi-CVE-20.patch @@ -0,0 +1,58 @@ +From 6e6215b3ad0c8eac918bca9e2b5bb661e27f2fed Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Sat, 18 Dec 2021 09:39:57 +0800 +Subject: [PATCH] net: eepro100: validate 
various address + valuesi(CVE-2021-20255) + +fix CVE-2021-20255 + +patch link: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg06098.html + +fix CVE-2021-20255, sync patch from ostms platform. + +Signed-off-by: zhouli57 +Signed-off-by: Yan Wang +--- + hw/net/eepro100.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index 69e1c4bb89..f6204ec059 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -279,6 +279,9 @@ typedef struct { + /* Quasi static device properties (no need to save them). */ + uint16_t stats_size; + bool has_extended_tcb_support; ++ ++ /* Flag to avoid recursions. */ ++ bool busy; + } EEPRO100State; + + /* Word indices in EEPROM. */ +@@ -844,6 +847,14 @@ static void action_command(EEPRO100State *s) + Therefore we limit the number of iterations. */ + unsigned max_loop_count = 16; + ++ if (s->busy) { ++ /* Prevent recursions. */ ++ logout("recursion in %s:%u\n", __FILE__, __LINE__); ++ return; ++ } ++ ++ s->busy = true; ++ + for (;;) { + bool bit_el; + bool bit_s; +@@ -940,6 +951,7 @@ static void action_command(EEPRO100State *s) + } + TRACE(OTHER, logout("CU list empty\n")); + /* List is empty. Now CU is idle or suspended. 
*/ ++ s->busy = false; + } + + static void eepro100_cu_command(EEPRO100State * s, uint8_t val) +-- +2.27.0 + diff --git a/net-fix-build-when-libbpf-is-disabled-but-libxdp-is-.patch b/net-fix-build-when-libbpf-is-disabled-but-libxdp-is-.patch new file mode 100644 index 0000000000000000000000000000000000000000..ecd0c23e0e6678503aa9e4a9ae47bf6a81c89653 --- /dev/null +++ b/net-fix-build-when-libbpf-is-disabled-but-libxdp-is-.patch @@ -0,0 +1,76 @@ +From 17835e803d0cfa308cd00f070c7e21b27f3d036e Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 22 Mar 2025 15:38:09 +0800 +Subject: [PATCH] net: fix build when libbpf is disabled, but libxdp is enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 1f37280b37dbf85f36748f359a9f8802c8fe7ccd + +The net/af-xdp.c code is enabled when the libxdp library is present, +however, it also has direct API calls to bpf_xdp_query_id & +bpf_xdp_detach which are provided by the libbpf library. + +As a result if building with --disable-libbpf, but libxdp gets +auto-detected, we'll fail to link QEMU + + /usr/bin/ld: libcommon.a.p/net_af-xdp.c.o: undefined reference to symbol 'bpf_xdp_query_id@@LIBBPF_0.7.0' + +There are two bugs here + + * Since we have direct libbpf API calls, when building + net/af-xdp.c, we must tell meson that libbpf is a + dependency, so that we directly link to it, rather + than relying on indirect linkage. + + * We must skip probing for libxdp at all, when libbpf + is not found, raising an error if --enable-libxdp was + given explicitly. + +Fixes: cb039ef3d9e3112da01e1ecd9b136ac9809ef733 +Signed-off-by: Daniel P. 
Berrangé +Signed-off-by: Jason Wang +Signed-off-by: gubin +--- + meson.build | 10 ++++++++-- + net/meson.build | 2 +- + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/meson.build b/meson.build +index 4078f2aced..aea6a33ca3 100644 +--- a/meson.build ++++ b/meson.build +@@ -1972,8 +1972,14 @@ endif + # libxdp + libxdp = not_found + if not get_option('af_xdp').auto() or have_system +- libxdp = dependency('libxdp', required: get_option('af_xdp'), +- version: '>=1.4.0', method: 'pkg-config') ++ if libbpf.found() ++ libxdp = dependency('libxdp', required: get_option('af_xdp'), ++ version: '>=1.4.0', method: 'pkg-config') ++ else ++ if get_option('af_xdp').enabled() ++ error('libxdp requested, but libbpf is not available') ++ endif ++ endif + endif + + # libdw +diff --git a/net/meson.build b/net/meson.build +index ce99bd4447..7264479242 100644 +--- a/net/meson.build ++++ b/net/meson.build +@@ -37,7 +37,7 @@ if have_netmap + system_ss.add(files('netmap.c')) + endif + +-system_ss.add(when: libxdp, if_true: files('af-xdp.c')) ++system_ss.add(when: [libxdp, libbpf], if_true: files('af-xdp.c')) + + if have_vhost_net_user + system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) +-- +2.41.0.windows.1 + diff --git a/net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch b/net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch deleted file mode 100644 index a96c178294ea7c2afe5a860f4021efe361a6a18f..0000000000000000000000000000000000000000 --- a/net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch +++ /dev/null @@ -1,46 +0,0 @@ -From cb6048ace290e770b0ec1a6011209192541d3e8a Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Fri, 30 Oct 2020 10:46:55 +0800 -Subject: [PATCH] net/l2tpv3: Remove redundant check in net_init_l2tpv3() - -The result has been checked to be NULL before, it cannot be NULL here, -so the check is redundant. Remove it. 
- -Reported-by: Euler Robot -Signed-off-by: AlexChen -Signed-off-by: Jason Wang -(cherry-picked from commit d949fe64b0) ---- - net/l2tpv3.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/net/l2tpv3.c b/net/l2tpv3.c -index 55fea17c0f..e4d4218db6 100644 ---- a/net/l2tpv3.c -+++ b/net/l2tpv3.c -@@ -655,9 +655,8 @@ int net_init_l2tpv3(const Netdev *netdev, - error_setg(errp, "could not bind socket err=%i", errno); - goto outerr; - } -- if (result) { -- freeaddrinfo(result); -- } -+ -+ freeaddrinfo(result); - - memset(&hints, 0, sizeof(hints)); - -@@ -686,9 +685,7 @@ int net_init_l2tpv3(const Netdev *netdev, - memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen); - s->dst_size = result->ai_addrlen; - -- if (result) { -- freeaddrinfo(result); -- } -+ freeaddrinfo(result); - - if (l2tpv3->has_counter && l2tpv3->counter) { - s->has_counter = true; --- -2.27.0 - diff --git a/net-remove-an-assert-call-in-eth_get_gso_type.patch b/net-remove-an-assert-call-in-eth_get_gso_type.patch deleted file mode 100644 index 79d740922a2c2fd766b19c89d68df49a6d0a96f1..0000000000000000000000000000000000000000 --- a/net-remove-an-assert-call-in-eth_get_gso_type.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 9b210ed120ac82e647ed99be3679bab2bc55932b Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Wed, 21 Oct 2020 11:35:50 +0530 -Subject: [PATCH] net: remove an assert call in eth_get_gso_type - -fix CVE-2020-27617 - -eth_get_gso_type() routine returns segmentation offload type based on -L3 protocol type. It calls g_assert_not_reached if L3 protocol is -unknown, making the following return statement unreachable. Remove the -g_assert call, it maybe triggered by a guest user. 
- -Reported-by: Gaoning Pan -Signed-off-by: Prasad J Pandit -Signed-off-by: Jason Wang - -cherry-pick from commit 7564bf7701f00214cdc8a678a9f7df765244def1 -Signed-off-by: Jiajie Li ---- - net/eth.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/eth.c b/net/eth.c -index 0c1d413ee2..1e0821c5f8 100644 ---- a/net/eth.c -+++ b/net/eth.c -@@ -16,6 +16,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/log.h" - #include "net/eth.h" - #include "net/checksum.h" - #include "net/tap.h" -@@ -71,9 +72,8 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) - return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state; - } - } -- -- /* Unsupported offload */ -- g_assert_not_reached(); -+ qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, " -+ "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto); - - return VIRTIO_NET_HDR_GSO_NONE | ecn_state; - } --- -2.27.0 - diff --git a/net-vmxnet3-validate-configuration-values-during-act.patch b/net-vmxnet3-validate-configuration-values-during-act.patch deleted file mode 100644 index a4ed4bccc5534b5b6f04d61ea7c4bccfc72e3fa8..0000000000000000000000000000000000000000 --- a/net-vmxnet3-validate-configuration-values-during-act.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 18d22b1f2b2f89bbdd77bd4d62e0fe42f19b3962 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 9 Mar 2021 17:37:20 +0800 -Subject: [PATCH] net: vmxnet3: validate configuration values during activate - (CVE-2021-20203) - -fix CVE-2021-20203 #I3A34O - -While activating device in vmxnet3_acticate_device(), it does not -validate guest supplied configuration values against predefined -minimum - maximum limits. This may lead to integer overflow or -OOB access issues. Add checks to avoid it. 
- -Fixes: CVE-2021-20203 -Buglink: https://bugs.launchpad.net/qemu/+bug/1913873 -Reported-by: Gaoning Pan -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/net/vmxnet3.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c -index 10d01d0058..ecc4f5bcf0 100644 ---- a/hw/net/vmxnet3.c -+++ b/hw/net/vmxnet3.c -@@ -1418,6 +1418,7 @@ static void vmxnet3_activate_device(VMXNET3State *s) - vmxnet3_setup_rx_filtering(s); - /* Cache fields from shared memory */ - s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); -+ assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU); - VMW_CFPRN("MTU is %u", s->mtu); - - s->max_rx_frags = -@@ -1471,7 +1472,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) - /* Read rings memory locations for TX queues */ - pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA); - size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize); -- -+ if (size > VMXNET3_TX_RING_MAX_SIZE) { -+ size = VMXNET3_TX_RING_MAX_SIZE; -+ } - vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size, - sizeof(struct Vmxnet3_TxDesc), false); - VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring); -@@ -1481,6 +1484,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) - /* TXC ring */ - pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA); - size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize); -+ if (size > VMXNET3_TC_RING_MAX_SIZE) { -+ size = VMXNET3_TC_RING_MAX_SIZE; -+ } - vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size, - sizeof(struct Vmxnet3_TxCompDesc), true); - VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); -@@ -1522,6 +1528,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) - /* RX rings */ - pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]); - size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]); -+ if 
(size > VMXNET3_RX_RING_MAX_SIZE) { -+ size = VMXNET3_RX_RING_MAX_SIZE; -+ } - vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size, - sizeof(struct Vmxnet3_RxDesc), false); - VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", -@@ -1531,6 +1540,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) - /* RXC ring */ - pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA); - size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize); -+ if (size > VMXNET3_RC_RING_MAX_SIZE) { -+ size = VMXNET3_RC_RING_MAX_SIZE; -+ } - vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size, - sizeof(struct Vmxnet3_RxCompDesc), true); - VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); --- -2.27.0 - diff --git a/next-kbd-convert-to-use-qemu_input_handler_register.patch b/next-kbd-convert-to-use-qemu_input_handler_register.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c601fd4064c83ee52cb87554d8a77c194f43b58 --- /dev/null +++ b/next-kbd-convert-to-use-qemu_input_handler_register.patch @@ -0,0 +1,224 @@ +From 91e07a78026caafa181134beeb8c5b79157718ad Mon Sep 17 00:00:00 2001 +From: Mark Cave-Ayland +Date: Wed, 6 Nov 2024 12:09:27 +0000 +Subject: [PATCH] next-kbd: convert to use qemu_input_handler_register() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Convert the next-kbd device from the legacy UI qemu_add_kbd_event_handler() +function to use qemu_input_handler_register(). + +Signed-off-by: Mark Cave-Ayland +Reviewed-by: Thomas Huth +Reviewed-by: Daniel P. 
Berrangé +Message-ID: <20241106120928.242443-2-mark.cave-ayland@ilande.co.uk> +[thuth: Removed the NEXTKBD_NO_KEY definition - replaced by 0 now] +Signed-off-by: Thomas Huth +Signed-off-by: Zhongrui Tang +--- + hw/m68k/next-kbd.c | 158 +++++++++++++++++++++++++++++---------------- + 1 file changed, 103 insertions(+), 55 deletions(-) + +diff --git a/hw/m68k/next-kbd.c b/hw/m68k/next-kbd.c +index 0c348c18cf..880ebe3602 100644 +--- a/hw/m68k/next-kbd.c ++++ b/hw/m68k/next-kbd.c +@@ -68,7 +68,6 @@ struct NextKBDState { + uint16_t shift; + }; + +-static void queue_code(void *opaque, int code); + + /* lots of magic numbers here */ + static uint32_t kbd_read_byte(void *opaque, hwaddr addr) +@@ -166,68 +165,70 @@ static const MemoryRegionOps kbd_ops = { + .endianness = DEVICE_NATIVE_ENDIAN, + }; + +-static void nextkbd_event(void *opaque, int ch) +-{ +- /* +- * Will want to set vars for caps/num lock +- * if (ch & 0x80) -> key release +- * there's also e0 escaped scancodes that might need to be handled +- */ +- queue_code(opaque, ch); +-} +- +-static const unsigned char next_keycodes[128] = { +- 0x00, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x50, 0x4F, +- 0x4E, 0x1E, 0x1F, 0x20, 0x1D, 0x1C, 0x1B, 0x00, +- 0x42, 0x43, 0x44, 0x45, 0x48, 0x47, 0x46, 0x06, +- 0x07, 0x08, 0x00, 0x00, 0x2A, 0x00, 0x39, 0x3A, +- 0x3B, 0x3C, 0x3D, 0x40, 0x3F, 0x3E, 0x2D, 0x2C, +- 0x2B, 0x26, 0x00, 0x00, 0x31, 0x32, 0x33, 0x34, +- 0x35, 0x37, 0x36, 0x2e, 0x2f, 0x30, 0x00, 0x00, +- 0x00, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ++static const int qcode_to_nextkbd_keycode[] = { ++ [Q_KEY_CODE_ESC] = 0x49, ++ [Q_KEY_CODE_1] = 0x4a, ++ [Q_KEY_CODE_2] = 0x4b, ++ [Q_KEY_CODE_3] = 0x4c, ++ [Q_KEY_CODE_4] = 0x4d, ++ [Q_KEY_CODE_5] = 0x50, ++ [Q_KEY_CODE_6] = 0x4f, ++ [Q_KEY_CODE_7] = 0x4e, ++ [Q_KEY_CODE_8] = 0x1e, 
++ [Q_KEY_CODE_9] = 0x1f, ++ [Q_KEY_CODE_0] = 0x20, ++ [Q_KEY_CODE_MINUS] = 0x1d, ++ [Q_KEY_CODE_EQUAL] = 0x1c, ++ [Q_KEY_CODE_BACKSPACE] = 0x1b, ++ ++ [Q_KEY_CODE_Q] = 0x42, ++ [Q_KEY_CODE_W] = 0x43, ++ [Q_KEY_CODE_E] = 0x44, ++ [Q_KEY_CODE_R] = 0x45, ++ [Q_KEY_CODE_T] = 0x48, ++ [Q_KEY_CODE_Y] = 0x47, ++ [Q_KEY_CODE_U] = 0x46, ++ [Q_KEY_CODE_I] = 0x06, ++ [Q_KEY_CODE_O] = 0x07, ++ [Q_KEY_CODE_P] = 0x08, ++ [Q_KEY_CODE_RET] = 0x2a, ++ [Q_KEY_CODE_A] = 0x39, ++ [Q_KEY_CODE_S] = 0x3a, ++ ++ [Q_KEY_CODE_D] = 0x3b, ++ [Q_KEY_CODE_F] = 0x3c, ++ [Q_KEY_CODE_G] = 0x3d, ++ [Q_KEY_CODE_H] = 0x40, ++ [Q_KEY_CODE_J] = 0x3f, ++ [Q_KEY_CODE_K] = 0x3e, ++ [Q_KEY_CODE_L] = 0x2d, ++ [Q_KEY_CODE_SEMICOLON] = 0x2c, ++ [Q_KEY_CODE_APOSTROPHE] = 0x2b, ++ [Q_KEY_CODE_GRAVE_ACCENT] = 0x26, ++ [Q_KEY_CODE_Z] = 0x31, ++ [Q_KEY_CODE_X] = 0x32, ++ [Q_KEY_CODE_C] = 0x33, ++ [Q_KEY_CODE_V] = 0x34, ++ ++ [Q_KEY_CODE_B] = 0x35, ++ [Q_KEY_CODE_N] = 0x37, ++ [Q_KEY_CODE_M] = 0x36, ++ [Q_KEY_CODE_COMMA] = 0x2e, ++ [Q_KEY_CODE_DOT] = 0x2f, ++ [Q_KEY_CODE_SLASH] = 0x30, ++ ++ [Q_KEY_CODE_SPC] = 0x38, + }; + +-static void queue_code(void *opaque, int code) ++static void nextkbd_put_keycode(NextKBDState *s, int keycode) + { +- NextKBDState *s = NEXTKBD(opaque); + KBDQueue *q = &s->queue; +- int key = code & KD_KEYMASK; +- int release = code & 0x80; +- static int ext; +- +- if (code == 0xE0) { +- ext = 1; +- } +- +- if (code == 0x2A || code == 0x1D || code == 0x36) { +- if (code == 0x2A) { +- s->shift = KD_LSHIFT; +- } else if (code == 0x36) { +- s->shift = KD_RSHIFT; +- ext = 0; +- } else if (code == 0x1D && !ext) { +- s->shift = KD_LCOMM; +- } else if (code == 0x1D && ext) { +- ext = 0; +- s->shift = KD_RCOMM; +- } +- return; +- } else if (code == (0x2A | 0x80) || code == (0x1D | 0x80) || +- code == (0x36 | 0x80)) { +- s->shift = 0; +- return; +- } + + if (q->count >= KBD_QUEUE_SIZE) { + return; + } + +- q->data[q->wptr] = next_keycodes[key] | release; +- ++ q->data[q->wptr] = keycode; + if 
(++q->wptr == KBD_QUEUE_SIZE) { + q->wptr = 0; + } +@@ -241,6 +242,53 @@ static void queue_code(void *opaque, int code) + /* s->update_irq(s->update_arg, 1); */ + } + ++static void nextkbd_event(DeviceState *dev, QemuConsole *src, InputEvent *evt) ++{ ++ NextKBDState *s = NEXTKBD(dev); ++ int qcode, keycode; ++ bool key_down = evt->u.key.data->down; ++ ++ qcode = qemu_input_key_value_to_qcode(evt->u.key.data->key); ++ if (qcode >= ARRAY_SIZE(qcode_to_nextkbd_keycode)) { ++ return; ++ } ++ ++ /* Shift key currently has no keycode, so handle separately */ ++ if (qcode == Q_KEY_CODE_SHIFT) { ++ if (key_down) { ++ s->shift |= KD_LSHIFT; ++ } else { ++ s->shift &= ~KD_LSHIFT; ++ } ++ } ++ ++ if (qcode == Q_KEY_CODE_SHIFT_R) { ++ if (key_down) { ++ s->shift |= KD_RSHIFT; ++ } else { ++ s->shift &= ~KD_RSHIFT; ++ } ++ } ++ ++ keycode = qcode_to_nextkbd_keycode[qcode]; ++ if (!keycode) { ++ return; ++ } ++ ++ /* If key release event, create keyboard break code */ ++ if (!key_down) { ++ keycode |= 0x80; ++ } ++ ++ nextkbd_put_keycode(s, keycode); ++} ++ ++static const QemuInputHandler nextkbd_handler = { ++ .name = "QEMU NeXT Keyboard", ++ .mask = INPUT_EVENT_MASK_KEY, ++ .event = nextkbd_event, ++}; ++ + static void nextkbd_reset(DeviceState *dev) + { + NextKBDState *nks = NEXTKBD(dev); +@@ -256,7 +304,7 @@ static void nextkbd_realize(DeviceState *dev, Error **errp) + memory_region_init_io(&s->mr, OBJECT(dev), &kbd_ops, s, "next.kbd", 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mr); + +- qemu_add_kbd_event_handler(nextkbd_event, s); ++ qemu_input_handler_register(dev, &nextkbd_handler); + } + + static const VMStateDescription nextkbd_vmstate = { +-- +2.41.0.windows.1 + diff --git a/nvram-add-nrf51_soc-flash-read-method.patch b/nvram-add-nrf51_soc-flash-read-method.patch deleted file mode 100644 index 915bbf8a1e6eecaa0974f14db93847cedded7d85..0000000000000000000000000000000000000000 --- a/nvram-add-nrf51_soc-flash-read-method.patch +++ /dev/null @@ -1,44 +0,0 @@ 
-From 6f88633406e546eb6a01786b910a2ab12373abf8 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:19:15 +0800 -Subject: [PATCH] nvram: add nrf51_soc flash read method - -fix CVE-2020-15469 - -Add nrf51_soc mmio read method to avoid NULL pointer dereference -issue. - -Reported-by: Lei Sun -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/nvram/nrf51_nvm.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/nvram/nrf51_nvm.c b/hw/nvram/nrf51_nvm.c -index eca0cb35b5..7b2b1351f4 100644 ---- a/hw/nvram/nrf51_nvm.c -+++ b/hw/nvram/nrf51_nvm.c -@@ -271,6 +271,10 @@ static const MemoryRegionOps io_ops = { - .endianness = DEVICE_LITTLE_ENDIAN, - }; - -+static uint64_t flash_read(void *opaque, hwaddr offset, unsigned size) -+{ -+ g_assert_not_reached(); -+} - - static void flash_write(void *opaque, hwaddr offset, uint64_t value, - unsigned int size) -@@ -298,6 +302,7 @@ static void flash_write(void *opaque, hwaddr offset, uint64_t value, - - - static const MemoryRegionOps flash_ops = { -+ .read = flash_read, - .write = flash_write, - .valid.min_access_size = 4, - .valid.max_access_size = 4, --- -2.27.0 - diff --git a/object-return-self-in-object_ref.patch b/object-return-self-in-object_ref.patch deleted file mode 100644 index e851fb30d20a4c56b65bb7ce1c6ddb9109c643aa..0000000000000000000000000000000000000000 --- a/object-return-self-in-object_ref.patch +++ /dev/null @@ -1,58 +0,0 @@ -From b77ade9bb37b2e9813a42008cb21d0c743aa50a1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 10 Jan 2020 19:30:31 +0400 -Subject: [PATCH] object: return self in object_ref() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This allow for simpler assignment with ref: foo = object_ref(bar) - -Signed-off-by: Marc-André Lureau -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20200110153039.1379601-19-marcandre.lureau@redhat.com> -Signed-off-by: Paolo Bonzini 
-Signed-off-by: Zhenyu Ye ---- - include/qom/object.h | 3 ++- - qom/object.c | 5 +++-- - 2 files changed, 5 insertions(+), 3 deletions(-) - -diff --git a/include/qom/object.h b/include/qom/object.h -index 5e2f60d4b0..18660fde1c 100644 ---- a/include/qom/object.h -+++ b/include/qom/object.h -@@ -1005,8 +1005,9 @@ GSList *object_class_get_list_sorted(const char *implements_type, - * - * Increase the reference count of a object. A object cannot be freed as long - * as its reference count is greater than zero. -+ * Returns: @obj - */ --void object_ref(Object *obj); -+Object *object_ref(Object *obj); - - /** - * object_unref: -diff --git a/qom/object.c b/qom/object.c -index 66c4a5f1cb..555c8b9d07 100644 ---- a/qom/object.c -+++ b/qom/object.c -@@ -1107,12 +1107,13 @@ GSList *object_class_get_list_sorted(const char *implements_type, - object_class_cmp); - } - --void object_ref(Object *obj) -+Object *object_ref(Object *obj) - { - if (!obj) { -- return; -+ return NULL; - } - atomic_inc(&obj->ref); -+ return obj; - } - - void object_unref(Object *obj) --- -2.22.0.windows.1 - diff --git a/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch b/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch new file mode 100644 index 0000000000000000000000000000000000000000..d25778669c3421fb77c4a787d5852d1f708738e8 --- /dev/null +++ b/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch @@ -0,0 +1,57 @@ +From b6c45f5ea5d1a379ac0a507cf59345c573b27cc8 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 14:21:39 +0800 +Subject: [PATCH] oslib-posix: optimise vm startup time for 1G hugepage + +It takes quit a long time to clear 1G-hugepage, which makes glibc +pthread_create quit slow. +Create touch_pages threads in advance, and then handle the touch_pages +callback. Only read lock is held here. 
+--- + util/oslib-posix.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index e86fd64e09..9ca3fee2b8 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -88,6 +88,8 @@ static QemuMutex sigbus_mutex; + static QemuMutex page_mutex; + static QemuCond page_cond; + ++static int started_num_threads; ++ + int qemu_get_thread_id(void) + { + #if defined(__linux__) +@@ -344,6 +346,10 @@ static void *do_touch_pages(void *arg) + } + qemu_mutex_unlock(&page_mutex); + ++ while (started_num_threads != memset_args->context.num_threads) { ++ smp_mb(); ++ } ++ + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); +@@ -448,7 +454,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, + context.threads = g_new0(MemsetThread, context.num_threads); + numpages_per_thread = numpages / context.num_threads; + leftover = numpages % context.num_threads; +- for (i = 0; i < context.num_threads; i++) { ++ for (i = 0, started_num_threads = 0; i < context.num_threads; i++) { + context.threads[i].addr = addr; + context.threads[i].numpages = numpages_per_thread + (i < leftover); + context.threads[i].hpagesize = hpagesize; +@@ -464,6 +470,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, + QEMU_THREAD_JOINABLE); + } + addr += context.threads[i].numpages * hpagesize; ++ started_num_threads++; + } + + if (!use_madv_populate_write) { +-- +2.27.0 + diff --git a/parallels-fix-ext_off-assertion-failure-due-to-overf.patch b/parallels-fix-ext_off-assertion-failure-due-to-overf.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f9770ceda419e01d39d0dd2f31f714a483b4491 --- /dev/null +++ b/parallels-fix-ext_off-assertion-failure-due-to-overf.patch @@ -0,0 +1,47 @@ +From 44cf15f26215a07876d78d8ee63f0fb10ce2d1d4 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Wed, 5 Feb 2025 07:07:13 -0500 +Subject: [PATCH] parallels: fix ext_off 
assertion failure due to overflow + +cheery-pick from 58607752d173438994d28dea7e2c2587726663e6 + +This error was discovered by fuzzing qemu-img. + +When ph.ext_off has a sufficiently large value, the operation +le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS in +parallels_read_format_extension() can cause an overflow in int64_t. +This overflow triggers the assert(ext_off > 0) +check in block/parallels-ext.c: parallels_read_format_extension(), +leading to a crash. + +This commit adds a check to prevent overflow when shifting ph.ext_off +by BDRV_SECTOR_BITS, ensuring that the value remains within a valid range. + +Reported-by: Leonid Reviakin +Signed-off-by: Denis Rastyogin +Reviewed-by: Denis V. Lunev +Message-ID: <20241212104212.513947-2-gerben@altlinux.org> +Signed-off-by: Stefan Hajnoczi +Signed-off-by: qihao_yewu +--- + block/parallels.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/block/parallels.c b/block/parallels.c +index 9205a0864f..8f2b58e1c9 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -1298,6 +1298,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, + error_setg(errp, "Catalog too large"); + return -EFBIG; + } ++ if (le64_to_cpu(ph.ext_off) >= (INT64_MAX >> BDRV_SECTOR_BITS)) { ++ error_setg(errp, "Invalid image: Too big offset"); ++ return -EFBIG; ++ } + + size = bat_entry_off(s->bat_size); + s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs)); +-- +2.41.0.windows.1 + diff --git a/pc-Don-t-make-die-id-mandatory-unless-necessary.patch b/pc-Don-t-make-die-id-mandatory-unless-necessary.patch deleted file mode 100644 index c51b40f33020e36547f44b895b040acc07bf741c..0000000000000000000000000000000000000000 --- a/pc-Don-t-make-die-id-mandatory-unless-necessary.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 7ebcd375ade505358c1c45542de22f188c599bdd Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Fri, 16 Aug 2019 14:07:50 -0300 -Subject: [PATCH] pc: Don't make die-id mandatory unless 
necessary - -We have this issue reported when using libvirt to hotplug CPUs: -https://bugzilla.redhat.com/show_bug.cgi?id=1741451 - -Basically, libvirt is not copying die-id from -query-hotpluggable-cpus, but die-id is now mandatory. - -We could blame libvirt and say it is not following the documented -interface, because we have this buried in the QAPI schema -documentation: - -> Note: currently there are 5 properties that could be present -> but management should be prepared to pass through other -> properties with device_add command to allow for future -> interface extension. This also requires the filed names to be kept in -> sync with the properties passed to -device/device_add. - -But I don't think this would be reasonable from us. We can just -make QEMU more flexible and let die-id to be omitted when there's -no ambiguity. This will allow us to keep compatibility with -existing libvirt versions. - -Test case included to ensure we don't break this again. - -Fixes: commit 176d2cda0dee ("i386/cpu: Consolidate die-id validity in smp context") -Signed-off-by: Eduardo Habkost -Message-Id: <20190816170750.23910-1-ehabkost@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit fea374e7c8079563bca7c8fac895c6a880f76adc) -Signed-off-by: Michael Roth ---- - hw/i386/pc.c | 8 ++++++ - tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++ - 2 files changed, 43 insertions(+) - create mode 100644 tests/acceptance/pc_cpu_hotplug_props.py - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 549c437050..947f81070f 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2403,6 +2403,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, - int max_socket = (ms->smp.max_cpus - 1) / - smp_threads / smp_cores / pcms->smp_dies; - -+ /* -+ * die-id was optional in QEMU 4.0 and older, so keep it optional -+ * if there's only one die per socket. 
-+ */ -+ if (cpu->die_id < 0 && pcms->smp_dies == 1) { -+ cpu->die_id = 0; -+ } -+ - if (cpu->socket_id < 0) { - error_setg(errp, "CPU socket-id is not set"); - return; -diff --git a/tests/acceptance/pc_cpu_hotplug_props.py b/tests/acceptance/pc_cpu_hotplug_props.py -new file mode 100644 -index 0000000000..08b7e632c6 ---- /dev/null -+++ b/tests/acceptance/pc_cpu_hotplug_props.py -@@ -0,0 +1,35 @@ -+# -+# Ensure CPU die-id can be omitted on -device -+# -+# Copyright (c) 2019 Red Hat Inc -+# -+# Author: -+# Eduardo Habkost -+# -+# This library is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public -+# License as published by the Free Software Foundation; either -+# version 2 of the License, or (at your option) any later version. -+# -+# This library is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+# -+# You should have received a copy of the GNU Lesser General Public -+# License along with this library; if not, see . 
-+# -+ -+from avocado_qemu import Test -+ -+class OmittedCPUProps(Test): -+ """ -+ :avocado: tags=arch:x86_64 -+ """ -+ def test_no_die_id(self): -+ self.vm.add_args('-nodefaults', '-S') -+ self.vm.add_args('-smp', '1,sockets=2,cores=2,threads=2,maxcpus=8') -+ self.vm.add_args('-cpu', 'qemu64') -+ self.vm.add_args('-device', 'qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id=0') -+ self.vm.launch() -+ self.assertEquals(len(self.vm.command('query-cpus')), 2) --- -2.23.0 diff --git a/pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch b/pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch deleted file mode 100644 index 6e29f08d69c56a2907892008c9c4ce177778c097..0000000000000000000000000000000000000000 --- a/pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch +++ /dev/null @@ -1,34 +0,0 @@ -From d2bb5b4c4ed3b1dbc0096deb195b6df33f813f23 Mon Sep 17 00:00:00 2001 -From: Yifan Luo -Date: Wed, 14 Aug 2019 14:14:26 +0800 -Subject: [PATCH 5/5] pc-bios/s390-ccw/net: fix a possible memory leak in - get_uuid() - -There is a possible memory leak in get_uuid(). Should free allocated mem -before -return NULL. 
- -Signed-off-by: Yifan Luo -Message-Id: <02cf01d55267$86cf2850$946d78f0$@cmss.chinamobile.com> -Reviewed-by: Thomas Huth -Reviewed-by: Cornelia Huck -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/netmain.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c -index f3542cb2..f2dcc01e 100644 ---- a/pc-bios/s390-ccw/netmain.c -+++ b/pc-bios/s390-ccw/netmain.c -@@ -269,6 +269,7 @@ static const char *get_uuid(void) - : "d" (r0), "d" (r1), [addr] "a" (buf) - : "cc", "memory"); - if (cc) { -+ free(mem); - return NULL; - } - --- -2.23.0 - diff --git a/pci-Add-return_page_response-pci-ops.patch b/pci-Add-return_page_response-pci-ops.patch deleted file mode 100644 index 133762085ba7a4a01fd5e909a3c17cd45a975978..0000000000000000000000000000000000000000 --- a/pci-Add-return_page_response-pci-ops.patch +++ /dev/null @@ -1,86 +0,0 @@ -From e3b498a1afec138693251bf1bd1fa9b322a880fb Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 Nov 2020 14:34:35 +0100 -Subject: [PATCH] pci: Add return_page_response pci ops - -Add a new PCI operation that allows to return page responses -to registered VFIO devices - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/pci/pci.c | 16 ++++++++++++++++ - include/hw/iommu/iommu.h | 8 ++++++++ - include/hw/pci/pci.h | 4 ++++ - 3 files changed, 28 insertions(+) - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index f11ca7964e..a8b3d1c071 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -2660,6 +2660,22 @@ int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, - return -ENOENT; - } - -+int pci_device_return_page_response(PCIBus *bus, int32_t devfn, -+ IOMMUPageResponse *resp) -+{ -+ PCIDevice *dev; -+ -+ if (!bus) { -+ return -EINVAL; -+ } -+ -+ dev = bus->devices[devfn]; -+ if (dev && dev->pasid_ops && dev->pasid_ops->return_page_response) { -+ return dev->pasid_ops->return_page_response(bus, devfn, resp); -+ } -+ return -ENOENT; -+} -+ - static void 
pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) - { - Range *range = opaque; -diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h -index 12092bda7b..5890f095b1 100644 ---- a/include/hw/iommu/iommu.h -+++ b/include/hw/iommu/iommu.h -@@ -24,5 +24,13 @@ typedef struct IOMMUConfig { - }; - } IOMMUConfig; - -+typedef struct IOMMUPageResponse { -+ union { -+#ifdef __linux__ -+ struct iommu_page_response resp; -+#endif -+ }; -+} IOMMUPageResponse; -+ - - #endif /* QEMU_HW_IOMMU_IOMMU_H */ -diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h -index bb14ed61b0..5e7e0e4e6f 100644 ---- a/include/hw/pci/pci.h -+++ b/include/hw/pci/pci.h -@@ -266,6 +266,8 @@ typedef struct PCIReqIDCache PCIReqIDCache; - - struct PCIPASIDOps { - int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config); -+ int (*return_page_response)(PCIBus *bus, int32_t devfn, -+ IOMMUPageResponse *resp); - }; - typedef struct PCIPASIDOps PCIPASIDOps; - -@@ -495,6 +497,8 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque); - void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops); - bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn); - int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config); -+int pci_device_return_page_response(PCIBus *bus, int32_t devfn, -+ IOMMUPageResponse *resp); - - static inline void - pci_set_byte(uint8_t *config, uint8_t val) --- -2.27.0 - diff --git a/pci-Get-pasid-capability-from-vIOMMU.patch b/pci-Get-pasid-capability-from-vIOMMU.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ba0cdef63449a0c3754011f2b7ebd20caf429b6 --- /dev/null +++ b/pci-Get-pasid-capability-from-vIOMMU.patch @@ -0,0 +1,68 @@ +From 494e0ace6c120af00b27a0cc1d4a478073654e35 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Thu, 12 Sep 2024 00:33:13 -0700 +Subject: [PATCH] pci: Get pasid capability from vIOMMU + +Signed-off-by: Yi Liu +--- + hw/pci/pci.c | 13 +++++++++++++ + include/hw/pci/pci.h | 13 
+++++++++++++ + 2 files changed, 26 insertions(+) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index d6f627aa51..447ef2b163 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2802,6 +2802,19 @@ void pci_device_unset_iommu_device(PCIDevice *dev) + } + } + ++bool pci_device_get_pasid_cap(PCIDevice *dev) ++{ ++ PCIBus *iommu_bus; ++ ++ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); ++ if (iommu_bus && iommu_bus->iommu_ops->get_pasid_cap) { ++ return iommu_bus->iommu_ops->get_pasid_cap(pci_get_bus(dev), ++ iommu_bus->iommu_opaque, ++ dev->devfn); ++ } ++ return false; ++} ++ + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) + { + /* +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index 8d1af44249..0dfe274c33 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -418,12 +418,25 @@ typedef struct PCIIOMMUOps { + * @devfn: device and function number of the PCI device. + */ + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); ++ /** ++ * @get_pasid_cap: get pasid capability from vIOMMU ++ * ++ * Optional callback. ++ * ++ * @bus: the #PCIBus of the PCI device. ++ * ++ * @opaque: the data passed to pci_setup_iommu(). ++ * ++ * @devfn: device and function number of the PCI device. 
++ */ ++ bool (*get_pasid_cap)(PCIBus *bus, void *opaque, int devfn); + } PCIIOMMUOps; + + AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); + bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, + Error **errp); + void pci_device_unset_iommu_device(PCIDevice *dev); ++bool pci_device_get_pasid_cap(PCIDevice *dev); + + /** + * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus +-- +2.41.0.windows.1 + diff --git a/pci-check-bus-pointer-before-dereference.patch b/pci-check-bus-pointer-before-dereference.patch deleted file mode 100644 index 540caaf945f28b4e13e53a299495528cb0703b5d..0000000000000000000000000000000000000000 --- a/pci-check-bus-pointer-before-dereference.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e393095e6d1456e2fb22f3cde3a9f0a307152562 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Wed, 14 Oct 2020 15:00:20 +0800 -Subject: [PATCH] pci: check bus pointer before dereference - -fix CVE-2020-25742 - -patch link: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg05294.html - -While mapping IRQ level in pci_change_irq_level() routine, -it does not check if pci_get_bus() returned a valid pointer. -It may lead to a NULL pointer dereference issue. Add check to -avoid it. - - -> https://ruhr-uni-bochum.sciebo.de/s/NNWP2GfwzYKeKwE?path=%2Flsi_nullptr1 - ==1183858==Hint: address points to the zero page. - #0 pci_change_irq_level hw/pci/pci.c:259 - #1 pci_irq_handler hw/pci/pci.c:1445 - #2 pci_set_irq hw/pci/pci.c:1463 - #3 lsi_set_irq hw/scsi/lsi53c895a.c:488 - #4 lsi_update_irq hw/scsi/lsi53c895a.c:523 - #5 lsi_script_scsi_interrupt hw/scsi/lsi53c895a.c:554 - #6 lsi_execute_script hw/scsi/lsi53c895a.c:1149 - #7 lsi_reg_writeb hw/scsi/lsi53c895a.c:1984 - #8 lsi_io_write hw/scsi/lsi53c895a.c:2146 - ... 
- -Reported-by: Ruhr-University -Signed-off-by: Prasad J Pandit ---- - hw/pci/pci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index de0fae10ab..df5a2c3294 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -253,6 +253,9 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) - PCIBus *bus; - for (;;) { - bus = pci_get_bus(pci_dev); -+ if (!bus) { -+ return; -+ } - irq_num = bus->map_irq(pci_dev, irq_num); - if (bus->set_irq) - break; --- -2.23.0 - diff --git a/pci-host-add-pcie-msi-read-method.patch b/pci-host-add-pcie-msi-read-method.patch deleted file mode 100644 index 7433fb0c3ce55f68593f3d612663c4db3a9960cd..0000000000000000000000000000000000000000 --- a/pci-host-add-pcie-msi-read-method.patch +++ /dev/null @@ -1,56 +0,0 @@ -From dd86dc83fcccc0d1773bd93c509e3a03e7ef9b38 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:08:24 +0800 -Subject: [PATCH] pci-host: add pcie-msi read method - -fix CVE-2020-15469 - -Add pcie-msi mmio read method to avoid NULL pointer dereference -issue. 
- -Reported-by: Lei Sun -Reviewed-by: Li Qiang -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/pci-host/designware.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c -index 9ae8c0deb7..23e3de3cad 100644 ---- a/hw/pci-host/designware.c -+++ b/hw/pci-host/designware.c -@@ -21,6 +21,7 @@ - #include "qemu/osdep.h" - #include "qapi/error.h" - #include "qemu/module.h" -+#include "qemu/log.h" - #include "hw/pci/msi.h" - #include "hw/pci/pci_bridge.h" - #include "hw/pci/pci_host.h" -@@ -60,6 +61,13 @@ designware_pcie_root_to_host(DesignwarePCIERoot *root) - return DESIGNWARE_PCIE_HOST(bus->parent); - } - -+static uint64_t designware_pcie_root_msi_read(void *opaque, hwaddr addr, -+ unsigned size) -+{ -+ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); -+ return 0; -+} -+ - static void designware_pcie_root_msi_write(void *opaque, hwaddr addr, - uint64_t val, unsigned len) - { -@@ -74,6 +82,7 @@ static void designware_pcie_root_msi_write(void *opaque, hwaddr addr, - } - - static const MemoryRegionOps designware_pci_host_msi_ops = { -+ .read = designware_pcie_root_msi_read, - .write = designware_pcie_root_msi_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .valid = { --- -2.27.0 - diff --git a/pci-introduce-PCIPASIDOps-to-PCIDevice.patch b/pci-introduce-PCIPASIDOps-to-PCIDevice.patch deleted file mode 100644 index e89cdc8df7130e348c916988d8b85f39ec4b6d19..0000000000000000000000000000000000000000 --- a/pci-introduce-PCIPASIDOps-to-PCIDevice.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 26adddfe4645b69c16ed8d6601f373d40bddd0e3 Mon Sep 17 00:00:00 2001 -From: Liu Yi L -Date: Fri, 5 Jul 2019 19:01:36 +0800 -Subject: [PATCH] pci: introduce PCIPASIDOps to PCIDevice - -This patch introduces PCIPASIDOps for IOMMU related operations. 
- -https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00078.html -https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00940.html - -So far, to setup virt-SVA for assigned SVA capable device, needs to -configure host translation structures for specific pasid. (e.g. bind -guest page table to host and enable nested translation in host). -Besides, vIOMMU emulator needs to forward guest's cache invalidation -to host since host nested translation is enabled. e.g. on VT-d, guest -owns 1st level translation table, thus cache invalidation for 1st -level should be propagated to host. - -This patch adds two functions: alloc_pasid and free_pasid to support -guest pasid allocation and free. The implementations of the callbacks -would be device passthru modules. Like vfio. - -Cc: Kevin Tian -Cc: Jacob Pan -Cc: Peter Xu -Cc: Eric Auger -Cc: Yi Sun -Cc: David Gibson -Signed-off-by: Liu Yi L -Signed-off-by: Yi Sun -Signed-off-by: Kunkun Jiang ---- - hw/pci/pci.c | 34 ++++++++++++++++++++++++++++++++++ - include/hw/pci/pci.h | 11 +++++++++++ - 2 files changed, 45 insertions(+) - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index e74143ccc3..f11ca7964e 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -2626,6 +2626,40 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque) - bus->iommu_opaque = opaque; - } - -+void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops) -+{ -+ assert(ops && !dev->pasid_ops); -+ dev->pasid_ops = ops; -+} -+ -+bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn) -+{ -+ PCIDevice *dev; -+ -+ if (!bus) { -+ return false; -+ } -+ -+ dev = bus->devices[devfn]; -+ return !!(dev && dev->pasid_ops); -+} -+ -+int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, -+ IOMMUConfig *config) -+{ -+ PCIDevice *dev; -+ -+ if (!bus) { -+ return -EINVAL; -+ } -+ -+ dev = bus->devices[devfn]; -+ if (dev && dev->pasid_ops && dev->pasid_ops->set_pasid_table) { -+ return dev->pasid_ops->set_pasid_table(bus, devfn, config); -+ } -+ return 
-ENOENT; -+} -+ - static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) - { - Range *range = opaque; -diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h -index aaf1b9f70d..bb14ed61b0 100644 ---- a/include/hw/pci/pci.h -+++ b/include/hw/pci/pci.h -@@ -9,6 +9,7 @@ - #include "hw/isa/isa.h" - - #include "hw/pci/pcie.h" -+#include "hw/iommu/iommu.h" - - extern bool pci_available; - -@@ -263,6 +264,11 @@ struct PCIReqIDCache { - }; - typedef struct PCIReqIDCache PCIReqIDCache; - -+struct PCIPASIDOps { -+ int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config); -+}; -+typedef struct PCIPASIDOps PCIPASIDOps; -+ - struct PCIDevice { - DeviceState qdev; - -@@ -352,6 +358,7 @@ struct PCIDevice { - MSIVectorUseNotifier msix_vector_use_notifier; - MSIVectorReleaseNotifier msix_vector_release_notifier; - MSIVectorPollNotifier msix_vector_poll_notifier; -+ PCIPASIDOps *pasid_ops; - }; - - void pci_register_bar(PCIDevice *pci_dev, int region_num, -@@ -485,6 +492,10 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int); - AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); - void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque); - -+void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops); -+bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn); -+int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config); -+ - static inline void - pci_set_byte(uint8_t *config, uint8_t val) - { --- -2.27.0 - diff --git a/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch b/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch index bf5b144d12407aacbd859ebd813b8f303126f789..d86d8ab72d3851993421c2d3e778a5508d351177 100644 --- a/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch +++ b/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch @@ -1,7 +1,7 @@ -From 55c4f093b3a527c52cc8ed7138c330512973c9e6 Mon Sep 17 00:00:00 2001 -From: fangying -Date: Wed, 18 Mar 2020 12:49:33 +0800 -Subject: [PATCH 
1/2] pcie: Add pcie-root-port fast plug/unplug feature +From 3c4b4c4fc3c71b375490233bb9209763d7094ee9 Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 8 Feb 2022 16:10:31 +0800 +Subject: [PATCH] pcie: Add pcie-root-port fast plug/unplug feature If a device is plugged in the pcie-root-port when VM kernel is booting, the kernel may wrongly disable the device. @@ -15,108 +15,85 @@ so we must fix it up. We hack into the pcie native hotplug patch so that hotplug/unplug will work under this circumstance. Signed-off-by: Ying Fang +Signed-off-by: Yan Wang --- - hw/core/machine.c | 1 + - hw/pci-bridge/gen_pcie_root_port.c | 3 ++- - hw/pci/pcie.c | 23 +++++++++++++++++++---- - include/hw/pci/pcie_port.h | 3 ++- - 4 files changed, 24 insertions(+), 6 deletions(-) + hw/core/machine.c | 2 ++ + hw/pci-bridge/gen_pcie_root_port.c | 2 ++ + hw/pci/pcie.c | 13 ++++++++++++- + include/hw/pci/pcie_port.h | 3 +++ + 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/hw/core/machine.c b/hw/core/machine.c -index 2baf9ec3..3138f97b 100644 +index 0c17398141..965682619b 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -33,6 +33,7 @@ GlobalProperty hw_compat_3_1[] = { +@@ -160,6 +160,8 @@ const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); + GlobalProperty hw_compat_3_1[] = { { "pcie-root-port", "x-speed", "2_5" }, { "pcie-root-port", "x-width", "1" }, - { "pcie-root-port", "fast-plug", "0" }, ++ { "pcie-root-port", "fast-plug", "0" }, + { "pcie-root-port", "fast-unplug", "0" }, { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, { "tpm-crb", "ppi", "false" }, diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c -index 3179c4ea..2fbb11d0 100644 +index 1ce4e7beba..1e1ab5bb19 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c -@@ -131,7 +131,8 @@ static Property gen_rp_props[] = { +@@ -145,6 
+145,8 @@ static Property gen_rp_props[] = { speed, PCIE_LINK_SPEED_16), DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, width, PCIE_LINK_WIDTH_32), -- DEFINE_PROP_UINT8("fast-plug", PCIESlot, disable_lnksta_dllla, 0), + DEFINE_PROP_UINT8("fast-plug", PCIESlot, fast_plug, 0), + DEFINE_PROP_UINT8("fast-unplug", PCIESlot, fast_unplug, 0), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index c0d6ff13..2a8ff86d 100644 +index dccf204451..04fbd794a8 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c -@@ -85,7 +85,7 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) - * To fix this up, let's enable the PCI_EXP_LNKSTA_DLLLA - * only if it is a PCIESlot device. - */ -- if (s == NULL || s->disable_lnksta_dllla == 0) { -+ if (s == NULL || s->fast_plug == 0) { - if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { - pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); -@@ -136,8 +136,11 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) - */ - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, - PCI_EXP_LNKCAP_DLLLARC); -- pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, -- PCI_EXP_LNKSTA_DLLLA); -+ -+ if(s->fast_plug == 0) { -+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, -+ PCI_EXP_LNKSTA_DLLLA); -+ } - - /* - * Target Link Speed defaults to the highest link speed supported by -@@ -477,6 +480,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, - Error *local_err = NULL; - PCIDevice *pci_dev = PCI_DEVICE(dev); - PCIBus *bus = pci_get_bus(pci_dev); -+ PCIESlot *s = PCIE_SLOT(PCI_DEVICE(hotplug_dev)); -+ uint8_t *exp_cap = pci_dev->config + pci_dev->exp.exp_cap; +@@ -555,6 +555,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; + uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); ++ PCIESlot *s = 
PCIE_SLOT(hotplug_pdev); - pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &local_err); - if (local_err) { -@@ -495,7 +500,17 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + /* Check if hot-unplug is disabled on the slot */ + if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { +@@ -600,7 +601,17 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } -- pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev)); +- pcie_cap_slot_push_attention_button(hotplug_pdev); + if ((pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) && s->fast_plug) { -+ pci_word_test_and_clear_mask(exp_cap+ PCI_EXP_LNKSTA, ++ pci_word_test_and_clear_mask(pci_dev->config + pci_dev->exp.exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + + if (s->fast_unplug) { -+ pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), ++ pcie_cap_slot_event(hotplug_pdev, + PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + } else { -+ pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev)); ++ pcie_cap_slot_push_attention_button(hotplug_pdev); + } } /* pci express slot for pci express root/downstream port diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h -index c3969921..b57af4ee 100644 +index 90e6cf45b8..7148a0959b 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h -@@ -50,7 +50,8 @@ struct PCIESlot { +@@ -56,6 +56,9 @@ struct PCIESlot { uint8_t chassis; uint16_t slot; -- uint8_t disable_lnksta_dllla; + uint8_t fast_plug; + uint8_t fast_unplug; - ++ PCIExpLinkSpeed speed; PCIExpLinkWidth width; + -- -2.19.1 +2.27.0 diff --git a/pcie-Compat-with-devices-which-do-not-support-Link-W.patch b/pcie-Compat-with-devices-which-do-not-support-Link-W.patch index f5c261916fb66e0a54844ddea65ab70fe2bfdee8..9897182af472d0d12369ed8b945d7b380730df9b 100644 --- a/pcie-Compat-with-devices-which-do-not-support-Link-W.patch +++ b/pcie-Compat-with-devices-which-do-not-support-Link-W.patch @@ -1,23 +1,24 @@ -From 5e1ad9f0f3c344b9fe20fc01ea2f1dfb8ac7fd67 Mon 
Sep 17 00:00:00 2001 +From 6c72e65d57dc2a7d811f76a126a9a006abd0ab75 Mon Sep 17 00:00:00 2001 From: fangying Date: Wed, 18 Mar 2020 12:51:33 +0800 -Subject: [PATCH 2/2] pcie: Compat with devices which do not support Link - Width, such as ioh3420 +Subject: [PATCH] pcie: Compat with devices which do not support Link Width, + such as ioh3420 We hack into PCI_EXP_LNKCAP to support device fast plug/unplug for pcie-root-port. However some devices like ioh3420 does not suport it, so PCI_EXP_LNKCAP is not set for such devices. Signed-off-by: Ying Fang +Signed-off-by: Yan Wang --- hw/pci/pcie.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index 2a8ff86d..5044bff4 100644 +index 6db0cf69cd..dccf204451 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c -@@ -108,13 +108,6 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) +@@ -97,13 +97,6 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) return; } @@ -31,7 +32,7 @@ index 2a8ff86d..5044bff4 100644 /* * Link bandwidth notification is required for all root ports and * downstream ports supporting links wider than x1 or multiple link -@@ -122,6 +115,12 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) +@@ -111,6 +104,12 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) */ if (s->width > QEMU_PCI_EXP_LNK_X1 || s->speed > QEMU_PCI_EXP_LNK_2_5GT) { @@ -45,5 +46,5 @@ index 2a8ff86d..5044bff4 100644 PCI_EXP_LNKCAP_LBNC); } -- -2.19.1 +2.27.0 diff --git a/pcie-disable-the-PCI_EXP_LINKSTA_DLLA-cap.patch b/pcie-disable-the-PCI_EXP_LINKSTA_DLLA-cap.patch deleted file mode 100644 index dc31a503345e1599abe5cf879b8edfeff4ee22e9..0000000000000000000000000000000000000000 --- a/pcie-disable-the-PCI_EXP_LINKSTA_DLLA-cap.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 7381599d4222f9b5cff6935a66e8b311af77f620 Mon Sep 17 00:00:00 2001 -From: Li Mingwang -Date: Thu, 17 Oct 2019 16:57:52 +0800 -Subject: [PATCH] Subject: [PATCH] pcie: disable the PCI_EXP_LINKSTA_DLLA cap - for 
pcie-root-port by default - -If the PCI_EXP_LNKSTA_DLLLA capability is set by default, linux -kernel will send PDC event to detect whether there is a device in -pcie slot. If a device is pluged in the pcie-root-port at the same -time, hot-plug device will send ABP + PDC events to the kernel. The -VM kernel will wrongly unplug the device if two PDC events get too -close. Thus we'd better set the PCI_EXP_LNKSTA_DLLLA capability only -in hotplug scenario - -Signed-off-by: Li Mingwang ---- - hw/core/machine.c | 1 + - hw/pci-bridge/gen_pcie_root_port.c | 1 + - hw/pci/pcie.c | 18 ++++++++++++++---- - include/hw/pci/pcie_port.h | 2 ++ - 4 files changed, 18 insertions(+), 4 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5d046a43..29a708da 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -30,6 +30,7 @@ const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); - GlobalProperty hw_compat_3_1[] = { - { "pcie-root-port", "x-speed", "2_5" }, - { "pcie-root-port", "x-width", "1" }, -+ { "pcie-root-port", "fast-plug", "0" }, - { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, - { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, - { "tpm-crb", "ppi", "false" }, -diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c -index 26bda73e..3179c4ea 100644 ---- a/hw/pci-bridge/gen_pcie_root_port.c -+++ b/hw/pci-bridge/gen_pcie_root_port.c -@@ -131,6 +131,7 @@ static Property gen_rp_props[] = { - speed, PCIE_LINK_SPEED_16), - DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, - width, PCIE_LINK_WIDTH_32), -+ DEFINE_PROP_UINT8("fast-plug", PCIESlot, disable_lnksta_dllla, 0), - DEFINE_PROP_END_OF_LIST() - }; - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index cf1ca30f..c0d6ff13 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -50,6 +50,7 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) - { - uint8_t *exp_cap = dev->config + 
dev->exp.exp_cap; - uint8_t *cmask = dev->cmask + dev->exp.exp_cap; -+ PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT); - - /* capability register - interrupt message number defaults to 0 */ -@@ -76,11 +77,20 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) - QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) | - QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT)); - -- if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { -- pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, -- PCI_EXP_LNKSTA_DLLLA); -+ /* If a device is plugged in the pcie-root-port when VM kernel -+ * is just booting, the kernel will wrongly disable the device. -+ * This bug was brought in two patches of the linux kernel, i.e. -+ * https://patchwork.kernel.org/patch/10575355/ and -+ * https://patchwork.kernel.org/patch/10766219/. -+ * To fix this up, let's enable the PCI_EXP_LNKSTA_DLLLA -+ * only if it is a PCIESlot device. -+ */ -+ if (s == NULL || s->disable_lnksta_dllla == 0) { -+ if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { -+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, -+ PCI_EXP_LNKSTA_DLLLA); -+ } - } -- - /* We changed link status bits over time, and changing them across - * migrations is generally fine as hardware changes them too. - * Let's not bother checking. 
-diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h -index 09586f46..c3969921 100644 ---- a/include/hw/pci/pcie_port.h -+++ b/include/hw/pci/pcie_port.h -@@ -50,6 +50,8 @@ struct PCIESlot { - uint8_t chassis; - uint16_t slot; - -+ uint8_t disable_lnksta_dllla; -+ - PCIExpLinkSpeed speed; - PCIExpLinkWidth width; - --- -2.19.1 - diff --git a/pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch b/pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch new file mode 100644 index 0000000000000000000000000000000000000000..015ba30eb31cfadfcbd25abccb7a00172f29dabe --- /dev/null +++ b/pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch @@ -0,0 +1,37 @@ +From 632ec38ed57b76baf3e499d1789aeea0f74df0a5 Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki +Date: Wed, 28 Feb 2024 20:33:13 +0900 +Subject: [PATCH] pcie_sriov: Validate NumVFs (CVE-2024-26327) + +The guest may write NumVFs greater than TotalVFs and that can lead +to buffer overflow in VF implementations. + +Cc: qemu-stable@nongnu.org +Fixes: CVE-2024-26327 +Fixes: 7c0fa8dff811 ("pcie: Add support for Single Root I/O Virtualization (SR/IOV)") +Signed-off-by: Akihiko Odaki +Message-Id: <20240228-reuse-v8-2-282660281e60@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Sriram Yagnaraman +--- + hw/pci/pcie_sriov.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c +index a1fe65f5d8..da209b7f47 100644 +--- a/hw/pci/pcie_sriov.c ++++ b/hw/pci/pcie_sriov.c +@@ -176,6 +176,9 @@ static void register_vfs(PCIDevice *dev) + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); ++ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { ++ return; ++ } + + dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); + +-- +2.27.0 + diff --git a/physmem-Add-helper-function-to-destroy-CPU-AddressSp.patch b/physmem-Add-helper-function-to-destroy-CPU-AddressSp.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d3413d07b954188d94a397bc4d5582663040a77 --- /dev/null +++ b/physmem-Add-helper-function-to-destroy-CPU-AddressSp.patch @@ -0,0 +1,93 @@ +From 7efd5d829730d0481659cda91f725df3b141f469 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 16 Jul 2024 12:15:01 +0100 +Subject: [PATCH 66/78] physmem: Add helper function to destroy CPU + AddressSpace + +Virtual CPU Hot-unplug leads to unrealization of a CPU object. This also +involves destruction of the CPU AddressSpace. Add common function to help +destroy the CPU AddressSpace. + +Signed-off-by: Salil Mehta +Tested-by: Vishnu Pajjuri +Reviewed-by: Gavin Shan +Tested-by: Xianglai Li +Tested-by: Miguel Luis +Reviewed-by: Shaoqin Huang +Tested-by: Zhao Liu +Acked-by: Igor Mammedov +Message-Id: <20240716111502.202344-7-salil.mehta@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Xianglai Li +--- + include/hw/core/cpu.h | 4 ++-- + system/physmem.c | 18 +++++++++++------- + 2 files changed, 13 insertions(+), 9 deletions(-) + +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index ee04ee44c2..37f3a469c8 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -495,8 +495,8 @@ struct CPUState { + QemuMutex work_mutex; + QSIMPLEQ_HEAD(, qemu_work_item) work_list; + +- CPUAddressSpace *cpu_ases; +- int cpu_ases_ref_count; ++ struct CPUAddressSpace *cpu_ases; ++ int cpu_ases_count; + int num_ases; + AddressSpace *as; + MemoryRegion *memory; +diff --git a/system/physmem.c b/system/physmem.c +index 2c8b83f811..c50ac24786 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -761,7 +761,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx, + + if (!cpu->cpu_ases) { + cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); +- cpu->cpu_ases_ref_count = cpu->num_ases; ++ cpu->cpu_ases_count = cpu->num_ases; + } + + newas = &cpu->cpu_ases[asidx]; +@@ -779,24 +779,28 @@ void cpu_address_space_destroy(CPUState *cpu, int asidx) + { + CPUAddressSpace *cpuas; + +- assert(asidx < cpu->num_ases); +- assert(asidx == 0 || !kvm_enabled()); + assert(cpu->cpu_ases); ++ assert(asidx >= 0 && asidx < cpu->num_ases); ++ /* KVM cannot currently support multiple address spaces. 
*/ ++ assert(asidx == 0 || !kvm_enabled()); + + cpuas = &cpu->cpu_ases[asidx]; + if (tcg_enabled()) { + memory_listener_unregister(&cpuas->tcg_as_listener); + } + +- cpuas->as->free_in_rcu = true; + address_space_destroy(cpuas->as); ++ g_free_rcu(cpuas->as, rcu); + +- if (cpu->cpu_ases_ref_count == 1) { ++ if (asidx == 0) { ++ /* reset the convenience alias for address space 0 */ ++ cpu->as = NULL; ++ } ++ ++ if (--cpu->cpu_ases_count == 0) { + g_free(cpu->cpu_ases); + cpu->cpu_ases = NULL; + } +- +- cpu->cpu_ases_ref_count--; + } + + AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) +-- +2.39.1 + diff --git a/physmem-Bail-out-qemu_ram_block_from_host-for-invali.patch b/physmem-Bail-out-qemu_ram_block_from_host-for-invali.patch new file mode 100644 index 0000000000000000000000000000000000000000..280e6aeb8ed484cf4dedc8f7c1e707bf45313b54 --- /dev/null +++ b/physmem-Bail-out-qemu_ram_block_from_host-for-invali.patch @@ -0,0 +1,40 @@ +From 39eae397a6b573505c0e84cc808cd9765a950908 Mon Sep 17 00:00:00 2001 +From: guping +Date: Mon, 15 Jul 2024 00:54:12 +0000 +Subject: [PATCH] physmem: Bail out qemu_ram_block_from_host() for invalid ram + addrs cherry-pick from 596ccccdbfa124adb42be8c2faf0c74f4849c7a6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Bail out in qemu_ram_block_from_host() when +xen_ram_addr_from_mapcache() does not find an existing +mapping. + +Signed-off-by: default avatarEdgar E. 
Iglesias +Reviewed-by: default avatarAlex Bennée +Reviewed-by: default avatarStefano Stabellini + +Signed-off-by: guping +--- + system/physmem.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/system/physmem.c b/system/physmem.c +index cbe838f203..0c629233bd 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -2263,6 +2263,10 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, + ram_addr_t ram_addr; + RCU_READ_LOCK_GUARD(); + ram_addr = xen_ram_addr_from_mapcache(ptr); ++ if (ram_addr == RAM_ADDR_INVALID) { ++ return NULL; ++ } ++ + block = qemu_get_ram_block(ram_addr); + if (block) { + *offset = ram_addr - block->offset; +-- +2.41.0.windows.1 + diff --git a/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch b/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch new file mode 100644 index 0000000000000000000000000000000000000000..696a9a49357b61cd9d645c657c5aa38830276e88 --- /dev/null +++ b/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch @@ -0,0 +1,127 @@ +From 8fa5af7de07d9bc2535ea8fab087d509795e3579 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 22:12:52 +0000 +Subject: [PATCH] physmem,gdbstub: Common helping funcs/changes to *unrealize* + vCPU + +Supporting vCPU Hotplug for ARM arch also means introducing new functionality of +unrealizing the ARMCPU. This requires some new common functions. + +Defining them as part of architecture independent change so that this code could +be reused by other interested parties. 
+ +Signed-off-by: Salil Mehta +--- + gdbstub/gdbstub.c | 6 ++++++ + include/exec/cpu-common.h | 8 ++++++++ + include/exec/gdbstub.h | 1 + + include/hw/core/cpu.h | 1 + + system/physmem.c | 25 +++++++++++++++++++++++++ + 5 files changed, 41 insertions(+) + +diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c +index 46d752bbc2..f16006d2a8 100644 +--- a/gdbstub/gdbstub.c ++++ b/gdbstub/gdbstub.c +@@ -582,6 +582,12 @@ void gdb_register_coprocessor(CPUState *cpu, + } + } + ++void gdb_unregister_coprocessor_all(CPUState *cpu) ++{ ++ g_array_free(cpu->gdb_regs, true); ++ cpu->gdb_regs = NULL; ++} ++ + static void gdb_process_breakpoint_remove_all(GDBProcess *p) + { + CPUState *cpu = gdb_get_first_cpu_in_process(p); +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 41115d8919..2a3d4aa1c8 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -139,6 +139,14 @@ size_t qemu_ram_pagesize_largest(void); + */ + void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr); ++/** ++ * cpu_address_space_destroy: ++ * @cpu: CPU for which address space needs to be destroyed ++ * @asidx: integer index of this address space ++ * ++ * Note that with KVM only one address space is supported. 
++ */ ++void cpu_address_space_destroy(CPUState *cpu, int asidx); + + void cpu_physical_memory_rw(hwaddr addr, void *buf, + hwaddr len, bool is_write); +diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h +index d8a3c56fa2..d123b838c2 100644 +--- a/include/exec/gdbstub.h ++++ b/include/exec/gdbstub.h +@@ -39,6 +39,7 @@ typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t *buf, int reg); + void gdb_register_coprocessor(CPUState *cpu, + gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg, + int num_regs, const char *xml, int g_pos); ++void gdb_unregister_coprocessor_all(CPUState *cpu); + + /** + * gdbserver_start: start the gdb server +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 0ca778eb75..6dbe163548 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -496,6 +496,7 @@ struct CPUState { + QSIMPLEQ_HEAD(, qemu_work_item) work_list; + + CPUAddressSpace *cpu_ases; ++ int cpu_ases_ref_count; + int num_ases; + AddressSpace *as; + MemoryRegion *memory; +diff --git a/system/physmem.c b/system/physmem.c +index 247c252e53..299174ad91 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -761,6 +761,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx, + + if (!cpu->cpu_ases) { + cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); ++ cpu->cpu_ases_ref_count = cpu->num_ases; + } + + newas = &cpu->cpu_ases[asidx]; +@@ -774,6 +775,30 @@ void cpu_address_space_init(CPUState *cpu, int asidx, + } + } + ++void cpu_address_space_destroy(CPUState *cpu, int asidx) ++{ ++ CPUAddressSpace *cpuas; ++ ++ assert(asidx < cpu->num_ases); ++ assert(asidx == 0 || !kvm_enabled()); ++ assert(cpu->cpu_ases); ++ ++ cpuas = &cpu->cpu_ases[asidx]; ++ if (tcg_enabled()) { ++ memory_listener_unregister(&cpuas->tcg_as_listener); ++ } ++ ++ address_space_destroy(cpuas->as); ++ g_free_rcu(cpuas->as, rcu); ++ ++ if (cpu->cpu_ases_ref_count == 1) { ++ g_free(cpu->cpu_ases); ++ cpu->cpu_ases = NULL; ++ } ++ ++ cpu->cpu_ases_ref_count--; ++} ++ + 
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) + { + /* Return the AddressSpace corresponding to the specified index */ +-- +2.27.0 + diff --git a/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch index b375c20f10426f4328d288d1be037609c06842de..d879b781bc47781c33814d8be006901a62fa5c80 100644 --- a/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch +++ b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch @@ -1,4 +1,4 @@ -From f995e8b5e5c14f83a16433f192440ec5c82c87fa Mon Sep 17 00:00:00 2001 +From e730214f4485ad444d8a1db9a284da53f407e8da Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Mon, 29 Jul 2019 16:16:35 +0800 Subject: [PATCH] pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff @@ -17,15 +17,16 @@ https://www.spinics.net/lists/linux-serial/msg23163.html Signed-off-by: Haibin Wang Reviewed-by: Shannon Zhao Reviewed-by: Ying Fang +Signed-off-by: Yan Wang --- hw/char/pl011.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/char/pl011.c b/hw/char/pl011.c -index e5dd448f..899745ef 100644 +index 58edeb9ddb..bc65d778d2 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c -@@ -223,6 +223,10 @@ static void pl011_write(void *opaque, hwaddr offset, +@@ -314,6 +314,10 @@ static void pl011_write(void *opaque, hwaddr offset, case 17: /* UARTICR */ s->int_level &= ~value; pl011_update(s); @@ -37,5 +38,5 @@ index e5dd448f..899745ef 100644 case 18: /* UARTDMACR */ s->dmacr = value; -- -2.19.1 +2.27.0 diff --git a/pl031-support-rtc-timer-property-for-pl031.patch b/pl031-support-rtc-timer-property-for-pl031.patch index 7bd977af364da6871f7b0b7f8cb62af2f23ab26c..48f4d1bad790b6041e6696cd3912e6c77fdeab8e 100644 --- a/pl031-support-rtc-timer-property-for-pl031.patch +++ b/pl031-support-rtc-timer-property-for-pl031.patch @@ -1,23 +1,26 @@ -From 68d4653b5ffde629e9b05d5de13b6adcde9d153b Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Mon, 29 Jul 2019 16:20:51 +0800 +From 
8e30e81c4268103d502587de565842b9632a7965 Mon Sep 17 00:00:00 2001 +From: Jinhao Gao +Date: Tue, 15 Feb 2022 17:02:08 +0800 Subject: [PATCH] pl031: support rtc-timer property for pl031 This patch adds the rtc-timer property for pl031, we can get the rtc time (UTC) through qmp command "qom-get date" with this property. Signed-off-by: Haibin Wang -Reviewed-by: Shannon Zhao +Reviewed-by: Shannon Zhao Reviewed-by: Ying Fang +Signed-off-by: Keqian Zhu +Signed-off-by: Jinhao Gao +Signed-off-by: Yuan Zhang --- - hw/timer/pl031.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) + hw/rtc/pl031.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) -diff --git a/hw/timer/pl031.c b/hw/timer/pl031.c -index 274ad47a..04331472 100644 ---- a/hw/timer/pl031.c -+++ b/hw/timer/pl031.c -@@ -57,6 +57,15 @@ static uint32_t pl031_get_count(PL031State *s) +diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c +index f2e6baebba..57e9a35616 100644 +--- a/hw/rtc/pl031.c ++++ b/hw/rtc/pl031.c +@@ -63,6 +63,15 @@ static uint32_t pl031_get_count(PL031State *s) return s->tick_offset + now / NANOSECONDS_PER_SECOND; } @@ -33,17 +36,36 @@ index 274ad47a..04331472 100644 static void pl031_set_alarm(PL031State *s) { uint32_t ticks; -@@ -191,6 +200,10 @@ static void pl031_init(Object *obj) +@@ -202,6 +211,20 @@ static void pl031_init(Object *obj) qemu_clock_get_ns(rtc_clock) / NANOSECONDS_PER_SECOND; s->timer = timer_new_ns(rtc_clock, pl031_interrupt, s); ++ object_property_add_tm(OBJECT(s), "date", pl031_get_date); ++} + -+ object_property_add_tm(OBJECT(s), "date", pl031_get_date, NULL); ++static void pl031_realize(DeviceState *d, Error **errp) ++{ + object_property_add_alias(qdev_get_machine(), "rtc-time", -+ OBJECT(s), "date", NULL); ++ OBJECT(d), "date"); ++} ++ ++static void pl031_unrealize(DeviceState *d) ++{ ++ if (object_property_find(qdev_get_machine(), "rtc-time")) { ++ object_property_del(qdev_get_machine(), "rtc-time"); ++ } + } + + static void pl031_finalize(Object *obj) +@@ 
-338,6 +361,8 @@ static void pl031_class_init(ObjectClass *klass, void *data) + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_pl031; ++ dc->realize = pl031_realize; ++ dc->unrealize = pl031_unrealize; + device_class_set_props(dc, pl031_properties); } - static int pl031_pre_save(void *opaque) -- -2.19.1 +2.27.0 diff --git a/platform-bus-fix-refcount-leak.patch b/platform-bus-fix-refcount-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..750d84d2e4a662a61ef0379d53127cb81da18f70 --- /dev/null +++ b/platform-bus-fix-refcount-leak.patch @@ -0,0 +1,42 @@ +From 0b23e1ad9e27fa60525b3d014da0425d2c24885f Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 17 Oct 2024 13:19:51 +0800 +Subject: [PATCH] platform-bus: fix refcount leak + +cheery-pick from 99ec7b440a1d6a6ef07450b68687d24d13a25fb5 + +memory_region_find() returns an MR which it is the caller's +responsibility to unref, but platform_bus_map_mmio() was +forgetting to do so, thus leaking the MR. 
+ +Signed-off-by: Gao Shiyuan gaoshiyuan@baidu.com +Message-id: 20240829131005.9196-1-gaoshiyuan@baidu.com +Reviewed-by: Peter Maydell peter.maydell@linaro.org +[PMM: tweaked commit message] +Signed-off-by: Peter Maydell peter.maydell@linaro.org +Signed-off-by: Zhang Jiao +--- + hw/core/platform-bus.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/core/platform-bus.c b/hw/core/platform-bus.c +index b8487b26b6..dc58bf505a 100644 +--- a/hw/core/platform-bus.c ++++ b/hw/core/platform-bus.c +@@ -145,9 +145,12 @@ static void platform_bus_map_mmio(PlatformBusDevice *pbus, SysBusDevice *sbdev, + * the target device's memory region + */ + for (off = 0; off < pbus->mmio_size; off += alignment) { +- if (!memory_region_find(&pbus->mmio, off, size).mr) { ++ MemoryRegion *mr = memory_region_find(&pbus->mmio, off, size).mr; ++ if (!mr) { + found_region = true; + break; ++ } else { ++ memory_region_unref(mr); + } + } + +-- +2.41.0.windows.1 + diff --git a/plugins-loader-fix-deadlock-when-resetting-uninstall.patch b/plugins-loader-fix-deadlock-when-resetting-uninstall.patch new file mode 100644 index 0000000000000000000000000000000000000000..433a1416f3127bfd03cdbf83dfb88146ee57ceba --- /dev/null +++ b/plugins-loader-fix-deadlock-when-resetting-uninstall.patch @@ -0,0 +1,39 @@ +From 105b30f3406bb968b1eb87a6127a988cfb0a3022 Mon Sep 17 00:00:00 2001 +From: songjie +Date: Wed, 13 Aug 2025 14:46:12 +0800 +Subject: [PATCH] plugins/loader: fix deadlock when resetting/uninstalling a + plugin +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reported and fixed by Dmitry Kurakin. 
+ +Fixes: #2901 + +Signed-off-by: default avatarPierrick Bouvier +Message-Id: <20250404032027.430575-2-pierrick.bouvier@linaro.org> +Signed-off-by: default avatarAlex Bennée +Reviewed-by: default avatarPhilippe Mathieu-Daudé +(cherry picked from commit c07cd110) +Signed-off-by: default avatarMichael Tokarev +--- + plugins/loader.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/plugins/loader.c b/plugins/loader.c +index 734c11cae0..1f06dfa970 100644 +--- a/plugins/loader.c ++++ b/plugins/loader.c +@@ -374,7 +374,7 @@ static void plugin_reset_destroy(struct qemu_plugin_reset_data *data) + { + qemu_rec_mutex_lock(&plugin.lock); + plugin_reset_destroy__locked(data); +- qemu_rec_mutex_lock(&plugin.lock); ++ qemu_rec_mutex_unlock(&plugin.lock); + } + + static void plugin_flush_destroy(CPUState *cpu, run_on_cpu_data arg) +-- +2.33.0 + diff --git a/ppc-pnv-I2C-controller-is-not-user-creatablei.patch b/ppc-pnv-I2C-controller-is-not-user-creatablei.patch new file mode 100644 index 0000000000000000000000000000000000000000..254ef7dcde6dc003cdfd7fef0cc1bf913d7b84d3 --- /dev/null +++ b/ppc-pnv-I2C-controller-is-not-user-creatablei.patch @@ -0,0 +1,46 @@ +From e2a4aed3ef07b05302ab4d15017b720fec97905f Mon Sep 17 00:00:00 2001 +From: gaojiazhen +Date: Mon, 25 Mar 2024 18:04:40 +0800 +Subject: [PATCH] ppc/pnv: I2C controller is not user creatablei +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry picked from commit 5b2b9450a2f83668bedd092b43233ad35f0d40bd + +The I2C controller is a subunit of the processor. Make it so and avoid +QEMU crashes. + + $ build/qemu-system-ppc64 -S -machine powernv9 -device pnv-i2c + qemu-system-ppc64: ../hw/ppc/pnv_i2c.c:521: pnv_i2c_realize: Assertion `i2c->chip' failed. 
+ Aborted (core dumped) + +Fixes: 263b81e ("ppc/pnv: Add an I2C controller model") +Cc: Glenn Miles +Reported-by: Thomas Huth +Reviewed-by: Thomas Huth +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Glenn Miles +Signed-off-by: Cédric Le Goater +Signed-off-by: Gao Jiazhen +--- + hw/ppc/pnv_i2c.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/ppc/pnv_i2c.c b/hw/ppc/pnv_i2c.c +index 656a48eebe..0ac6aa5c06 100644 +--- a/hw/ppc/pnv_i2c.c ++++ b/hw/ppc/pnv_i2c.c +@@ -673,6 +673,9 @@ static void pnv_i2c_class_init(ObjectClass *klass, void *data) + + xscomc->dt_xscom = pnv_i2c_dt_xscom; + ++ /* Reason: This device is part of the CPU and cannot be used separately */ ++ dc->user_creatable = false; ++ + dc->desc = "PowerNV I2C"; + dc->realize = pnv_i2c_realize; + device_class_set_props(dc, pnv_i2c_properties); +-- +2.41.0.windows.1 + diff --git a/ppc-vof-Fix-unaligned-FDT-property-access.patch b/ppc-vof-Fix-unaligned-FDT-property-access.patch new file mode 100644 index 0000000000000000000000000000000000000000..9fc20c59170efa393be38277dca238bf2f3d576d --- /dev/null +++ b/ppc-vof-Fix-unaligned-FDT-property-access.patch @@ -0,0 +1,34 @@ +From ad1d68502c41ff6a966ae89ae5ac008050602e2a Mon Sep 17 00:00:00 2001 +From: qihao +Date: Mon, 29 Jul 2024 10:38:46 +0800 +Subject: [PATCH] ppc/vof: Fix unaligned FDT property access + +cheery-pick from 785c8637f9d2362a8addf4ded853d975955a9d6b + +FDT properties are aligned by 4 bytes, not 8 bytes. + +Signed-off-by: Akihiko Odaki +Reviewed-by: Peter Maydell +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Nicholas Piggin +Signed-off-by: qihao_yewu +--- + hw/ppc/vof.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c +index e3b430a81f..b5b6514d79 100644 +--- a/hw/ppc/vof.c ++++ b/hw/ppc/vof.c +@@ -646,7 +646,7 @@ static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base) + mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen); + g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc)); + if (sc == 2) { +- mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac)); ++ mem0_end = ldq_be_p(mem0_reg + sizeof(uint32_t) * ac); + } else { + mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } +-- +2.41.0.windows.1 + diff --git a/ppc-xive-Fix-ESB-length-overflow-on-32-bit-hosts.patch b/ppc-xive-Fix-ESB-length-overflow-on-32-bit-hosts.patch new file mode 100644 index 0000000000000000000000000000000000000000..551df5a67cd78cea38e52b1023929d59a4811884 --- /dev/null +++ b/ppc-xive-Fix-ESB-length-overflow-on-32-bit-hosts.patch @@ -0,0 +1,69 @@ +From c73b18ef8f2dd15934d90f65ba825bef19d11f73 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Thu, 7 Nov 2024 22:07:23 -0500 +Subject: [PATCH] ppc/xive: Fix ESB length overflow on 32-bit hosts + +cheery-pick from 07f2770503e24889720028ddf9ef54788ddf3b6d + +The length of this region can be > 32-bits, which overflows size_t on +32-bit hosts. Change to uint64_t. 
+ +Signed-off-by: Nicholas Piggin +Signed-off-by: qihao_yewu +--- + hw/intc/spapr_xive_kvm.c | 4 ++-- + hw/intc/xive.c | 2 +- + include/hw/ppc/xive.h | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c +index 5789062379..7a86197fc9 100644 +--- a/hw/intc/spapr_xive_kvm.c ++++ b/hw/intc/spapr_xive_kvm.c +@@ -720,7 +720,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, + { + SpaprXive *xive = SPAPR_XIVE(intc); + XiveSource *xsrc = &xive->source; +- size_t esb_len = xive_source_esb_len(xsrc); ++ uint64_t esb_len = xive_source_esb_len(xsrc); + size_t tima_len = 4ull << TM_SHIFT; + CPUState *cs; + int fd; +@@ -824,7 +824,7 @@ void kvmppc_xive_disconnect(SpaprInterruptController *intc) + { + SpaprXive *xive = SPAPR_XIVE(intc); + XiveSource *xsrc; +- size_t esb_len; ++ uint64_t esb_len; + + assert(xive->fd != -1); + +diff --git a/hw/intc/xive.c b/hw/intc/xive.c +index a3585593d8..0cfc172dd4 100644 +--- a/hw/intc/xive.c ++++ b/hw/intc/xive.c +@@ -1238,7 +1238,7 @@ static void xive_source_reset(void *dev) + static void xive_source_realize(DeviceState *dev, Error **errp) + { + XiveSource *xsrc = XIVE_SOURCE(dev); +- size_t esb_len = xive_source_esb_len(xsrc); ++ uint64_t esb_len = xive_source_esb_len(xsrc); + + assert(xsrc->xive); + +diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h +index f120874e0f..00023c0233 100644 +--- a/include/hw/ppc/xive.h ++++ b/include/hw/ppc/xive.h +@@ -218,7 +218,7 @@ static inline bool xive_source_esb_has_2page(XiveSource *xsrc) + xsrc->esb_shift == XIVE_ESB_4K_2PAGE; + } + +-static inline size_t xive_source_esb_len(XiveSource *xsrc) ++static inline uint64_t xive_source_esb_len(XiveSource *xsrc) + { + return (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + } +-- +2.41.0.windows.1 + diff --git a/pr-manager-Fix-invalid-g_free-crash-bug.patch b/pr-manager-Fix-invalid-g_free-crash-bug.patch deleted file mode 100644 index 
b171cdb5ae34dab7135926c2250541814d543a02..0000000000000000000000000000000000000000 --- a/pr-manager-Fix-invalid-g_free-crash-bug.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 57fdf4a13ff16d9d48a43f02a5e7b42e3d264f83 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 22 Aug 2019 15:38:46 +0200 -Subject: [PATCH] pr-manager: Fix invalid g_free() crash bug -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -pr_manager_worker() passes its @opaque argument to g_free(). Wrong; -it points to pr_manager_worker()'s automatic @data. Broken when -commit 2f3a7ab39be converted @data from heap- to stack-allocated. Fix -by deleting the g_free(). - -Fixes: 2f3a7ab39bec4ba8022dc4d42ea641165b004e3e -Cc: qemu-stable@nongnu.org -Signed-off-by: Markus Armbruster -Reviewed-by: Philippe Mathieu-Daudé -Acked-by: Paolo Bonzini -Signed-off-by: Kevin Wolf -(cherry picked from commit 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3) -Signed-off-by: Michael Roth ---- - scsi/pr-manager.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c -index ee43663576..0c866e8698 100644 ---- a/scsi/pr-manager.c -+++ b/scsi/pr-manager.c -@@ -39,7 +39,6 @@ static int pr_manager_worker(void *opaque) - int fd = data->fd; - int r; - -- g_free(data); - trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]); - - /* The reference was taken in pr_manager_execute. */ --- -2.23.0 diff --git a/prep-add-ppc-parity-write-method.patch b/prep-add-ppc-parity-write-method.patch deleted file mode 100644 index fbc3dcc8cf024a5f26ce66f73ff653b5c74837dd..0000000000000000000000000000000000000000 --- a/prep-add-ppc-parity-write-method.patch +++ /dev/null @@ -1,50 +0,0 @@ -From f4eed258b1b8b434927fbc9a18bbcb52d3f55ce6 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:16:14 +0800 -Subject: [PATCH] prep: add ppc-parity write method - -fix CVE-2020-15469 - -Add ppc-parity mmio write method to avoid NULL pointer dereference -issue. 
- -Reported-by: Lei Sun -Acked-by: David Gibson -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/ppc/prep_systemio.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/ppc/prep_systemio.c b/hw/ppc/prep_systemio.c -index df7603b986..67244ed48c 100644 ---- a/hw/ppc/prep_systemio.c -+++ b/hw/ppc/prep_systemio.c -@@ -23,6 +23,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/log.h" - #include "hw/isa/isa.h" - #include "exec/address-spaces.h" - #include "qemu/error-report.h" /* for error_report() */ -@@ -232,8 +233,15 @@ static uint64_t ppc_parity_error_readl(void *opaque, hwaddr addr, - return val; - } - -+static void ppc_parity_error_writel(void *opaque, hwaddr addr, -+ uint64_t data, unsigned size) -+{ -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write access\n", __func__); -+} -+ - static const MemoryRegionOps ppc_parity_error_ops = { - .read = ppc_parity_error_readl, -+ .write = ppc_parity_error_writel, - .valid = { - .min_access_size = 4, - .max_access_size = 4, --- -2.27.0 - diff --git a/ps2-fix-oob-in-ps2-kbd.patch b/ps2-fix-oob-in-ps2-kbd.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d0f4dea78e884ab9f376659daca9b6717a01661 --- /dev/null +++ b/ps2-fix-oob-in-ps2-kbd.patch @@ -0,0 +1,35 @@ +From 0a54d68547df3f276dc242b52d54e8549d0a84a0 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 11:21:28 +0800 +Subject: [PATCH] ps2: fix oob in ps2 kbd + +fix oob in ps2 kbd +--- + hw/input/ps2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/input/ps2.c b/hw/input/ps2.c +index c8fd23cf36..b647561069 100644 +--- a/hw/input/ps2.c ++++ b/hw/input/ps2.c +@@ -167,7 +167,7 @@ void ps2_queue_noirq(PS2State *s, int b) + } + + q->data[q->wptr] = b; +- if (++q->wptr == PS2_BUFFER_SIZE) { ++ if (++q->wptr >= PS2_BUFFER_SIZE) { + q->wptr = 0; + } + q->count++; +@@ -557,7 +557,7 @@ uint32_t ps2_read_data(PS2State *s) + val = q->data[index]; + } else { + val = 
q->data[q->rptr]; +- if (++q->rptr == PS2_BUFFER_SIZE) { ++ if (++q->rptr >= PS2_BUFFER_SIZE) { + q->rptr = 0; + } + q->count--; +-- +2.27.0 + diff --git a/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch b/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch deleted file mode 100644 index 3ee078c19f392be5b53214f0f03dcea3ecc216fa..0000000000000000000000000000000000000000 --- a/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch +++ /dev/null @@ -1,214 +0,0 @@ -From f97eaa27e2fb6b985f090af9acaa780bb6a2ee5b Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:27 +0530 -Subject: [PATCH] qapi: Add VFIO devices migration stats in Migration stats - -Added amount of bytes transferred to the VM at destination by all VFIO -devices - -Signed-off-by: Kirti Wankhede -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Alex Williamson ---- - hw/vfio/common.c | 19 +++++++++++++++++++ - hw/vfio/migration.c | 9 +++++++++ - include/hw/vfio/vfio-common.h | 3 +++ - migration/migration.c | 17 +++++++++++++++++ - monitor/hmp-cmds.c | 6 ++++++ - qapi/migration.json | 17 +++++++++++++++++ - 6 files changed, 71 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 4ce1c10734..a86a4c4506 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -291,6 +291,25 @@ const MemoryRegionOps vfio_region_ops = { - * Device state interfaces - */ - -+bool vfio_mig_active(void) -+{ -+ VFIOGroup *group; -+ VFIODevice *vbasedev; -+ -+ if (QLIST_EMPTY(&vfio_group_list)) { -+ return false; -+ } -+ -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ QLIST_FOREACH(vbasedev, &group->device_list, next) { -+ if (vbasedev->migration_blocker) { -+ return false; -+ } -+ } -+ } -+ return true; -+} -+ - static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) - { - VFIOGroup *group; -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 0bdf6a1820..b77c66557e 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ 
-45,6 +45,8 @@ - #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) - #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) - -+static int64_t bytes_transferred; -+ - static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, - off_t off, bool iswrite) - { -@@ -255,6 +257,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) - *size = data_size; - } - -+ bytes_transferred += data_size; - return ret; - } - -@@ -785,6 +788,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) - case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_CANCELLED: - case MIGRATION_STATUS_FAILED: -+ bytes_transferred = 0; - ret = vfio_migration_set_state(vbasedev, - ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), - VFIO_DEVICE_STATE_RUNNING); -@@ -866,6 +870,11 @@ err: - - /* ---------------------------------------------------------------------- */ - -+int64_t vfio_mig_bytes_transferred(void) -+{ -+ return bytes_transferred; -+} -+ - int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) - { - VFIOContainer *container = vbasedev->group->container; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 8fd0212264..048731e81f 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -203,6 +203,9 @@ extern const MemoryRegionOps vfio_region_ops; - typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - extern VFIOGroupList vfio_group_list; - -+bool vfio_mig_active(void); -+int64_t vfio_mig_bytes_transferred(void); -+ - #ifdef CONFIG_LINUX - int vfio_get_region_info(VFIODevice *vbasedev, int index, - struct vfio_region_info **info); -diff --git a/migration/migration.c b/migration/migration.c -index b0b9430822..9faf5f63a6 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -49,6 +49,10 @@ - #include "monitor/monitor.h" - #include "net/announce.h" - -+#ifdef CONFIG_VFIO -+#include "hw/vfio/vfio-common.h" 
-+#endif -+ - #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ - - /* Amount of time to allocate to each "chunk" of bandwidth-throttled -@@ -908,6 +912,17 @@ static void populate_disk_info(MigrationInfo *info) - } - } - -+static void populate_vfio_info(MigrationInfo *info) -+{ -+#ifdef CONFIG_VFIO -+ if (vfio_mig_active()) { -+ info->has_vfio = true; -+ info->vfio = g_malloc0(sizeof(*info->vfio)); -+ info->vfio->transferred = vfio_mig_bytes_transferred(); -+ } -+#endif -+} -+ - static void fill_source_migration_info(MigrationInfo *info) - { - MigrationState *s = migrate_get_current(); -@@ -941,6 +956,7 @@ static void fill_source_migration_info(MigrationInfo *info) - - populate_ram_info(info, s); - populate_disk_info(info); -+ populate_vfio_info(info); - break; - case MIGRATION_STATUS_COLO: - info->has_status = true; -@@ -956,6 +972,7 @@ static void fill_source_migration_info(MigrationInfo *info) - info->setup_time = s->setup_time; - - populate_ram_info(info, s); -+ populate_vfio_info(info); - break; - case MIGRATION_STATUS_FAILED: - info->has_status = true; -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index e5a7a88ba2..cecaae0a47 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -370,6 +370,12 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) - } - monitor_printf(mon, "]\n"); - } -+ -+ if (info->has_vfio) { -+ monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n", -+ info->vfio->transferred >> 10); -+ } -+ - qapi_free_MigrationInfo(info); - qapi_free_MigrationCapabilityStatusList(caps); - } -diff --git a/qapi/migration.json b/qapi/migration.json -index 587ef65872..1f0eb19ac6 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -141,6 +141,18 @@ - 'active', 'postcopy-active', 'postcopy-paused', - 'postcopy-recover', 'completed', 'failed', 'colo', - 'pre-switchover', 'device' ] } -+## -+# @VfioStats: -+# -+# Detailed VFIO devices migration statistics -+# -+# @transferred: amount of 
bytes transferred to the target VM by VFIO devices -+# -+# Since: 5.2 -+# -+## -+{ 'struct': 'VfioStats', -+ 'data': {'transferred': 'int' } } - - ## - # @MigrationInfo: -@@ -202,11 +214,16 @@ - # - # @socket-address: Only used for tcp, to know what the real port is (Since 4.0) - # -+# @vfio: @VfioStats containing detailed VFIO devices migration statistics, -+# only returned if VFIO device is present, migration is supported by all -+# VFIO devices and status is 'active' or 'completed' (since 5.2) -+# - # Since: 0.14.0 - ## - { 'struct': 'MigrationInfo', - 'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats', - '*disk': 'MigrationStats', -+ '*vfio': 'VfioStats', - '*xbzrle-cache': 'XBZRLECacheStats', - '*total-time': 'int', - '*expected-downtime': 'int', --- -2.27.0 - diff --git a/qapi-add-BitmapSyncMode-enum.patch b/qapi-add-BitmapSyncMode-enum.patch deleted file mode 100644 index 778faeee0dc98c233e2415190a8941ed6bd137db..0000000000000000000000000000000000000000 --- a/qapi-add-BitmapSyncMode-enum.patch +++ /dev/null @@ -1,54 +0,0 @@ -From bd1d5d79f4629520d0753676cea8129c60fc6bbc Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:52 -0400 -Subject: [PATCH] qapi: add BitmapSyncMode enum - -Depending on what a user is trying to accomplish, there might be a few -bitmap cleanup actions that occur when an operation is finished that -could be useful. - -I am proposing three: -- NEVER: The bitmap is never synchronized against what was copied. -- ALWAYS: The bitmap is always synchronized, even on failures. -- ON-SUCCESS: The bitmap is synchronized only on success. - -The existing incremental backup modes use 'on-success' semantics, -so add just that one for right now. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Reviewed-by: Markus Armbruster -Message-id: 20190709232550.10724-5-jsnow@redhat.com -Signed-off-by: John Snow ---- - qapi/block-core.json | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 37aa1b7b9a..b8d12a4951 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1134,6 +1134,20 @@ - { 'enum': 'MirrorSyncMode', - 'data': ['top', 'full', 'none', 'incremental'] } - -+## -+# @BitmapSyncMode: -+# -+# An enumeration of possible behaviors for the synchronization of a bitmap -+# when used for data copy operations. -+# -+# @on-success: The bitmap is only synced when the operation is successful. -+# This is the behavior always used for 'INCREMENTAL' backups. -+# -+# Since: 4.2 -+## -+{ 'enum': 'BitmapSyncMode', -+ 'data': ['on-success'] } -+ - ## - # @MirrorCopyMode: - # --- -2.27.0 - diff --git a/qapi-block-core-Add-retry-option-for-error-action.patch b/qapi-block-core-Add-retry-option-for-error-action.patch index 817ff7051db6ab8b9a36d8d53427495c628a2a89..43154aecf644f1c2e98b9cbe795ab335488d3724 100644 --- a/qapi-block-core-Add-retry-option-for-error-action.patch +++ b/qapi-block-core-Add-retry-option-for-error-action.patch @@ -1,5 +1,5 @@ -From 9a95d75bdd469c9c7d44c7c72bc16d57ef2f65cc Mon Sep 17 00:00:00 2001 -From: Jiahui Cen +From cfc15dc456126a6fb811f0c51af8d8ce5c4a4a1b Mon Sep 17 00:00:00 2001 +From: yexiao Date: Thu, 21 Jan 2021 15:46:45 +0800 Subject: [PATCH] qapi/block-core: Add retry option for error action @@ -7,16 +7,17 @@ Add a new error action 'retry' to support retry on errors. 
Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- blockdev.c | 2 ++ - qapi/block-core.json | 4 ++-- - 2 files changed, 4 insertions(+), 2 deletions(-) + qapi/block-core.json | 8 ++++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/blockdev.c b/blockdev.c -index 4d141e9a1f..0f49fd290e 100644 +index c91f49e7b6..2817f73fad 100644 --- a/blockdev.c +++ b/blockdev.c -@@ -319,6 +319,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) +@@ -326,6 +326,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) return BLOCKDEV_ON_ERROR_STOP; } else if (!strcmp(buf, "report")) { return BLOCKDEV_ON_ERROR_REPORT; @@ -26,10 +27,15 @@ index 4d141e9a1f..0f49fd290e 100644 error_setg(errp, "'%s' invalid %s error action", buf, is_read ? "read" : "write"); diff --git a/qapi/block-core.json b/qapi/block-core.json -index 0d43d4f37c..db24f0dfe5 100644 +index 1444624590..ded6f0f6d2 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json -@@ -1113,7 +1113,7 @@ +@@ -1286,10 +1286,12 @@ + # + # @auto: inherit the error handling policy of the backend (since: 2.7) + # ++# @retry: retrying IO with errors ++# # Since: 1.3 ## { 'enum': 'BlockdevOnError', @@ -38,15 +44,20 @@ index 0d43d4f37c..db24f0dfe5 100644 ## # @MirrorSyncMode: -@@ -4894,7 +4894,7 @@ +@@ -5480,10 +5482,12 @@ + # + # @stop: error caused VM to be stopped + # ++# @retry: retry IO with errors ++# # Since: 2.1 ## { 'enum': 'BlockErrorAction', - 'data': [ 'ignore', 'report', 'stop' ] } + 'data': [ 'ignore', 'report', 'stop', 'retry' ] } - ## + # @BLOCK_IMAGE_CORRUPTED: -- 2.27.0 diff --git a/qapi-block-core-Introduce-BackupCommon.patch b/qapi-block-core-Introduce-BackupCommon.patch deleted file mode 100644 index 2d160748c79371b993fd4f0c82a48b535d7b29cd..0000000000000000000000000000000000000000 --- a/qapi-block-core-Introduce-BackupCommon.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 
2204b4839fb90658e13ddc608df7b35ed1ea9fd0 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Mon, 29 Jul 2019 16:35:52 -0400 -Subject: [PATCH] qapi/block-core: Introduce BackupCommon - -drive-backup and blockdev-backup have an awful lot of things in common -that are the same. Let's fix that. - -I don't deduplicate 'target', because the semantics actually did change -between each structure. Leave that one alone so it can be documented -separately. - -Where documentation was not identical, use the most up-to-date version. -For "speed", use Blockdev-Backup's version. For "sync", use -Drive-Backup's version. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -[Maintainer edit: modified commit message. --js] -Reviewed-by: Markus Armbruster -Message-id: 20190709232550.10724-2-jsnow@redhat.com -Signed-off-by: John Snow ---- - qapi/block-core.json | 95 ++++++++++++++------------------------------ - 1 file changed, 29 insertions(+), 66 deletions(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index db24f0dfe5..37aa1b7b9a 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1315,32 +1315,23 @@ - 'data': { 'node': 'str', 'overlay': 'str' } } - - ## --# @DriveBackup: -+# @BackupCommon: - # - # @job-id: identifier for the newly-created block job. If - # omitted, the device name will be used. (Since 2.7) - # - # @device: the device name or node-name of a root node which should be copied. - # --# @target: the target of the new image. If the file exists, or if it --# is a device, the existing file/device will be used as the new --# destination. If it does not exist, a new file will be created. --# --# @format: the format of the new destination, default is to --# probe if @mode is 'existing', else the format of the source --# - # @sync: what parts of the disk image should be copied to the destination - # (all the disk, only the sectors allocated in the topmost image, from a - # dirty bitmap, or only new I/O). 
- # --# @mode: whether and how QEMU should create a new image, default is --# 'absolute-paths'. --# --# @speed: the maximum speed, in bytes per second -+# @speed: the maximum speed, in bytes per second. The default is 0, -+# for unlimited. - # - # @bitmap: the name of dirty bitmap if sync is "incremental". - # Must be present if sync is "incremental", must NOT be present --# otherwise. (Since 2.4) -+# otherwise. (Since 2.4 (drive-backup), 3.1 (blockdev-backup)) - # - # @compress: true to compress data, if the target format supports it. - # (default: false) (since 2.8) -@@ -1370,75 +1361,47 @@ - # I/O. If an error occurs during a guest write request, the device's - # rerror/werror actions will be used. - # --# Since: 1.6 -+# Since: 4.2 - ## --{ 'struct': 'DriveBackup', -- 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', -- '*format': 'str', 'sync': 'MirrorSyncMode', -- '*mode': 'NewImageMode', '*speed': 'int', -+{ 'struct': 'BackupCommon', -+ 'data': { '*job-id': 'str', 'device': 'str', -+ 'sync': 'MirrorSyncMode', '*speed': 'int', - '*bitmap': 'str', '*compress': 'bool', - '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError', - '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } - - ## --# @BlockdevBackup: --# --# @job-id: identifier for the newly-created block job. If --# omitted, the device name will be used. (Since 2.7) --# --# @device: the device name or node-name of a root node which should be copied. --# --# @target: the device name or node-name of the backup target node. --# --# @sync: what parts of the disk image should be copied to the destination --# (all the disk, only the sectors allocated in the topmost image, or --# only new I/O). --# --# @speed: the maximum speed, in bytes per second. The default is 0, --# for unlimited. --# --# @bitmap: the name of dirty bitmap if sync is "incremental". --# Must be present if sync is "incremental", must NOT be present --# otherwise. 
(Since 3.1) --# --# @compress: true to compress data, if the target format supports it. --# (default: false) (since 2.8) -+# @DriveBackup: - # --# @on-source-error: the action to take on an error on the source, --# default 'report'. 'stop' and 'enospc' can only be used --# if the block device supports io-status (see BlockInfo). -+# @target: the target of the new image. If the file exists, or if it -+# is a device, the existing file/device will be used as the new -+# destination. If it does not exist, a new file will be created. - # --# @on-target-error: the action to take on an error on the target, --# default 'report' (no limitations, since this applies to --# a different block device than @device). -+# @format: the format of the new destination, default is to -+# probe if @mode is 'existing', else the format of the source - # --# @auto-finalize: When false, this job will wait in a PENDING state after it has --# finished its work, waiting for @block-job-finalize before --# making any block graph changes. --# When true, this job will automatically --# perform its abort or commit actions. --# Defaults to true. (Since 2.12) -+# @mode: whether and how QEMU should create a new image, default is -+# 'absolute-paths'. - # --# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it --# has completely ceased all work, and awaits @block-job-dismiss. --# When true, this job will automatically disappear from the query --# list without user intervention. --# Defaults to true. (Since 2.12) -+# Since: 1.6 -+## -+{ 'struct': 'DriveBackup', -+ 'base': 'BackupCommon', -+ 'data': { 'target': 'str', -+ '*format': 'str', -+ '*mode': 'NewImageMode' } } -+ -+## -+# @BlockdevBackup: - # --# Note: @on-source-error and @on-target-error only affect background --# I/O. If an error occurs during a guest write request, the device's --# rerror/werror actions will be used. -+# @target: the device name or node-name of the backup target node. 
- # - # Since: 2.3 - ## - { 'struct': 'BlockdevBackup', -- 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', -- 'sync': 'MirrorSyncMode', '*speed': 'int', -- '*bitmap': 'str', '*compress': 'bool', -- '*on-source-error': 'BlockdevOnError', -- '*on-target-error': 'BlockdevOnError', -- '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } -+ 'base': 'BackupCommon', -+ 'data': { 'target': 'str' } } - - ## - # @blockdev-snapshot-sync: --- -2.27.0 - diff --git a/qapi-misc-target-Add-KVM-option-to-isolate-virtcca-d.patch b/qapi-misc-target-Add-KVM-option-to-isolate-virtcca-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..9086c35673be55e5b64ea91a6f164422ffc02041 --- /dev/null +++ b/qapi-misc-target-Add-KVM-option-to-isolate-virtcca-d.patch @@ -0,0 +1,40 @@ +From 555841f0b5d38681d5bec899cba9fc67d92d2a3a Mon Sep 17 00:00:00 2001 +From: panhengchang +Date: Mon, 23 Jun 2025 18:48:09 +0800 +Subject: [PATCH 1/2] qapi/misc-target: Add KVM option to isolate virtcca + detection interface. + +Add 'CONFIG_KVM' to isolate "VirtccaCapability" and +"query-virtcca-capabilities". 
+ +Signed-off-by: panghengchang +--- + qapi/misc-target.json | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 76ed52b..3df0062 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -497,7 +497,8 @@ + ## + { 'struct': 'VirtccaCapability', + 'data': { 'enabled': 'bool' }, +- 'if': 'TARGET_AARCH64' } ++ 'if': { 'all': ['TARGET_AARCH64' , 'CONFIG_KVM'] } ++} + + ## + # @query-virtcca-capabilities: +@@ -515,4 +516,5 @@ + # <- { "return": { "enabled": true } } + ## + { 'command': 'query-virtcca-capabilities', 'returns': 'VirtccaCapability', +- 'if': 'TARGET_AARCH64' } +\ No newline at end of file ++ 'if': { 'all': ['TARGET_AARCH64' , 'CONFIG_KVM'] } ++} +\ No newline at end of file +-- +2.28.0.windows.1 + diff --git a/qapi-misc-target-Add-Virtcca-capability-struct-and-q.patch b/qapi-misc-target-Add-Virtcca-capability-struct-and-q.patch new file mode 100644 index 0000000000000000000000000000000000000000..248f3ced2bf545ea8a8edb42a17649d4e4d8e2d4 --- /dev/null +++ b/qapi-misc-target-Add-Virtcca-capability-struct-and-q.patch @@ -0,0 +1,132 @@ +From a06fe21504564a75d2cfdd3b133b67719edc78ec Mon Sep 17 00:00:00 2001 +From: panhengchang +Date: Thu, 5 Jun 2025 10:05:11 +0800 +Subject: [PATCH] qapi/misc-target: Add Virtcca capability struct and query + command. + +Introduce a new QAPI struct "VirtccaCapility" to represent the +VIRTCCA feature capability with a boolean "enabled" filed. +Add "query-virtcca-capabilties" command to retrieve this capability +information, which targeting HISI AARCH64 platforms. 
+ +Signed-off-by: panghengchang +--- + qapi/misc-target.json | 29 +++++++++++++++++++++++++++++ + target/arm/kvm-tmm.c | 33 +++++++++++++++++++++++++++++++++ + tests/qtest/qmp-cmd-test.c | 1 + + 3 files changed, 63 insertions(+) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 8829145..76ed52b 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -487,3 +487,32 @@ + { 'command': 'xen-event-inject', + 'data': { 'port': 'uint32' }, + 'if': 'TARGET_I386' } ++ ++## ++# @VirtccaCapability: ++# ++# The struct describes capability for VIRTCCA feature. ++# ++# Since: 8.2.0 ++## ++{ 'struct': 'VirtccaCapability', ++ 'data': { 'enabled': 'bool' }, ++ 'if': 'TARGET_AARCH64' } ++ ++## ++# @query-virtcca-capabilities: ++# ++# This command is used to get the VIRTCCA capabilities, and is supported ++# on HISI AARCH64 platforms only. ++# ++# Returns: VirtccaCapability objects. ++# ++# Since: 8.2.0 ++# ++# Example: ++# ++# -> { "execute": "query-virtcca-capabilities" } ++# <- { "return": { "enabled": true } } ++## ++{ 'command': 'query-virtcca-capabilities', 'returns': 'VirtccaCapability', ++ 'if': 'TARGET_AARCH64' } +\ No newline at end of file +diff --git a/target/arm/kvm-tmm.c b/target/arm/kvm-tmm.c +index ea6bcc0..d18ac10 100644 +--- a/target/arm/kvm-tmm.c ++++ b/target/arm/kvm-tmm.c +@@ -15,11 +15,13 @@ + #include "kvm_arm.h" + #include "migration/blocker.h" + #include "qapi/error.h" ++#include "qapi/qapi-commands-misc-target.h" + #include "qom/object_interfaces.h" + #include "sysemu/kvm.h" + #include "sysemu/runstate.h" + #include "hw/loader.h" + #include "linux-headers/asm-arm64/kvm.h" ++#include + + #define TYPE_TMM_GUEST "tmm-guest" + OBJECT_DECLARE_SIMPLE_TYPE(TmmGuest, TMM_GUEST) +@@ -27,6 +29,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(TmmGuest, TMM_GUEST) + #define TMM_PAGE_SIZE qemu_real_host_page_size() + #define TMM_MAX_PMU_CTRS 0x20 + #define TMM_MAX_CFG 6 ++#define TMM_MEMORY_INFO_SYSFS "/sys/kernel/tmm/memory_info" + + typedef struct { + 
uint32_t kae_vf_num; +@@ -406,3 +409,33 @@ static void tmm_register_types(void) + type_register_static(&tmm_guest_info); + } + type_init(tmm_register_types); ++ ++static VirtccaCapability *virtcca_get_capabilities(Error **errp) ++{ ++ VirtccaCapability *cap = NULL; ++ uint64_t tmi_version = 0; ++ int rc = 0; ++ ++ if (kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version) < 0) { ++ error_setg(errp, "VIRTCCA is not enabled in KVM"); ++ return NULL; ++ } ++ ++ rc = access(TMM_MEMORY_INFO_SYSFS, R_OK); ++ if (rc < 0) { ++ error_setg_errno(errp, errno, "VIRTCCA: Failed to read %s", ++ TMM_MEMORY_INFO_SYSFS); ++ return NULL; ++ } ++ ++ cap = g_new0(VirtccaCapability, 1); ++ ++ cap->enabled = true; ++ ++ return cap; ++} ++ ++VirtccaCapability *qmp_query_virtcca_capabilities(Error **errp) ++{ ++ return virtcca_get_capabilities(errp); ++} +\ No newline at end of file +diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c +index 2c15f60..df1f93e 100644 +--- a/tests/qtest/qmp-cmd-test.c ++++ b/tests/qtest/qmp-cmd-test.c +@@ -110,6 +110,7 @@ static bool query_is_ignored(const char *cmd) + "query-sev-capabilities", + "query-sgx", + "query-sgx-capabilities", ++ "query-virtcca-capabilities", + /* Success depends on enabling dirty page rate limit */ + "query-vcpu-dirty-limit", + NULL +-- +2.28.0.windows.1 + diff --git a/qapi-qom-target-i386-csv-guest-Introduce-secret-head.patch b/qapi-qom-target-i386-csv-guest-Introduce-secret-head.patch new file mode 100644 index 0000000000000000000000000000000000000000..8cfc34d7a4fdce68dd17919c63fc394934c0f4cc --- /dev/null +++ b/qapi-qom-target-i386-csv-guest-Introduce-secret-head.patch @@ -0,0 +1,220 @@ +From 10f5fa07068f54b23b01bf875259dc1a259d66b4 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Fri, 2 Aug 2024 01:35:25 +0800 +Subject: [PATCH] qapi/qom,target/i386: csv-guest: Introduce + secret-header-file=str and secret-file=str options + +This feature only applied to Hygon CSV. 
+ +User can utilize the hag to generate secret header file and secret file, +and inject these data to guest encrypted secret area automatically. + +Signed-off-by: hanliyang +--- + qapi/qom.json | 9 ++++- + qemu-options.hx | 8 +++- + target/i386/sev.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 115 insertions(+), 2 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 51d9daf55a..a74c7a91f9 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -869,6 +869,11 @@ + # @user-id: the user id of the guest owner, only support on Hygon CPUs + # (since 8.2) + # ++# @secret-header-file: the header file of guest owner's secret, only ++# support on Hygon CPUs (since 8.2) ++# @secret-file: the file guest owner's secret, only support on Hygon ++# CPUs (since 8.2) ++# + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +@@ -880,7 +885,9 @@ + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', + '*kernel-hashes': 'bool', +- '*user-id': 'str' } } ++ '*user-id': 'str', ++ '*secret-header-file': 'str', ++ '*secret-file': 'str' } } + + ## + # @ThreadContextProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 51ba9378b9..8516b73206 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5637,7 +5637,7 @@ SRST + -object secret,id=sec0,keyid=secmaster0,format=base64,\\ + data=$SECRET,iv=$(user_id = g_strdup(value); + } + ++static char * ++sev_guest_get_secret_header_file(Object *obj, Error **errp) ++{ ++ SevGuestState *s = SEV_GUEST(obj); ++ ++ return g_strdup(s->secret_header_file); ++} ++ ++static void ++sev_guest_set_secret_header_file(Object *obj, const char *value, Error **errp) ++{ ++ SevGuestState *s = SEV_GUEST(obj); ++ ++ s->secret_header_file = g_strdup(value); ++} ++ ++static char * ++sev_guest_get_secret_file(Object *obj, Error **errp) ++{ ++ SevGuestState *s = SEV_GUEST(obj); ++ ++ return g_strdup(s->secret_file); ++} ++ ++static void ++sev_guest_set_secret_file(Object *obj, const char *value, Error **errp) ++{ ++ 
SevGuestState *s = SEV_GUEST(obj); ++ ++ s->secret_file = g_strdup(value); ++} ++ + static char * + sev_guest_get_sev_device(Object *obj, Error **errp) + { +@@ -448,6 +482,16 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_user_id); + object_class_property_set_description(oc, "user-id", + "user id of the guest owner"); ++ object_class_property_add_str(oc, "secret-header-file", ++ sev_guest_get_secret_header_file, ++ sev_guest_set_secret_header_file); ++ object_class_property_set_description(oc, "secret-header-file", ++ "header file of the guest owner's secret"); ++ object_class_property_add_str(oc, "secret-file", ++ sev_guest_get_secret_file, ++ sev_guest_set_secret_file); ++ object_class_property_set_description(oc, "secret-file", ++ "file of the guest owner's secret"); + } + + static void +@@ -867,6 +911,9 @@ sev_launch_update_vmsa(SevGuestState *sev) + return ret; + } + ++static int ++csv_load_launch_secret(const char *secret_header_file, const char *secret_file); ++ + static void + sev_launch_get_measure(Notifier *notifier, void *unused) + { +@@ -917,6 +964,15 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + /* encode the measurement value and emit the event */ + sev->measurement = g_base64_encode(data, measurement.len); + trace_kvm_sev_launch_measurement(sev->measurement); ++ ++ /* Hygon CSV will auto load guest owner's secret */ ++ if (is_hygon_cpu()) { ++ if (sev->secret_header_file && ++ strlen(sev->secret_header_file) && ++ sev->secret_file && ++ strlen(sev->secret_file)) ++ csv_load_launch_secret(sev->secret_header_file, sev->secret_file); ++ } + } + + static char *sev_get_launch_measurement(void) +@@ -2526,6 +2582,50 @@ int csv_load_incoming_cpu_state(QEMUFile *f) + return ret; + } + ++static int ++csv_load_launch_secret(const char *secret_header_file, const char *secret_file) ++{ ++ gsize secret_header_size, secret_size; ++ gchar *secret_header = NULL, *secret = NULL; ++ uint8_t *data; ++ struct sev_secret_area *area; 
++ uint64_t gpa; ++ GError *error = NULL; ++ Error *local_err = NULL; ++ int ret = 0; ++ ++ if (!g_file_get_contents(secret_header_file, ++ &secret_header, ++ &secret_header_size, &error)) { ++ error_report("CSV: Failed to read '%s' (%s)", ++ secret_header_file, error->message); ++ g_error_free(error); ++ return -1; ++ } ++ ++ if (!g_file_get_contents(secret_file, &secret, &secret_size, &error)) { ++ error_report("CSV: Failed to read '%s' (%s)", secret_file, error->message); ++ g_error_free(error); ++ return -1; ++ } ++ ++ if (!pc_system_ovmf_table_find(SEV_SECRET_GUID, &data, NULL)) { ++ error_report("CSV: no secret area found in OVMF, gpa must be" ++ " specified."); ++ return -1; ++ } ++ area = (struct sev_secret_area *)data; ++ gpa = area->base; ++ ++ ret = sev_inject_launch_secret((char *)secret_header, ++ (char *)secret, gpa, &local_err); ++ ++ if (local_err) { ++ error_report_err(local_err); ++ } ++ return ret; ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +-- +2.41.0.windows.1 + diff --git a/qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO-CVE-202.patch b/qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO-CVE-202.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f3788efd825c56386fa155819203064a45b6e31 --- /dev/null +++ b/qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO-CVE-202.patch @@ -0,0 +1,108 @@ +From 1163031f9e9662c0882c986e5e76d20a7cd9d579 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 11 Apr 2024 15:06:01 +0200 +Subject: [PATCH] qcow2: Don't open data_file with BDRV_O_NO_IO (CVE-2024-4467) + +One use case for 'qemu-img info' is verifying that untrusted images +don't reference an unwanted external file, be it as a backing file or an +external data file. To make sure that calling 'qemu-img info' can't +already have undesired side effects with a malicious image, just don't +open the data file at all with BDRV_O_NO_IO. 
If nothing ever tries to do +I/O, we don't need to have it open. + +This changes the output of iotests case 061, which used 'qemu-img info' +to show that opening an image with an invalid data file fails. After +this patch, it succeeds. Replace this part of the test with a qemu-io +call, but keep the final 'qemu-img info' to show that the invalid data +file is correctly displayed in the output. + +Fixes: CVE-2024-4467 +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +--- + block/qcow2.c | 17 ++++++++++++++++- + tests/qemu-iotests/061 | 6 ++++-- + tests/qemu-iotests/061.out | 8 ++++++-- + 3 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 13e032bd5e..7af7c0bee4 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1636,7 +1636,22 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + goto fail; + } + +- if (open_data_file) { ++ if (open_data_file && (flags & BDRV_O_NO_IO)) { ++ /* ++ * Don't open the data file for 'qemu-img info' so that it can be used ++ * to verify that an untrusted qcow2 image doesn't refer to external ++ * files. ++ * ++ * Note: This still makes has_data_file() return true. 
++ */ ++ if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { ++ s->data_file = NULL; ++ } else { ++ s->data_file = bs->file; ++ } ++ qdict_extract_subqdict(options, NULL, "data-file."); ++ qdict_del(options, "data-file"); ++ } else if (open_data_file) { + /* Open external data file */ + bdrv_graph_co_rdunlock(); + s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs, +diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 +index 53c7d428e3..b71ac097d1 100755 +--- a/tests/qemu-iotests/061 ++++ b/tests/qemu-iotests/061 +@@ -326,12 +326,14 @@ $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" + echo + _make_test_img -o "compat=1.1,data_file=$TEST_IMG.data" 64M + $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo + $QEMU_IMG amend -o "data_file=" --image-opts "data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo +diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out +index 139fc68177..24c33add7c 100644 +--- a/tests/qemu-iotests/061.out ++++ b/tests/qemu-iotests/061.out +@@ -545,7 +545,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + qemu-img: data-file can only be set for images that use an external data file + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data +-qemu-img: Could not open 
'TEST_DIR/t.IMGFMT': Could not open 'foo': No such file or directory ++qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 'foo': No such file or directory ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +@@ -560,7 +562,9 @@ Format specific information: + corrupt: false + extended l2: false + +-qemu-img: Could not open 'TEST_DIR/t.IMGFMT': 'data-file' is required for this image ++qemu-io: can't open device TEST_DIR/t.IMGFMT: 'data-file' is required for this image ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +-- +2.41.0.windows.1 + diff --git a/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch b/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch deleted file mode 100644 index f2a4e5c26f50c820eb9122e8a8449b76713a8db7..0000000000000000000000000000000000000000 --- a/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 405deba14f6b61b9c557484b46e863308c8cf373 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 28 Oct 2019 17:18:40 +0100 -Subject: [PATCH] qcow2: Fix QCOW2_COMPRESSED_SECTOR_MASK - -Masks for L2 table entries should have 64 bit. 
- -Fixes: b6c246942b14d3e0dec46a6c5868ed84e7dbea19 -Buglink: https://bugs.launchpad.net/qemu/+bug/1850000 -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20191028161841.1198-2-mreitz@redhat.com -Reviewed-by: Alberto Garcia -Signed-off-by: Max Reitz -(cherry picked from commit 24552feb6ae2f615b76c2b95394af43901f75046) -Signed-off-by: Michael Roth ---- - block/qcow2.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/qcow2.h b/block/qcow2.h -index fc1b0d3c1e..359197f89f 100644 ---- a/block/qcow2.h -+++ b/block/qcow2.h -@@ -77,7 +77,7 @@ - - /* Defined in the qcow2 spec (compressed cluster descriptor) */ - #define QCOW2_COMPRESSED_SECTOR_SIZE 512U --#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1)) -+#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL)) - - /* Must be at least 2 to cover COW */ - #define MIN_L2_CACHE_SIZE 2 /* cache entries */ --- -2.23.0 diff --git a/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch b/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch deleted file mode 100644 index b4c25806d7f7b99408a0419987c22c2175f4fee3..0000000000000000000000000000000000000000 --- a/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 416a692e51b8b582407e30046ddcffbbe52ecf77 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 24 Oct 2019 16:26:58 +0200 -Subject: [PATCH] qcow2: Fix corruption bug in - qcow2_detect_metadata_preallocation() - -qcow2_detect_metadata_preallocation() calls qcow2_get_refcount() which -requires s->lock to be taken to protect its accesses to the refcount -table and refcount blocks. However, nothing in this code path actually -took the lock. This could cause the same cache entry to be used by two -requests at the same time, for different tables at different offsets, -resulting in image corruption. 
- -As it would be preferable to base the detection on consistent data (even -though it's just heuristics), let's take the lock not only around the -qcow2_get_refcount() calls, but around the whole function. - -This patch takes the lock in qcow2_co_block_status() earlier and asserts -in qcow2_detect_metadata_preallocation() that we hold the lock. - -Fixes: 69f47505ee66afaa513305de0c1895a224e52c45 -Cc: qemu-stable@nongnu.org -Reported-by: Michael Weiser -Signed-off-by: Kevin Wolf -Tested-by: Michael Weiser -Reviewed-by: Michael Weiser -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -(cherry picked from commit 5e9785505210e2477e590e61b1ab100d0ec22b01) -Signed-off-by: Michael Roth ---- - block/qcow2-refcount.c | 2 ++ - block/qcow2.c | 3 ++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index ef965d7895..0d64bf5a5e 100644 ---- a/block/qcow2-refcount.c -+++ b/block/qcow2-refcount.c -@@ -3455,6 +3455,8 @@ int qcow2_detect_metadata_preallocation(BlockDriverState *bs) - int64_t i, end_cluster, cluster_count = 0, threshold; - int64_t file_length, real_allocation, real_clusters; - -+ qemu_co_mutex_assert_locked(&s->lock); -+ - file_length = bdrv_getlength(bs->file->bs); - if (file_length < 0) { - return file_length; -diff --git a/block/qcow2.c b/block/qcow2.c -index 865839682c..c0f5439dc8 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -1899,6 +1899,8 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, - unsigned int bytes; - int status = 0; - -+ qemu_co_mutex_lock(&s->lock); -+ - if (!s->metadata_preallocation_checked) { - ret = qcow2_detect_metadata_preallocation(bs); - s->metadata_preallocation = (ret == 1); -@@ -1906,7 +1908,6 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, - } - - bytes = MIN(INT_MAX, count); -- qemu_co_mutex_lock(&s->lock); - ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); - 
qemu_co_mutex_unlock(&s->lock); - if (ret < 0) { --- -2.23.0 diff --git a/qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch deleted file mode 100644 index 8d9b71c70ea503d0bc2480439d5826357e5ab931..0000000000000000000000000000000000000000 --- a/qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +++ /dev/null @@ -1,39 +0,0 @@ -From fad649b88c93d0567be4e426f23063b439037095 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 11 Feb 2020 10:48:59 +0100 -Subject: [PATCH] qcow2: Fix qcow2_alloc_cluster_abort() for external data file - -For external data file, cluster allocations return an offset in the data -file and are not refcounted. In this case, there is nothing to do for -qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file -is wrong and causes crashes in the better case or image corruption in -the worse case. - -Signed-off-by: Kevin Wolf -Message-Id: <20200211094900.17315-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf ---- - block/qcow2-cluster.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index f8576031b6..7e7e051437 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -1026,8 +1026,11 @@ err: - void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) - { - BDRVQcow2State *s = bs->opaque; -- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, -- QCOW2_DISCARD_NEVER); -+ if (!has_data_file(bs)) { -+ qcow2_free_clusters(bs, m->alloc_offset, -+ m->nb_clusters << s->cluster_bits, -+ QCOW2_DISCARD_NEVER); -+ } - } - - /* --- -2.27.0 - diff --git a/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch b/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch deleted file mode 100644 index be2c3c72ced8b33f569fabef0c1f01dd382993ef..0000000000000000000000000000000000000000 --- a/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch 
+++ /dev/null @@ -1,58 +0,0 @@ -From c9ffb12754b1575babfef45168b6e1b1af80a95f Mon Sep 17 00:00:00 2001 -From: Alberto Garcia -Date: Fri, 16 Aug 2019 15:17:42 +0300 -Subject: [PATCH] qcow2: Fix the calculation of the maximum L2 cache size - -The size of the qcow2 L2 cache defaults to 32 MB, which can be easily -larger than the maximum amount of L2 metadata that the image can have. -For example: with 64 KB clusters the user would need a qcow2 image -with a virtual size of 256 GB in order to have 32 MB of L2 metadata. - -Because of that, since commit b749562d9822d14ef69c9eaa5f85903010b86c30 -we forbid the L2 cache to become larger than the maximum amount of L2 -metadata for the image, calculated using this formula: - - uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); - -The problem with this formula is that the result should be rounded up -to the cluster size because an L2 table on disk always takes one full -cluster. - -For example, a 1280 MB qcow2 image with 64 KB clusters needs exactly -160 KB of L2 metadata, but we need 192 KB on disk (3 clusters) even if -the last 32 KB of those are not going to be used. - -However QEMU rounds the numbers down and only creates 2 cache tables -(128 KB), which is not enough for the image. - -A quick test doing 4KB random writes on a 1280 MB image gives me -around 500 IOPS, while with the correct cache size I get 16K IOPS. 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Alberto Garcia -Signed-off-by: Kevin Wolf -(cherry picked from commit b70d08205b2e4044c529eefc21df2c8ab61b473b) -Signed-off-by: Michael Roth ---- - block/qcow2.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 039bdc2f7e..865839682c 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -826,7 +826,11 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, - bool l2_cache_entry_size_set; - int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; - uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; -- uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); -+ uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size); -+ /* An L2 table is always one cluster in size so the max cache size -+ * should be a multiple of the cluster size. */ -+ uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t), -+ s->cluster_size); - - combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); - l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); --- -2.23.0 diff --git a/qcow2-Limit-total-allocation-range-to-INT_MAX.patch b/qcow2-Limit-total-allocation-range-to-INT_MAX.patch deleted file mode 100644 index 3f915adcf6c57f828466e8ea0feff735a2a27937..0000000000000000000000000000000000000000 --- a/qcow2-Limit-total-allocation-range-to-INT_MAX.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 3d83643fb8d69f1c38df3e90634f9b82d4a62a1c Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Thu, 10 Oct 2019 12:08:57 +0200 -Subject: [PATCH] qcow2: Limit total allocation range to INT_MAX -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When the COW areas are included, the size of an allocation can exceed -INT_MAX. 
This is kind of limited by handle_alloc() in that it already -caps avail_bytes at INT_MAX, but the number of clusters still reflects -the original length. - -This can have all sorts of effects, ranging from the storage layer write -call failing to image corruption. (If there were no image corruption, -then I suppose there would be data loss because the .cow_end area is -forced to be empty, even though there might be something we need to -COW.) - -Fix all of it by limiting nb_clusters so the equivalent number of bytes -will not exceed INT_MAX. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Reviewed-by: Eric Blake -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit d1b9d19f99586b33795e20a79f645186ccbc070f) -Signed-off-by: Michael Roth ---- - block/qcow2-cluster.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index 974a4e8..c4a99c1 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -1342,6 +1342,9 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, - nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); - assert(nb_clusters <= INT_MAX); - -+ /* Limit total allocation byte count to INT_MAX */ -+ nb_clusters = MIN(nb_clusters, INT_MAX >> s->cluster_bits); -+ - /* Find L2 entry for the first involved cluster */ - ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); - if (ret < 0) { -@@ -1430,7 +1433,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, - * request actually writes to (excluding COW at the end) - */ - uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset); -- int avail_bytes = MIN(INT_MAX, nb_clusters << s->cluster_bits); -+ int avail_bytes = nb_clusters << s->cluster_bits; - int nb_bytes = MIN(requested_bytes, avail_bytes); - QCowL2Meta *old_m = *m; - --- -1.8.3.1 - diff --git a/qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch 
b/qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch deleted file mode 100644 index 358fc61764ab3a8170a4d625af0ee4136531b7c0..0000000000000000000000000000000000000000 --- a/qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 66ad3c6ecce098d8f01545859c5ebf7a9e505e2c Mon Sep 17 00:00:00 2001 -From: Tuguoyi -Date: Fri, 1 Nov 2019 07:37:35 +0000 -Subject: [PATCH] qcow2-bitmap: Fix uint64_t left-shift overflow - -There are two issues in In check_constraints_on_bitmap(), -1) The sanity check on the granularity will cause uint64_t -integer left-shift overflow when cluster_size is 2M and the -granularity is BIGGER than 32K. -2) The way to calculate image size that the maximum bitmap -supported can map to is a bit incorrect. -This patch fix it by add a helper function to calculate the -number of bytes needed by a normal bitmap in image and compare -it to the maximum bitmap bytes supported by qemu. - -Fixes: 5f72826e7fc62167cf3a -Signed-off-by: Guoyi Tu -Message-id: 4ba40cd1e7ee4a708b40899952e49f22@h3c.com -Reviewed-by: Vladimir Sementsov-Ogievskiy -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -(cherry picked from commit 570542ecb11e04b61ef4b3f4d0965a6915232a88) -Signed-off-by: Michael Roth ---- - block/qcow2-bitmap.c | 14 +++++++++++--- - 1 file changed, 11 insertions(+), 3 deletions(-) - -diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c -index e53a160..997923f 100644 ---- a/block/qcow2-bitmap.c -+++ b/block/qcow2-bitmap.c -@@ -143,6 +143,13 @@ static int check_table_entry(uint64_t entry, int cluster_size) - return 0; - } - -+static int64_t get_bitmap_bytes_needed(int64_t len, uint32_t granularity) -+{ -+ int64_t num_bits = DIV_ROUND_UP(len, granularity); -+ -+ return DIV_ROUND_UP(num_bits, 8); -+} -+ - static int check_constraints_on_bitmap(BlockDriverState *bs, - const char *name, - uint32_t granularity, -@@ -151,6 +158,7 @@ static int check_constraints_on_bitmap(BlockDriverState *bs, - BDRVQcow2State *s = bs->opaque; 
- int granularity_bits = ctz32(granularity); - int64_t len = bdrv_getlength(bs); -+ int64_t bitmap_bytes; - - assert(granularity > 0); - assert((granularity & (granularity - 1)) == 0); -@@ -172,9 +180,9 @@ static int check_constraints_on_bitmap(BlockDriverState *bs, - return -EINVAL; - } - -- if ((len > (uint64_t)BME_MAX_PHYS_SIZE << granularity_bits) || -- (len > (uint64_t)BME_MAX_TABLE_SIZE * s->cluster_size << -- granularity_bits)) -+ bitmap_bytes = get_bitmap_bytes_needed(len, granularity); -+ if ((bitmap_bytes > (uint64_t)BME_MAX_PHYS_SIZE) || -+ (bitmap_bytes > (uint64_t)BME_MAX_TABLE_SIZE * s->cluster_size)) - { - error_setg(errp, "Too much space will be occupied by the bitmap. " - "Use larger granularity"); --- -1.8.3.1 - diff --git a/qcow2-fix-memory-leak-in-qcow2_read_extensions.patch b/qcow2-fix-memory-leak-in-qcow2_read_extensions.patch deleted file mode 100644 index 2837a02342a4d6fde0e7f0370d64557089450870..0000000000000000000000000000000000000000 --- a/qcow2-fix-memory-leak-in-qcow2_read_extensions.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 76ab77108279f9d328e4a7fe1684141084698d97 Mon Sep 17 00:00:00 2001 -From: zhanghailiang -Date: Thu, 25 Jul 2019 16:05:11 +0800 -Subject: [PATCH] qcow2: fix memory leak in qcow2_read_extensions - -Free feature_table if it is failed in bdrv_pread. 
- -Signed-off-by: fangyi ---- - block/qcow2.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 3ace3b22..5e85cf4b 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -258,6 +258,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, - void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); - ret = bdrv_pread(bs->file, offset , feature_table, ext.len); - if (ret < 0) { -+ g_free(feature_table); - error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " - "Could not read table"); - return ret; --- -2.19.1 - diff --git a/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch index 85467e8412ec264d6034f59ae3704a3042d1e5e0..4aa28bb0c5b9bed2bb8cc181af9fe5c046e5068e 100644 --- a/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch +++ b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch @@ -1,4 +1,4 @@ -From 4f1396f9e173a24f78204b8849c209100499d639 Mon Sep 17 00:00:00 2001 +From 172d79d8ebb343fa144987d2c50d90655d5aa5f9 Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Thu, 29 Jul 2021 15:24:48 +0800 Subject: [PATCH] qdev/monitors: Fix reundant error_setg of qdev_add_device @@ -9,15 +9,16 @@ will trigger an asseration "assert(*errp == NULL)". 
Fixes: 515a7970490 (log: Add some logs on VM runtime path) Signed-off-by: Kunkun Jiang +Signed-off-by: Yan Wang --- - qdev-monitor.c | 1 - + system/qdev-monitor.c | 1 - 1 file changed, 1 deletion(-) -diff --git a/qdev-monitor.c b/qdev-monitor.c -index c6c1d3f06a..ab2bdef105 100644 ---- a/qdev-monitor.c -+++ b/qdev-monitor.c -@@ -587,7 +587,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index c885175b66..b10e483a9a 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -644,7 +644,6 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, if (path != NULL) { bus = qbus_find(path, errp); if (!bus) { diff --git a/qemu-4.1.0.tar.xz b/qemu-8.2.0.tar.xz similarity index 41% rename from qemu-4.1.0.tar.xz rename to qemu-8.2.0.tar.xz index 79ad0661eda38092de13a677ef70eeaece3ad848..6d13b5dd8d4d58c99a8d664ca6f207c9ae5c59b7 100644 Binary files a/qemu-4.1.0.tar.xz and b/qemu-8.2.0.tar.xz differ diff --git a/qemu-bswap-Undefine-CPU_CONVERT-once-done.patch b/qemu-bswap-Undefine-CPU_CONVERT-once-done.patch new file mode 100644 index 0000000000000000000000000000000000000000..7240ff24b2bf294ded17d3771d3e1267a72076fe --- /dev/null +++ b/qemu-bswap-Undefine-CPU_CONVERT-once-done.patch @@ -0,0 +1,37 @@ +From 4fc36060bec2ac7de500068211b1282c38e3e073 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Tue, 12 Nov 2024 14:05:45 +0800 +Subject: [PATCH] qemu/bswap: Undefine CPU_CONVERT() once done +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 1d73353f236209e9b5987d7c6b30b2a32b739210 + +Better undefined macros once we are done with them, +like we do few lines later with DO_STN_LDN_P(). 
+ +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Thomas Huth +Message-Id: <20241003234211.53644-2-philmd@linaro.org> +Signed-off-by: Zhang Jiao +--- + include/qemu/bswap.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h +index 933a66ee87..49e4944457 100644 +--- a/include/qemu/bswap.h ++++ b/include/qemu/bswap.h +@@ -138,6 +138,8 @@ CPU_CONVERT(le, 16, uint16_t) + CPU_CONVERT(le, 32, uint32_t) + CPU_CONVERT(le, 64, uint64_t) + ++#undef CPU_CONVERT ++ + /* + * Same as cpu_to_le{16,32,64}, except that gcc will figure the result is + * a compile-time constant if you pass in a constant. So this can be +-- +2.41.0.windows.1 + diff --git a/qemu-file-Don-t-do-IO-after-shutdown.patch b/qemu-file-Don-t-do-IO-after-shutdown.patch deleted file mode 100644 index 72cfc4d7c97cc130be5ffb5556852790c17b2afa..0000000000000000000000000000000000000000 --- a/qemu-file-Don-t-do-IO-after-shutdown.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 1f8bc46e8af4ffe6d062f378bd11e0ad70d30ac8 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 2 Dec 2020 14:25:13 +0800 -Subject: [PATCH] qemu-file: Don't do IO after shutdown - -Be sure that we are not doing neither read/write after shutdown of the -QEMUFile. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. 
David Alan Gilbert ---- - migration/qemu-file.c | 22 +++++++++++++++++++++- - 1 file changed, 21 insertions(+), 1 deletion(-) - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 18f480529a..cd96d04e9a 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -51,6 +51,8 @@ struct QEMUFile { - unsigned int iovcnt; - - int last_error; -+ /* has the file has been shutdown */ -+ bool shutdown; - }; - - /* -@@ -59,10 +61,18 @@ struct QEMUFile { - */ - int qemu_file_shutdown(QEMUFile *f) - { -+ int ret; -+ -+ f->shutdown = true; - if (!f->ops->shut_down) { - return -ENOSYS; - } -- return f->ops->shut_down(f->opaque, true, true); -+ -+ ret = f->ops->shut_down(f->opaque, true, true); -+ if (!f->last_error) { -+ qemu_file_set_error(f, -EIO); -+ } -+ return ret; - } - - /* -@@ -181,6 +191,10 @@ void qemu_fflush(QEMUFile *f) - return; - } - -+ if (f->shutdown) { -+ return; -+ } -+ - if (f->iovcnt > 0) { - expect = iov_size(f->iov, f->iovcnt); - ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); -@@ -293,6 +307,9 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) - f->buf_index = 0; - f->buf_size = pending; - -+ if (f->shutdown) { -+ return 0; -+ } - len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, - IO_BUF_SIZE - pending); - if (len > 0) { -@@ -591,6 +608,9 @@ int64_t qemu_ftell(QEMUFile *f) - - int qemu_file_rate_limit(QEMUFile *f) - { -+ if (f->shutdown) { -+ return 1; -+ } - if (qemu_file_get_error(f)) { - return 1; - } --- -2.27.0 - diff --git a/qemu-img-add-qemu-img-direct-create.patch b/qemu-img-add-qemu-img-direct-create.patch new file mode 100644 index 0000000000000000000000000000000000000000..74fcf3bf7c9b41789da4d4d6c06fcd059e4272ec --- /dev/null +++ b/qemu-img-add-qemu-img-direct-create.patch @@ -0,0 +1,534 @@ +From 422ac7d67a7ced985b1beef4b33cc43b48d1f240 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Mon, 18 Mar 2024 10:18:07 +0800 +Subject: [PATCH] qemu-img: add qemu-img direct create + +Introdue 
buffer_size while creating raw file, then we +can controll the speed of direct write by: + qemu-img create -t 'cache' -o buffer_size='num' + +Signed-off-by: Jinhua Cao +--- + block/file-posix.c | 65 ++++++++++++++++++-- + include/block/block_int-common.h | 2 + + qapi/block-core.json | 6 +- + qemu-img-cmds.hx | 4 +- + qemu-img.c | 14 ++++- + tests/qemu-iotests/049.out | 102 +++++++++++++++---------------- + tests/qemu-iotests/099.out | 2 +- + 7 files changed, 134 insertions(+), 61 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 4782aba59f..4ac8f684f1 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -128,6 +128,10 @@ + #define FTYPE_CD 1 + + #define MAX_BLOCKSIZE 4096 ++#define DEFAULT_BUFFER_SIZE 65536 ++#define BUFFER_ALIGN_SIZE 65536 ++#define MIN_BUFFER_SIZE 65536 ++#define MAX_BUFFER_SIZE 16777216 + + /* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes, + * leaving a few more bytes for its future use. */ +@@ -203,6 +207,8 @@ typedef struct RawPosixAIOData { + off_t aio_offset; + uint64_t aio_nbytes; + ++ size_t buffer_size; ++ + union { + struct { + struct iovec *iov; +@@ -2630,7 +2636,8 @@ static void raw_close(BlockDriverState *bs) + */ + static int coroutine_fn + raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, size_t buffer_size, ++ Error **errp) + { + RawPosixAIOData acb; + +@@ -2639,6 +2646,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + .aio_fildes = fd, + .aio_type = QEMU_AIO_TRUNCATE, + .aio_offset = offset, ++ .buffer_size = buffer_size, + .truncate = { + .prealloc = prealloc, + .errp = errp, +@@ -2664,7 +2672,8 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + + if (S_ISREG(st.st_mode)) { + /* Always resizes to the exact @offset */ +- return raw_regular_truncate(bs, s->fd, offset, prealloc, errp); ++ return raw_regular_truncate(bs, s->fd, 
offset, prealloc, ++ DEFAULT_BUFFER_SIZE, errp); + } + + if (prealloc != PREALLOC_MODE_OFF) { +@@ -2882,6 +2891,8 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + int fd; + uint64_t perm, shared; + int result = 0; ++ int flags = O_RDWR | O_BINARY; ++ size_t buffer_size = DEFAULT_BUFFER_SIZE; + + /* Validate options and set default values */ + assert(options->driver == BLOCKDEV_DRIVER_FILE); +@@ -2901,9 +2912,19 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + error_setg(errp, "Extent size hint is too large"); + goto out; + } ++ if (!file_opts->cache) { ++ file_opts->cache = g_strdup("writeback"); ++ } ++ if (file_opts->preallocation == PREALLOC_MODE_FULL && ++ !strcmp(file_opts->cache, "none")) { ++ flags |= O_DIRECT; ++ } ++ if (file_opts->has_buffersize) { ++ buffer_size = file_opts->buffersize; ++ } + + /* Create file */ +- fd = qemu_create(file_opts->filename, O_RDWR | O_BINARY, 0644, errp); ++ fd = qemu_create(file_opts->filename, flags, 0644, errp); + if (fd < 0) { + result = -errno; + goto out; +@@ -2938,7 +2959,8 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + } + + /* Clear the file by truncating it to 0 */ +- result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); ++ result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, ++ buffer_size, errp); + if (result < 0) { + goto out_unlock; + } +@@ -2982,7 +3004,8 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + /* Resize and potentially preallocate the file to the desired + * final size */ + result = raw_regular_truncate(NULL, fd, file_opts->size, +- file_opts->preallocation, errp); ++ file_opts->preallocation, ++ buffer_size, errp); + if (result < 0) { + goto out_unlock; + } +@@ -3003,6 +3026,8 @@ out_close: + error_setg_errno(errp, -result, "Could not close the new file"); + } + out: ++ g_free(file_opts->cache); ++ file_opts->cache = NULL; + return result; + } + +@@ -3018,6 +3043,8 @@ raw_co_create_opts(BlockDriver *drv, const 
char *filename, + PreallocMode prealloc; + char *buf = NULL; + Error *local_err = NULL; ++ size_t buffersize = DEFAULT_BUFFER_SIZE; ++ char *cache = NULL; + + /* Skip file: protocol prefix */ + strstart(filename, "file:", &filename); +@@ -3040,6 +3067,21 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, + return -EINVAL; + } + ++ buffersize = qemu_opt_get_size_del(opts, BLOCK_OPT_BUFFER_SIZE, ++ DEFAULT_BUFFER_SIZE); ++ if (buffersize < MIN_BUFFER_SIZE || buffersize > MAX_BUFFER_SIZE) { ++ error_setg_errno(errp, EINVAL, "Buffer size must be between %d " ++ "and %d", MIN_BUFFER_SIZE, MAX_BUFFER_SIZE); ++ return -EINVAL; ++ } ++ ++ cache = qemu_opt_get_del(opts, BLOCK_OPT_CACHE); ++ if (!cache) { ++ cache = g_strdup("writeback"); ++ } ++ ++ buffersize = ROUND_UP(buffersize, BUFFER_ALIGN_SIZE); ++ + options = (BlockdevCreateOptions) { + .driver = BLOCKDEV_DRIVER_FILE, + .u.file = { +@@ -3051,6 +3093,9 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, + .nocow = nocow, + .has_extent_size_hint = has_extent_size_hint, + .extent_size_hint = extent_size_hint, ++ .has_buffersize = true, ++ .buffersize = buffersize, ++ .cache = cache, + }, + }; + return raw_co_create(&options, errp); +@@ -3741,6 +3786,16 @@ static QemuOptsList raw_create_opts = { + .type = QEMU_OPT_SIZE, + .help = "Extent size hint for the image file, 0 to disable" + }, ++ { ++ .name = BLOCK_OPT_CACHE, ++ .type = QEMU_OPT_STRING, ++ .help = "Cache mode (allowed values: writeback, none)" ++ }, ++ { ++ .name = BLOCK_OPT_BUFFER_SIZE, ++ .type = QEMU_OPT_SIZE, ++ .help = "write buffer size" ++ }, + { /* end of list */ } + } + }; +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 4e31d161c5..a6e2436524 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -57,6 +57,8 @@ + #define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" + #define BLOCK_OPT_COMPRESSION_TYPE "compression_type" + #define BLOCK_OPT_EXTL2 "extended_l2" 
++#define BLOCK_OPT_CACHE "cache" ++#define BLOCK_OPT_BUFFER_SIZE "buffer_size" + + #define BLOCK_PROBE_BUF_SIZE 512 + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index ca390c5700..1444624590 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4906,6 +4906,8 @@ + # + # @extent-size-hint: Extent size hint to add to the image file; 0 for + # not adding an extent size hint (default: 1 MB, since 5.1) ++# @cache: Cache mode used to write the output disk image ++# @buffersize: Buffer size for creating image + # + # Since: 2.12 + ## +@@ -4914,7 +4916,9 @@ + 'size': 'size', + '*preallocation': 'PreallocMode', + '*nocow': 'bool', +- '*extent-size-hint': 'size'} } ++ '*extent-size-hint': 'size', ++ '*cache': 'str', ++ '*buffersize': 'size'} } + + ## + # @BlockdevCreateOptionsGluster: +diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx +index 068692d13e..20bdcd7b82 100644 +--- a/qemu-img-cmds.hx ++++ b/qemu-img-cmds.hx +@@ -52,9 +52,9 @@ SRST + ERST + + DEF("create", img_create, +- "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-o options] filename [size]") ++ "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-t cache] [-o options] filename [size]") + SRST +-.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE] ++.. 
option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-t CACHE] [-o OPTIONS] FILENAME [SIZE] + ERST + + DEF("dd", img_dd, +diff --git a/qemu-img.c b/qemu-img.c +index 5a77f67719..80adee2620 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -516,6 +516,7 @@ static int img_create(int argc, char **argv) + const char *base_fmt = NULL; + const char *filename; + const char *base_filename = NULL; ++ const char *cache = BDRV_DEFAULT_CACHE; + char *options = NULL; + Error *local_err = NULL; + bool quiet = false; +@@ -527,7 +528,7 @@ static int img_create(int argc, char **argv) + {"object", required_argument, 0, OPTION_OBJECT}, + {0, 0, 0, 0} + }; +- c = getopt_long(argc, argv, ":F:b:f:ho:qu", ++ c = getopt_long(argc, argv, ":F:b:f:t:ho:qu", + long_options, NULL); + if (c == -1) { + break; +@@ -551,6 +552,9 @@ static int img_create(int argc, char **argv) + case 'f': + fmt = optarg; + break; ++ case 't': ++ cache = optarg; ++ break; + case 'o': + if (accumulate_options(&options, optarg) < 0) { + goto fail; +@@ -594,6 +598,14 @@ static int img_create(int argc, char **argv) + error_exit("Unexpected argument: %s", argv[optind]); + } + ++ if (!options) { ++ options = g_strdup_printf(BLOCK_OPT_CACHE"=%s", cache); ++ } else { ++ char *old_options = options; ++ options = g_strdup_printf("%s,"BLOCK_OPT_CACHE"=%s", options, cache); ++ g_free(old_options); ++ } ++ + bdrv_img_create(filename, fmt, base_filename, base_fmt, + options, img_size, flags, quiet, &local_err); + if (local_err) { +diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out +index 34e1b452e6..b4a9705ec2 100644 +--- a/tests/qemu-iotests/049.out ++++ b/tests/qemu-iotests/049.out +@@ -4,90 +4,90 @@ QA output created by 049 + == 1. 
Traditional size parameter == + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024b +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1k +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1K +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1G +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 
lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1T +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0b +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5k +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5K +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off 
refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5G +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5T +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 cache=writeback + + == 2. 
Specifying size via -o == + + qemu-img create -f qcow2 -o size=1024 TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1024b TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1k TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1K TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1M TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1G TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off 
compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1T TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1024.0 TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1024.0b TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5k TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5K TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 
extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5M TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5G TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5T TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 cache=writeback + + == 3. 
Invalid sizes == + +@@ -132,84 +132,84 @@ qemu-img: TEST_DIR/t.qcow2: The image size must be specified only once + == Check correct interpretation of suffixes for cluster size == + + qemu-img create -f qcow2 -o cluster_size=1024 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1024b TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1k TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1K TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1M TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1048576 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 
cluster_size=1048576 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1024.0 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1024.0b TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=0.5k TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=0.5K TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=0.5M TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=524288 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=524288 extended_l2=off compression_type=zlib 
size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + == Check compat level option == + + qemu-img create -f qcow2 -o compat=0.10 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=1.1 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=0.42 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off refcount_bits=16 cache=writeback + qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value '0.42' + + qemu-img create -f qcow2 -o compat=foobar TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off refcount_bits=16 cache=writeback + qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value 'foobar' + + == Check preallocation option == + + qemu-img create -f qcow2 -o preallocation=off TEST_DIR/t.qcow2 64M 
+-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o preallocation=metadata TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=metadata compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=metadata compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + qemu-img: TEST_DIR/t.qcow2: Parameter 'preallocation' does not accept value '1234' + + == Check encryption option == + + qemu-img create -f qcow2 -o encryption=off TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=off cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=off cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 --object secret,id=sec0,data=123456 -o encryption=on,encrypt.key-secret=sec0 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=on encrypt.key-secret=sec0 cluster_size=65536 extended_l2=off 
compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=on encrypt.key-secret=sec0 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + == Check lazy_refcounts option (only with v3) == + + qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=off TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=on TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=on refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=on refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=off TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=on TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16 
cache=writeback + qemu-img: TEST_DIR/t.qcow2: Lazy refcounts only supported with compatibility level 1.1 and above (use version=v3 or greater) + + == Expect error when backing file name is empty string == +diff --git a/tests/qemu-iotests/099.out b/tests/qemu-iotests/099.out +index 8cce627529..f6f8f25957 100644 +--- a/tests/qemu-iotests/099.out ++++ b/tests/qemu-iotests/099.out +@@ -1,6 +1,6 @@ + QA output created by 099 + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 +-Formatting 'TEST_DIR/t.IMGFMT.compare', fmt=raw size=131072 ++Formatting 'TEST_DIR/t.IMGFMT.compare', fmt=raw size=131072 cache=writeback + + === Testing simple filename for blkverify === + +-- +2.27.0 + diff --git a/qemu-img-block-set-zero-flags-only-when-discard_zero.patch b/qemu-img-block-set-zero-flags-only-when-discard_zero.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba0731826712ba76c9e5310686060f4c03334b12 --- /dev/null +++ b/qemu-img-block-set-zero-flags-only-when-discard_zero.patch @@ -0,0 +1,33 @@ +From 48c792a802c8cb0ab670ddf92920e2e5e96747a4 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Mon, 18 Mar 2024 10:04:42 +0800 +Subject: [PATCH] qemu-img block: set zero flags only when discard_zeros of the + block supported + +zero flags set for block discard_zeros, only when the block support +discard_zeros need set these flags. + +old commit info: + qemu-img: block: dont blk_make_zero if discard_zeroes false + +Signed-off-by: Jinhua Cao +--- + block/file-posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 01ae5fd88c..4782aba59f 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -822,7 +822,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + #endif + s->needs_alignment = raw_needs_alignment(bs); + +- bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; ++ bs->supported_zero_flags = s->discard_zeroes ? 
(BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) : 0; + if (S_ISREG(st.st_mode)) { + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; +-- +2.27.0 + diff --git a/qemu-img-convert-Don-t-pre-zero-images.patch b/qemu-img-convert-Don-t-pre-zero-images.patch deleted file mode 100644 index 925590c34903cd73307b3f806a0b407c6c744fb5..0000000000000000000000000000000000000000 --- a/qemu-img-convert-Don-t-pre-zero-images.patch +++ /dev/null @@ -1,73 +0,0 @@ -From a2fcbe2b82c42f890a857ad8d4edcfdb273106ea Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 31 Jul 2020 08:18:31 -0400 -Subject: [PATCH] qemu-img convert: Don't pre-zero images - -RH-Author: Kevin Wolf -Message-id: <20200731081831.13781-2-kwolf@redhat.com> -Patchwork-id: 98117 -O-Subject: [RHEL-AV-8.2.1.z qemu-kvm PATCH 1/1] qemu-img convert: Don't pre-zero images -Bugzilla: 1861682 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Max Reitz -RH-Acked-by: Eric Blake - -Since commit 5a37b60a61c, qemu-img create will pre-zero the target image -if it isn't already zero-initialised (most importantly, for host block -devices, but also iscsi etc.), so that writing explicit zeros wouldn't -be necessary later. - -This could speed up the operation significantly, in particular when the -source image file was only sparsely populated. However, it also means -that some block are written twice: Once when pre-zeroing them, and then -when they are overwritten with actual data. On a full image, the -pre-zeroing is wasted work because everything will be overwritten. - -In practice, write_zeroes typically turns out faster than writing -explicit zero buffers, but slow enough that first zeroing everything and -then overwriting parts can be a significant net loss. - -Meanwhile, qemu-img convert was rewritten in 690c7301600 and zero blocks -are now written to the target using bdrv_co_pwrite_zeroes() if the -target could be pre-zeroed. 
This way we already make use of the faster -write_zeroes operation, but avoid writing any blocks twice. - -Remove the pre-zeroing because these days this former optimisation has -actually turned into a pessimisation in the common case. - -Reported-by: Nir Soffer -Signed-off-by: Kevin Wolf -Message-Id: <20200622151203.35624-1-kwolf@redhat.com> -Tested-by: Nir Soffer -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit edafc70c0c8510862f2f213a3acf7067113bcd08) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - qemu-img.c | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index 2e9cc5db7c..e4abd4978a 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -1981,15 +1981,6 @@ static int convert_do_copy(ImgConvertState *s) - ? bdrv_has_zero_init(blk_bs(s->target)) - : false; - -- if (!s->has_zero_init && !s->target_has_backing && -- bdrv_can_write_zeroes_with_unmap(blk_bs(s->target))) -- { -- ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); -- if (ret == 0) { -- s->has_zero_init = true; -- } -- } -- - /* Allocate buffer for copied data. For compressed images, only one cluster - * can be copied at a time. */ - if (s->compressed) { --- -2.27.0 - diff --git a/qemu-img-create-cache-paramter-only-use-for-reg-file.patch b/qemu-img-create-cache-paramter-only-use-for-reg-file.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ff2ad0369356c14587fdcebd0fd89379f14a03e --- /dev/null +++ b/qemu-img-create-cache-paramter-only-use-for-reg-file.patch @@ -0,0 +1,66 @@ +From 9ca9391acb780f15a6d8769339e7cd0edf457529 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 24 Mar 2022 17:12:49 +0800 +Subject: [PATCH] qemu-img create: 'cache' paramter only use for reg file image + +The paramter 'cache' is invalid for host device(/dev/xxx). 
If +'qemu-img create' operator performed on host device, the host +device not support 'cache' would result 'qemu-img create' execute +failed. + +Signed-off-by: Jinhua Cao +--- + qemu-img.c | 30 ++++++++++++++++++++++++------ + 1 file changed, 24 insertions(+), 6 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index 80adee2620..49d914c9c4 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -508,6 +508,22 @@ static int64_t cvtnum(const char *name, const char *value) + return cvtnum_full(name, value, 0, INT64_MAX); + } + ++static bool is_reg_file(const char *filename) ++{ ++ struct stat st; ++ ++ /* file not exist, file will be create later, so it's a reg file */ ++ if (access(filename, F_OK) == -1) { ++ return true; ++ } ++ ++ /* file exist, check file type */ ++ if (stat(filename, &st) >= 0 && S_ISREG(st.st_mode)) { ++ return true; ++ } ++ return false; ++} ++ + static int img_create(int argc, char **argv) + { + int c; +@@ -598,12 +614,14 @@ static int img_create(int argc, char **argv) + error_exit("Unexpected argument: %s", argv[optind]); + } + +- if (!options) { +- options = g_strdup_printf(BLOCK_OPT_CACHE"=%s", cache); +- } else { +- char *old_options = options; +- options = g_strdup_printf("%s,"BLOCK_OPT_CACHE"=%s", options, cache); +- g_free(old_options); ++ if (is_reg_file(filename)) { ++ if (!options) { ++ options = g_strdup_printf(BLOCK_OPT_CACHE"=%s", cache); ++ } else { ++ char *old_options = options; ++ options = g_strdup_printf("%s,"BLOCK_OPT_CACHE"=%s", options, cache); ++ g_free(old_options); ++ } + } + + bdrv_img_create(filename, fmt, base_filename, base_fmt, +-- +2.27.0 + diff --git a/qemu-img-free-memory-before-re-assign.patch b/qemu-img-free-memory-before-re-assign.patch deleted file mode 100644 index 2d46d64b1b9664b66efc76ea6490a1bc22663137..0000000000000000000000000000000000000000 --- a/qemu-img-free-memory-before-re-assign.patch +++ /dev/null @@ -1,33 +0,0 @@ -From d22af5cb41c16829dbf3ed3c611ef56ceeb840ff Mon Sep 17 00:00:00 2001 -From: Pan 
Nengyuan -Date: Thu, 27 Feb 2020 09:29:50 +0800 -Subject: [PATCH 02/14] qemu-img: free memory before re-assign - -collect_image_check() is called twice in img_check(), the filename/format will be alloced without free the original memory. -It is not a big deal since the process will exit anyway, but seems like a clean code and it will remove the warning spotted by asan. - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Message-Id: <20200227012950.12256-3-pannengyuan@huawei.com> -Signed-off-by: Max Reitz -Signed-off-by: Peng Liang ---- - qemu-img.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/qemu-img.c b/qemu-img.c -index 79983772de39..2e9cc5db7c4c 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -808,6 +808,8 @@ static int img_check(int argc, char **argv) - check->corruptions_fixed); - } - -+ qapi_free_ImageCheck(check); -+ check = g_new0(ImageCheck, 1); - ret = collect_image_check(bs, check, filename, fmt, 0); - - check->leaks_fixed = leaks_fixed; --- -2.26.2 - diff --git a/qemu-nbd-make-native-as-the-default-aio-mode.patch b/qemu-nbd-make-native-as-the-default-aio-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..23dae5662eeb47f75517140f9c70a8e4dee707e0 --- /dev/null +++ b/qemu-nbd-make-native-as-the-default-aio-mode.patch @@ -0,0 +1,35 @@ +From 0e610831d584d9485eb0655168d08d8234bbb555 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:48:58 +0800 +Subject: [PATCH] qemu-nbd: make native as the default aio mode + +When the file system is dealing with multithreading concurrent writing to a file, +the performance will be degraded because of the lock. +At present, the default AIO mode of QEMU NBD is threads. In the case of large blocks, +because IO is divided into small pieces and multiple queues, it will become multithreading +concurrent writing the same file. Due to the file system, the performance will be greatly reduced. +If you change to native mode, this problem will not exist. 
+ +Signed-off-by: wangjian161 +--- + qemu-nbd.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 186e6468b1..acccf2977f 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -843,6 +843,10 @@ int main(int argc, char **argv) + trace_init_file(); + qemu_set_log(LOG_TRACE, &error_fatal); + ++ if (!seen_aio && (flags & BDRV_O_NOCACHE)) { ++ flags |= BDRV_O_NATIVE_AIO; ++ } ++ + socket_activation = check_socket_activation(); + if (socket_activation == 0) { + if (!sockpath) { +-- +2.27.0 + diff --git a/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch b/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc41eac5641004223996c683b2af1792fd826737 --- /dev/null +++ b/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch @@ -0,0 +1,42 @@ +From d6aa08ac3693be3e08f2c8d3ad5a356ea6e9dead Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:55:08 +0800 +Subject: [PATCH] qemu-nbd: set timeout to qemu-nbd socket + +In case of insufficient memory and kill-9, +the NBD socket cannot be processed and stuck all the time. 
+ +Signed-off-by: wangjian161 +--- + nbd/client.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/nbd/client.c b/nbd/client.c +index 29ffc609a4..987dde43c7 100644 +--- a/nbd/client.c ++++ b/nbd/client.c +@@ -24,6 +24,8 @@ + #include "nbd-internal.h" + #include "qemu/cutils.h" + ++#define NBD_TIMEOUT_SECONDS 30 ++ + /* Definitions for opaque data types */ + + static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); +@@ -1310,6 +1312,12 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info, + } + } + ++ if (ioctl(fd, NBD_SET_TIMEOUT, NBD_TIMEOUT_SECONDS) < 0) { ++ int serrno = errno; ++ error_setg(errp, "Failed setting timeout"); ++ return -serrno; ++ } ++ + trace_nbd_init_finish(); + + return 0; +-- +2.27.0 + diff --git a/qemu-options-Fix-CXL-Fixed-Memory-Window-interleave-.patch b/qemu-options-Fix-CXL-Fixed-Memory-Window-interleave-.patch new file mode 100644 index 0000000000000000000000000000000000000000..bdc8e67f70e9aca37ee8b2cfc2172822ea46971c --- /dev/null +++ b/qemu-options-Fix-CXL-Fixed-Memory-Window-interleave-.patch @@ -0,0 +1,48 @@ +From 34fc72b12cc4887cb2b551b171f6a76c860b6997 Mon Sep 17 00:00:00 2001 +From: Yuquan Wang +Date: Sun, 7 Apr 2024 16:35:39 +0800 +Subject: [PATCH] qemu-options: Fix CXL Fixed Memory Window + interleave-granularity typo +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix the unit typo of interleave-granularity of CXL Fixed Memory +Window in qemu-option.hx. 
+ +Fixes: 03b39fcf64 ("hw/cxl: Make the CFMW a machine parameter.") +Signed-off-by: Yuquan Wang wangyuquan1236@phytium.com.cn +Message-ID: <20240407083539.1488172-2-wangyuquan1236@phytium.com.cn> +[PMD: Reworded] +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit aa88f99c87c0e5d195d6d96190374650553ea61f) +Signed-off-by: zhujun2 +--- + qemu-options.hx | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/qemu-options.hx b/qemu-options.hx +index 9829b1020a..4df4dcea21 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -149,14 +149,14 @@ SRST + platform and configuration dependent. + + ``interleave-granularity=granularity`` sets the granularity of +- interleave. Default 256KiB. Only 256KiB, 512KiB, 1024KiB, 2048KiB +- 4096KiB, 8192KiB and 16384KiB granularities supported. ++ interleave. Default 256 (bytes). Only 256, 512, 1k, 2k, ++ 4k, 8k and 16k granularities supported. + + Example: + + :: + +- -machine cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=128G,cxl-fmw.0.interleave-granularity=512k ++ -machine cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=128G,cxl-fmw.0.interleave-granularity=512 + ERST + + DEF("M", HAS_ARG, QEMU_OPTION_M, +-- +2.41.0.windows.1 + diff --git a/qemu-options-enable-smbios-option-on-RISC-V.patch b/qemu-options-enable-smbios-option-on-RISC-V.patch new file mode 100644 index 0000000000000000000000000000000000000000..f9912cd806699d8130cbef858161a45a656001bf --- /dev/null +++ b/qemu-options-enable-smbios-option-on-RISC-V.patch @@ -0,0 +1,36 @@ +From 987e286cc7614c5ff3cc9096798675d7da70a5ea Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Fri, 30 May 2025 09:13:08 +0800 +Subject: [PATCH] qemu-options: enable -smbios option on RISC-V + +commit e2ff0dec156eff4e109c678654df1225d384fd14 upstream + +With SMBIOS support added for RISC-V we also should enable the command line +option. 
+ +Signed-off-by: Heinrich Schuchardt +Reviewed-by: Daniel Henrique Barboza +Acked-by: Alistair Francis +Reviewed-by: Andrew Jones +Message-ID: <20240123184229.10415-5-heinrich.schuchardt@canonical.com> +Signed-off-by: Alistair Francis +--- + qemu-options.hx | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qemu-options.hx b/qemu-options.hx +index cbaa2e5367..55765fb34c 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2690,7 +2690,7 @@ DEF("smbios", HAS_ARG, QEMU_OPTION_smbios, + " specify SMBIOS type 17 fields\n" + "-smbios type=41[,designation=str][,kind=str][,instance=%d][,pcidev=str]\n" + " specify SMBIOS type 41 fields\n", +- QEMU_ARCH_I386 | QEMU_ARCH_ARM | QEMU_ARCH_LOONGARCH) ++ QEMU_ARCH_I386 | QEMU_ARCH_ARM | QEMU_ARCH_LOONGARCH | QEMU_ARCH_RISCV) + SRST + ``-smbios file=binary`` + Load SMBIOS entry from binary file. +-- +2.33.0 + diff --git a/qemu-options.hx-correct-formatting-smbios-type-4.patch b/qemu-options.hx-correct-formatting-smbios-type-4.patch new file mode 100644 index 0000000000000000000000000000000000000000..031748df04206d1c2fc8603651f6db0bf8699f79 --- /dev/null +++ b/qemu-options.hx-correct-formatting-smbios-type-4.patch @@ -0,0 +1,40 @@ +From 851559a1442a824559f273380c7ad1fa06f559e8 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Thu, 19 Jun 2025 10:27:13 +0800 +Subject: [PATCH] qemu-options.hx: correct formatting -smbios type=4 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 68baeaafa562e360188fb3be8a9451db1c5bd862 upstream + +processor-family and processor-id can be assigned independently. + +Add missing brackets. 
+ +Fixes: b5831d79671c ("smbios: add processor-family option") +Signed-off-by: Heinrich Schuchardt +Reviewed-by: Thomas Huth +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240729204816.11905-1-heinrich.schuchardt@canonical.com> +Signed-off-by: Philippe Mathieu-Daudé +--- + qemu-options.hx | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qemu-options.hx b/qemu-options.hx +index 55765fb34c..b09d692d5b 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2679,7 +2679,7 @@ DEF("smbios", HAS_ARG, QEMU_OPTION_smbios, + " specify SMBIOS type 3 fields\n" + "-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str]\n" + " [,asset=str][,part=str][,max-speed=%d][,current-speed=%d]\n" +- " [,processor-family=%d,processor-id=%d]\n" ++ " [,processor-family=%d][,processor-id=%d]\n" + " specify SMBIOS type 4 fields\n" + "-smbios type=8[,external_reference=str][,internal_reference=str][,connector_type=%d][,port_type=%d]\n" + " specify SMBIOS type 8 fields\n" +-- +2.33.0 + diff --git a/qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch b/qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d5329a2c14576a2a4c1b42e169641606e07c01d --- /dev/null +++ b/qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch @@ -0,0 +1,37 @@ +From 48f32788794e061ab0b359fe194c964849bb3040 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 11:10:42 +0800 +Subject: [PATCH] qemu-pr: fixed ioctl failed for multipath disk + +We use ioctl to detect multipath devices. However, we only set flags in +struct dm_ioctl (the argument to ioctl) and left other fields in random, +which may cause the failure of calling ioctl. Hence, we set other +fields to 0 to avoid the failure. 
+ +Signed-off-by: wangjian161 +Signed-off-by: shaodenghui +--- + scsi/qemu-pr-helper.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index c6c6347e9b..655404fd07 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -285,9 +285,12 @@ static void multipath_pr_init(void) + + static int is_mpath(int fd) + { +- struct dm_ioctl dm = { .flags = DM_NOFLUSH_FLAG }; ++ struct dm_ioctl dm; + struct dm_target_spec *tgt; + ++ memset(&dm, 0, sizeof(struct dm_ioctl)); ++ dm.flags = DM_NOFLUSH_FLAG; ++ + tgt = dm_dev_ioctl(fd, DM_TABLE_STATUS, &dm); + if (!tgt) { + if (errno == ENXIO) { +-- +2.27.0 + diff --git a/qemu.spec b/qemu.spec index 981d9e122bb15216d1d717f42c62cd6ace4810fd..b84e4803a45b4e7d3fc242986c44c3b646dbf7e4 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,570 +1,1036 @@ +# Whether to support Ceph rbd storage backend +%bcond_without rbd + Name: qemu -Version: 4.1.0 -Release: 76 -Epoch: 2 +Version: 8.2.0 +Release: 42 +Epoch: 11 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 URL: http://www.qemu.org -Source0: https://www.qemu.org/download/%{name}-%{version}%{?rcstr}.tar.xz +Source0: https://download.qemu.org/%{name}-%{version}%{?rcstr}.tar.xz Source1: 80-kvm.rules Source2: 99-qemu-guest-agent.rules Source3: bridge.conf +Source4: BinDir.tar.gz + +Patch0001: tests-qemu-iotests-resolved-the-problem-that-the-108.patch +Patch0002: hw-usb-Style-cleanup.patch +Patch0003: virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch +Patch0004: blkio-Respect-memory-alignment-for-bounce-buffer-all.patch +Patch0005: i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch +Patch0006: i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch +Patch0007: i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch +Patch0008: i386-cpuid-Move-leaf-7-to-correct-group.patch +Patch0009: 
chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch +Patch0010: vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch +Patch0011: vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch +Patch0012: vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch +Patch0013: vfio-pci-Ascend710-change-to-bar2-quirk.patch +Patch0014: hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch +Patch0015: virtio-gpu-remove-needless-condition.patch +Patch0016: target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch +Patch0017: hw-acpi-cpu-Use-CPUState-typedef.patch +Patch0018: hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch +Patch0019: pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch +Patch0020: Revert-file-posix-Remove-unused-s-discard_zeroes.patch +Patch0021: qemu-img-block-set-zero-flags-only-when-discard_zero.patch +Patch0022: qemu-img-add-qemu-img-direct-create.patch +Patch0023: qemu-img-create-cache-paramter-only-use-for-reg-file.patch +Patch0024: hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch +Patch0025: hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch +Patch0026: bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch +Patch0027: log-Add-some-logs-on-VM-runtime-path.patch +Patch0028: util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch +Patch0029: bugfix-fix-some-illegal-memory-access-and-memory-lea.patch +Patch0030: bugfix-fix-possible-memory-leak.patch +Patch0031: scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch +Patch0032: qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch +Patch0033: scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch +Patch0034: scsi-bugfix-fix-division-by-zero.patch +Patch0035: qapi-block-core-Add-retry-option-for-error-action.patch +Patch0036: block-backend-Introduce-retry-timer.patch +Patch0037: block-backend-Add-device-specific-retry-callback.patch +Patch0038: block-backend-Enable-retry-action-on-errors.patch +Patch0039: block-backend-Add-timeout-support-for-retry.patch +Patch0040: 
block-Add-error-retry-param-setting.patch +Patch0041: virtio_blk-Add-support-for-retry-on-errors.patch +Patch0042: scsi-bus-Refactor-the-code-that-retries-requests.patch +Patch0043: scsi-disk-Add-support-for-retry-on-errors.patch +Patch0044: block-backend-Stop-retrying-when-draining.patch +Patch0045: block-Add-sanity-check-when-setting-retry-parameters.patch +Patch0046: scsi-bus-fix-unmatched-object_unref.patch +Patch0047: scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch +Patch0048: block-mirror-fix-file-system-went-to-read-only-after.patch +Patch0049: block-enable-cache-mode-of-empty-cdrom.patch +Patch0050: block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch +Patch0051: hw-loongarch-virt-Align-high-memory-base-address-wit.patch +Patch0052: target-loongarch-Add-timer-information-dump-support.patch +Patch0053: target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch +Patch0054: target-loongarch-move-translate-modules-to-tcg.patch +Patch0055: linux-headers-Update-to-Linux-v6.7-rc5.patch +Patch0056: linux-headers-Synchronize-linux-headers-from-linux-v.patch +Patch0057: target-loongarch-Define-some-kvm_arch-interfaces.patch +Patch0058: target-loongarch-Supplement-vcpu-env-initial-when-vc.patch +Patch0059: target-loongarch-Implement-kvm-get-set-registers.patch +Patch0060: target-loongarch-Implement-kvm_arch_init-function.patch +Patch0061: target-loongarch-Implement-kvm_arch_init_vcpu.patch +Patch0062: target-loongarch-Implement-kvm_arch_handle_exit.patch +Patch0063: target-loongarch-Restrict-TCG-specific-code.patch +Patch0064: target-loongarch-Implement-set-vcpu-intr-for-kvm.patch +Patch0065: target-loongarch-Add-loongarch-kvm-into-meson-build.patch +Patch0066: hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch +Patch0067: hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch +Patch0068: hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch +Patch0069: hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch +Patch0070: 
configure-Add-linux-header-compile-support-for-Loong.patch +Patch0071: target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch +Patch0072: target-loongarch-kvm-Enable-LSX-LASX-extension.patch +Patch0073: target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch +Patch0074: loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch +Patch0075: disable-keyring-option.patch +Patch0076: virtio-net-correctly-copy-vnet-header-when-flushing-.patch +Patch0077: ui-clipboard-mark-type-as-not-available-when-there-i.patch +Patch0078: memory-backup-Modify-the-VM-s-physical-bits-value-se.patch +Patch0079: backup-memory-bakcup-hugepages-hugepages-files-maybe.patch +Patch0080: block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch +Patch0081: travis-ci-Rename-SOFTMMU-SYSTEM.patch +Patch0082: iotests-adapt-to-output-change-for-recently-introduc.patch +Patch0083: migration-Skip-only-empty-block-devicesi.patch +Patch0084: vhost-cancel-migration-when-vhost-user-restarted-dur.patch +Patch0085: Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch +Patch0086: ps2-fix-oob-in-ps2-kbd.patch +Patch0087: monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch +Patch0088: oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch +Patch0089: migration-skip-cache_drop-for-bios-bootloader-and-nv.patch +Patch0090: migration-Add-multi-thread-compress-method.patch +Patch0091: migration-Refactoring-multi-thread-compress-migratio.patch +Patch0092: migration-Add-multi-thread-compress-ops.patch +Patch0093: migration-Add-zstd-support-in-multi-thread-compressi.patch +Patch0094: migration-Add-compress_level-sanity-check.patch +Patch0095: doc-Update-multi-thread-compression-doc.patch +Patch0096: cpu-features-fix-bug-for-memory-leakage.patch +Patch0097: migration-report-migration-related-thread-pid-to-lib.patch +Patch0098: migration-report-multiFd-related-thread-pid-to-libvi.patch +Patch0099: virtio-check-descriptor-numbers.patch +Patch0100: 
virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch +Patch0101: virtio-print-the-guest-virtio_net-features-that-host.patch +Patch0102: virtio-bugfix-check-the-value-of-caches-before-acces.patch +Patch0103: virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch +Patch0104: nbd-server.c-fix-invalid-read-after-client-was-alrea.patch +Patch0105: qemu-nbd-make-native-as-the-default-aio-mode.patch +Patch0106: qemu-nbd-set-timeout-to-qemu-nbd-socket.patch +Patch0107: qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch +Patch0108: pcie-Compat-with-devices-which-do-not-support-Link-W.patch +Patch0109: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch +Patch0110: net-dump.c-Suppress-spurious-compiler-warning.patch +Patch0111: hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch +Patch0112: i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch +Patch0113: vhost-user-Set-the-acked_features-to-vm-s-featrue.patch +Patch0114: vhost-user-Add-support-reconnect-vhost-user-socket.patch +Patch0115: fix-qemu-core-when-vhost-user-net-config-with-server.patch +Patch0116: vhost-user-quit-infinite-loop-while-used-memslots-is.patch +Patch0117: vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch +Patch0118: vhost-user-add-unregister_savevm-when-vhost-user-cle.patch +Patch0119: monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch +Patch0120: virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch +Patch0121: virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch +Patch0122: virtio-net-set-the-max-of-queue-size-to-4096.patch +Patch0123: virtio-net-update-the-default-and-max-of-rx-tx_queue.patch +Patch0124: hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch +Patch0125: vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch +Patch0126: vhost-vdpa-add-migration-log-ops-for-VhostOps.patch +Patch0127: vhost-introduce-bytemap-for-vhost-backend-logging.patch +Patch0128: vhost-add-vhost_dev_suspend-resume_op.patch +Patch0129: 
vhost-implement-vhost-vdpa-suspend-resume.patch +Patch0130: vhost-implement-vhost_vdpa_device_suspend-resume.patch +Patch0131: vhost-implement-savevm_handler-for-vdpa-device.patch +Patch0132: vhost-implement-post-resume-bh.patch +Patch0133: vhost-implement-migration-state-notifier-for-vdpa-de.patch +Patch0134: vdpa-implement-vdpa-device-migration.patch +Patch0135: vdpa-move-memory-listener-to-the-realize-stage.patch +Patch0136: vdpa-support-vdpa-device-suspend-resume.patch +Patch0137: vdpa-suspend-function-return-0-when-the-vdpa-device-.patch +Patch0138: vdpa-correct-param-passed-in-when-unregister-save.patch +Patch0139: vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch +Patch0140: docs-Add-generic-vhost-vdpa-device-documentation.patch +Patch0141: vdpa-set-vring-enable-only-if-the-vring-address-has-.patch +Patch0142: ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch +Patch0143: net-eepro100-validate-various-address-valuesi-CVE-20.patch +Patch0144: cpu-add-Kunpeng-920-cpu-support.patch +Patch0145: cpu-add-Cortex-A72-processor-kvm-target-support.patch +Patch0146: tests-virt-Allow-changes-to-PPTT-test-table.patch +Patch0147: hw-arm64-add-vcpu-cache-info-support.patch +Patch0148: arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch +Patch0149: tests-virt-Update-expected-ACPI-tables-for-virt-test.patch +Patch0150: pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch +Patch0151: shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch +Patch0152: tests-Disable-filemonitor-testcase.patch +Patch0153: freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch +Patch0154: freeclock-set-rtc_date_diff-for-arm.patch +Patch0155: freeclock-set-rtc_date_diff-for-X86.patch +Patch0156: i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch +Patch0157: bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch +Patch0158: log-Add-log-at-boot-cpu-init-for-aarch64.patch +Patch0159: feature-Add-log-for-each-modules.patch +Patch0160: 
feature-Add-logs-for-vm-start-and-destroy.patch +Patch0161: pl031-support-rtc-timer-property-for-pl031.patch +Patch0162: arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch +Patch0163: linux-headers-update-against-5.10-and-manual-clear-v.patch +Patch0164: vfio-Maintain-DMA-mapping-range-for-the-container.patch +Patch0165: vfio-migration-Add-support-for-manual-clear-vfio-dir.patch +Patch0166: arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch +Patch0167: cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch +Patch0168: hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch +Patch0169: arm-virt-target-arm-Machine-init-time-change-common-.patch +Patch0170: accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch +Patch0171: arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch +Patch0172: arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch +Patch0173: arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch +Patch0174: hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch +Patch0175: arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch +Patch0176: hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch +Patch0177: hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch +Patch0178: hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch +Patch0179: arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch +Patch0180: arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch +Patch0181: hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch +Patch0182: arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch +Patch0183: acpi-cpu-Add-cpu_cppc-building-support.patch +Patch0184: tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch +Patch0185: arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch +Patch0186: arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch +Patch0187: hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch +Patch0188: hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch +Patch0189: 
hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch +Patch0190: hw-acpi-Make-_MAT-method-optional.patch +Patch0191: arm-virt-Release-objects-for-disabled-possible-vCPUs.patch +Patch0192: hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch +Patch0193: arm-virt-Add-update-basic-hot-un-plug-framework.patch +Patch0194: arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch +Patch0195: hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch +Patch0196: hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch +Patch0197: arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch +Patch0198: hw-arm-Changes-required-for-reset-and-to-support-nex.patch +Patch0199: physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch +Patch0200: target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch +Patch0201: target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch +Patch0202: target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch +Patch0203: hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch +Patch0204: tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch +Patch0205: hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch +Patch0206: system-physmem-Fix-possible-double-free-when-destroy.patch +Patch0207: arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch +Patch0208: acpi-cpu-Fix-cpu_hotplug_hw_init.patch +Patch0209: system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch +Patch0210: system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch +Patch0211: arm-virt.c-Convey-local_err-when-set-psci-conduit.patch +Patch0212: arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch +Patch0213: accel-kvm-Use-correct-id-for-parked-vcpu.patch +Patch0214: arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch +Patch0215: intc-gicv3-Fixes-for-vcpu-hotplug.patch +Patch0216: acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch +Patch0217: acpi-ged-Remove-cpuhp-field-of-ged.patch +Patch0218: 
arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch +Patch0219: arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch +Patch0220: arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch +Patch0221: tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch +Patch0222: coro-support-live-patch-for-libcare.patch +Patch0223: arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch +Patch0224: arm-virt-Use-max_cpus-to-calculate-redist1_count.patch +Patch0225: include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch +Patch0226: tests-bios-tables-test-Rename-smbios-type-4-related-.patch +Patch0227: hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch +Patch0228: hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch +Patch0229: block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch +Patch0230: hw-nvme-fix-Werror-maybe-uninitialized.patch +Patch0231: hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch +Patch0232: hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch +Patch0233: hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch +Patch0234: hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch +Patch0235: hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch +Patch0236: hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch +Patch0237: acpi-cpu-Fix-detection-of-present-cpu.patch +Patch0238: arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch +Patch0239: kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch +Patch0240: arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch +Patch0241: kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch +Patch0242: hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch +Patch0243: target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch +Patch0244: target-i386-Add-new-CPU-model-SierraForest.patch +Patch0245: target-i386-Export-RFDS-bit-to-guests.patch +Patch0246: target-loongarch-Fix-qemu-system-loongarch64-assert-.patch +Patch0247: 
target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch +Patch0248: target-loongarch-Fix-tlb-huge-page-loading-issue.patch +Patch0249: target-loongarch-kvm-Add-software-breakpoint-support.patch +Patch0250: target-loongarch-kvm-sync-kernel-header-files.patch +Patch0251: hw-intc-loongarch_extioi-Add-virt-extension-support.patch +Patch0252: target-loongarch-kvm-Add-pmu-support.patch +Patch0253: target-loongarch-kvm-Fix-vm-restore-failed.patch +Patch0254: target-loongarch-kvm-Add-pv-steal-time-support.patch +Patch0255: target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch +Patch0256: ppc-pnv-I2C-controller-is-not-user-creatablei.patch +Patch0257: arm-virt-Set-vcpus_count-of-CPU-as-1-to-compatible-w.patch +Patch0258: hw-ufs-Fix-buffer-overflow-bug.patch +Patch0259: ui-gtk-Fix-mouse-motion-event-scaling-issue-with-GTK.patch +Patch0260: target-i386-Add-Hygon-Dhyana-v3-CPU-model.patch +Patch0261: target-i386-Add-new-Hygon-Dharma-CPU-model.patch +Patch0262: target-riscv-cpu.c-fix-Zvkb-extension-config.patch +Patch0263: target-hexagon-idef-parser-fix-leak-of-init_list.patch +Patch0264: migration-dirtyrate-Fix-segmentation-fault.patch +Patch0265: qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO-CVE-202.patch +Patch0266: iotests-244-Don-t-store-data-file-with-protocol-in-i.patch +Patch0267: iotests-270-Don-t-store-data-file-with-json-prefix-i.patch +Patch0268: block-Parse-filenames-only-when-explicitly-requested.patch +Patch0269: nbd-Minor-style-and-typo-fixes.patch +Patch0270: nbd-server-Plumb-in-new-args-to-nbd_client_add.patch +Patch0271: nbd-server-CVE-2024-7409-Cap-default-max-connections.patch +Patch0272: nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch +Patch0273: nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch +Patch0274: physmem-Bail-out-qemu_ram_block_from_host-for-invali.patch +Patch0275: hvf-arm-Do-not-advance-PC-when-raising-an-exception.patch +Patch0276: hw-nvme-fix-memory-leak-in-nvme_dsm.patch +Patch0277: 
hw-nvme-fix-number-of-PIDs-for-FDP-RUH-update.patch +Patch0278: aspeed-smc-Fix-possible-integer-overflow.patch +Patch0279: hw-display-bcm2835_fb-fix-fb_use_offsets-condition.patch +Patch0280: vl-fix-type-is-NULL-in-vga-help.patch +Patch0281: ppc-vof-Fix-unaligned-FDT-property-access.patch +Patch0282: crypto-Introduce-SM4-symmetric-cipher-algorithm.patch +Patch0283: target-sparc-use-signed-denominator-in-sdiv-helper.patch +Patch0284: Add-support-for-the-virtcca-cvm-feature.patch +Patch0285: target-i386-add-support-for-LAM-in-CPUID-enumeration.patch +Patch0286: target-i386-add-control-bits-support-for-LAM.patch +Patch0287: cvm-bug-fix-for-incorrect-device-name-check-for-vhos.patch +Patch0288: cvm-bug-fix-for-undefined-reference-to-virtcca_cvm_a.patch +Patch0289: hw-misc-support-vpsp.patch +Patch0290: hw-core-ptimer-fix-timer-zero-period-condition-for-f.patch +Patch0291: vvfat-Fix-bug-in-writing-to-middle-of-file.patch +Patch0292: virtio-net-Use-virtual-time-for-RSC-timers.patch +Patch0293: crypto-Introduce-SM3-hash-hmac-pbkdf-algorithm.patch +Patch0294: cvm-Implement-command-blacklist-for-cvm-security-enh.patch +Patch0295: hw-display-vhost-user-gpu.c-fix-vhost_user_gpu_chr_r.patch +Patch0296: hw-nvme-fix-leak-of-uninitialized-memory-in-io_mgmt_.patch +Patch0297: crypto-tlscredspsk-Free-username-on-finalize.patch +Patch0298: doc-update-AMD-SEV-to-include-Live-migration-flow.patch +Patch0299: migration.json-add-AMD-SEV-specific-migration-parame.patch +Patch0300: confidential-guest-support-introduce-ConfidentialGue.patch +Patch0301: target-i386-sev-provide-callback-to-setup-outgoing-c.patch +Patch0302: target-i386-sev-do-not-create-launch-context-for-an-.patch +Patch0303: target-i386-sev-add-support-to-encrypt-the-outgoing-.patch +Patch0304: target-i386-sev-add-support-to-load-incoming-encrypt.patch +Patch0305: kvm-Add-support-for-SEV-shared-regions-list-and-KVM_.patch +Patch0306: migration-add-support-to-migrate-shared-regions-list.patch +Patch0307: 
migration-ram-add-support-to-send-encrypted-pages.patch +Patch0308: migration-ram-Force-encrypted-status-for-flash0-flas.patch +Patch0309: kvm-Add-support-for-userspace-MSR-filtering-and-hand.patch +Patch0310: target-i386-sev-Return-0-if-sev_send_get_packet_len-.patch +Patch0311: migration-ram-Force-encrypted-status-for-VGA-vram.patch +Patch0312: target-i386-sev-Clear-shared_regions_list-when-reboo.patch +Patch0313: migration-ram-Fix-calculation-of-gfn-correpond-to-a-.patch +Patch0314: target-i386-Introduce-header-file-csv.h.patch +Patch0315: target-i386-csv-Read-cert-chain-from-file-when-prepa.patch +Patch0316: target-i386-csv-add-support-to-queue-the-outgoing-pa.patch +Patch0317: target-i386-csv-add-support-to-encrypt-the-outgoing-.patch +Patch0318: target-i386-csv-add-support-to-queue-the-incoming-pa.patch +Patch0319: target-i386-csv-add-support-to-load-incoming-encrypt.patch +Patch0320: migration-ram-Accelerate-the-transmission-of-CSV-gue.patch +Patch0321: migration-ram-Accelerate-the-loading-of-CSV-guest-s-.patch +Patch0322: target-i386-csv-Add-support-for-migrate-VMSA-for-CSV.patch +Patch0323: target-i386-get-set-migrate-GHCB-state.patch +Patch0324: target-i386-kvm-Fix-the-resettable-info-when-emulate.patch +Patch0325: kvm-Add-support-for-CSV2-reboot.patch +Patch0326: update-docs-tools-virtfs-proxy-helper.rst.patch +Patch0327: update-io-trace-events.patch +Patch0328: nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch +Patch0329: virtio-net-Ensure-queue-index-fits-with-RSS-CVE-2024.patch +Patch0330: target-i386-Introduce-SapphireRapids-v3-to-add-missi.patch +Patch0331: hw-misc-bcm2835_property-Fix-handling-of-FRAMEBUFFER.patch +Patch0332: target-arm-Disable-SVE-extensions-when-SVE-is-disabl.patch +Patch0333: virtio-pci-fix-use-of-a-released-vector.patch +Patch0334: target-loongarch-fix-a-wrong-print-in-cpu-dump.patch +Patch0335: backends-cryptodev-builtin-Fix-local_error-leaks.patch +Patch0336: char-stdio-Restore-blocking-mode-of-stdout-on-exit.patch 
+Patch0337: target-i386-no-single-step-exception-after-MOV-or-PO.patch +Patch0338: migration-colo-Fix-bdrv_graph_rdlock_main_loop-Asser.patch +Patch0339: load_elf-fix-iterator-s-type-for-elf-file-processing.patch +Patch0340: hw-loongarch-Fix-fdt-memory-node-wrong-reg.patch +Patch0341: hw-loongarch-virt-Fix-FDT-memory-node-address-width.patch +Patch0342: system-physmem-Propagate-AddressSpace-to-MapClient-h.patch +Patch0343: system-physmem-Per-AddressSpace-bounce-buffering.patch +Patch0344: softmmu-Support-concurrent-bounce-buffers-CVE-2024-8.patch +Patch0345: mac_dbdma-Remove-leftover-dma_memory_unmap-calls-CVE.patch +Patch0346: crypto-avoid-leak-of-ctx-when-bad-cipher-mode-is-giv.patch +Patch0347: hw-ufs-add-basic-info-of-query-response-upiu.patch +Patch0348: hw-block-fix-uint32-overflow.patch +Patch0349: Added-CoDA-feature-support-in-the-context-of-CVM.-Wh.patch +Patch0350: virtio-snd-add-max-size-bounds-check-in-input-cb-CVE.patch +Patch0351: target-ppc-Fix-lxvx-stxvx-facility-check.patch +Patch0352: target-ppc-Fix-lxv-stxv-MSR-facility-check.patch +Patch0353: virtio-net-drop-too-short-packets-early.patch +Patch0354: target-i386-fix-size-of-EBP-writeback-in-gen_enter.patch +Patch0355: ui-gtk-Draw-guest-frame-at-refresh-cycle.patch +Patch0356: stdvga-fix-screen-blanking.patch +Patch0357: hw-intc-riscv_aplic-APLICs-should-add-child-earlier-.patch +Patch0358: hw-loongarch-virt-Fix-memory-leak.patch +Patch0359: hw-remote-vfio-user-Fix-config-space-access-byte-ord.patch +Patch0360: block-fix-Werror-maybe-uninitialized-false-positive.patch +Patch0361: virtio-remove-virtio_tswap16s-call-in-vring_packed_e.patch +Patch0362: target-riscv-kvm-tolerate-KVM-disable-ext-errors.patch +Patch0363: ui-sdl2-set-swap-interval-explicitly-when-OpenGL-is-.patch +Patch0364: hw-intc-arm_gic-fix-spurious-level-triggered-interru.patch +Patch0365: hw-audio-virtio-sound-fix-heap-buffer-overflow.patch +Patch0366: crypto-run-qcrypto_pbkdf2_count_iters-in-a-new-threa.patch +Patch0367: 
softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch +Patch0368: tests-bump-QOS_PATH_MAX_ELEMENT_SIZE-again.patch +Patch0369: enable-virtio-device-mmio-access-and-wait-util-virti.patch +Patch0370: vdpa-fix-vdpa-device-migrate-rollback-wrong-when-sus.patch +Patch0371: vdpa-support-resizing-virtio-blk-capacity-online-for.patch +Patch0372: Revert-vdpa-add-vhost_vdpa_suspend.patch +Patch0373: Revert-vdpa-add-vhost_vdpa-suspended-parameter.patch +Patch0374: Revert-vdpa-block-migration-if-SVQ-does-not-admit-a-.patch +Patch0375: vdpa-remove-memory-listener-unregister-in-vhost_vdpa.patch +Patch0376: hw-gpio-aspeed_gpio-Avoid-shift-into-sign-bit.patch +Patch0377: crypto-use-consistent-error-reporting-pattern-for-un.patch +Patch0378: crypto-drop-gnutls-debug-logging-support.patch +Patch0379: crypto-factor-out-conversion-of-QAPI-to-gcrypt-const.patch +Patch0380: Consider-discard-option-when-writing-zeros.patch +Patch0381: util-userfaultfd-Remove-unused-uffd_poll_events.patch +Patch0382: tests-avocado-fix-typo-in-replay_linux.patch +Patch0383: hw-net-can-sja1000-fix-bug-for-single-acceptance-fil.patch +Patch0384: platform-bus-fix-refcount-leak.patch +Patch0385: edu-fix-DMA-range-upper-bound-check.patch +Patch0386: dma-Fix-function-names-in-documentation.patch +Patch0387: audio-pw-Report-more-accurate-error-when-connecting-.patch +Patch0388: audio-pw-Report-more-accurate-error-when-connecting--new.patch +Patch0389: docs-tools-qemu-img.rst-fix-typo-sumarizes.patch +Patch0390: hw-pci-bridge-Add-a-Kconfig-switch-for-the-normal-PC.patch +Patch0391: hw-intc-openpic-Improve-errors-for-out-of-bounds-pro.patch +Patch0392: acpi-ged-Add-macro-for-acpi-sleep-control-register.patch +Patch0393: tests-Wait-for-migration-completion-on-destination-Q.patch +Patch0394: hw-loongarch-virt-Remove-unnecessary-cpu.h-inclusion.patch +Patch0395: raw-format-Fix-error-message-for-invalid-offset-size.patch +Patch0396: linux-user-Clean-up-unused-header.patch +Patch0397: 
target-riscv-csr.c-Fix-an-access-to-VXSAT.patch +Patch0398: Fix-calculation-of-minimum-in-colo_compare_tcp.patch +Patch0399: virtio-net-Avoid-indirection_table_mask-overflow.patch +Patch0400: intel_iommu-Send-IQE-event-when-setting-reserved-bit.patch +Patch0401: hw-cxl-Ensure-there-is-enough-data-to-read-the-input.patch +Patch0402: virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch +Patch0403: block-blkio-use-FUA-flag-on-write-zeroes-only-if-sup.patch +Patch0404: docs-sphinx-depfile.py-Handle-env.doc2path-returning.patch +Patch0405: target-arm-Fix-SVE-SDOT-UDOT-USDOT-4-way-indexed.patch +Patch0406: tests-docker-update-debian-i686-and-mipsel-images-to.patch +Patch0407: module-Prevent-crash-by-resetting-local_err-in-modul.patch +Patch0408: target-arm-Clear-high-SVE-elements-in-handle_vec_sim.patch +Patch0409: target-ppc-Fix-migration-of-CPUs-with-TLB_EMB-TLB-ty.patch +Patch0410: target-hppa-Fix-PSW-V-bit-packaging-in-cpu_hppa_get-.patch +Patch0411: ppc-xive-Fix-ESB-length-overflow-on-32-bit-hosts.patch +Patch0412: hw-vfio-add-device-hct-based-on-vfio.patch +Patch0413: hw-vfio-hct-update-support-ccp-count-to-48.patch +Patch0414: hw-vfio-hct-fix-ccp_index-error-caused-by-uninitiali.patch +Patch0415: hw-vfio-hct-qemu-startup-terminate-once-error-happen.patch +Patch0416: s390x-sclp-Simplify-get_sclp_device.patch +Patch0417: ui-remove-break-after-g_assert_not_reached.patch +Patch0418: ui-console-vc-Silence-warning-about-sprintf-on-OpenB.patch +Patch0419: meson.build-Remove-ncurses-workaround-for-OpenBSD.patch +Patch0420: hw-audio-hda-free-timer-on-exit.patch +Patch0421: hw-pci-Add-parenthesis-to-PCI_BUILD_BDF-macro.patch +Patch0422: hw-cxl-Ensure-there-is-enough-data-for-the-header-in.patch +Patch0423: target-i386-sev-Fix-incompatibility-between-SEV-and-.patch +Patch0424: target-i386-sev-Add-support-for-reuse-ASID-for-diffe.patch +Patch0425: Add-virtCCA-Coda-annotation.patch +Patch0426: cvm-Add-support-for-TEE-based-national-encryption-ac.patch +Patch0427: 
hw-arm-virt-Keep-Guest-L1-cache-type-consistent-with.patch +Patch0428: target-i386-add-guest-phys-bits-cpu-property.patch +Patch0429: kvm-add-support-for-guest-physical-bits.patch +Patch0430: hw-i386-add-mem2-option-for-qemu.patch +Patch0431: hw-misc-support-tkm-use-mem2-memory.patch +Patch0432: hw-misc-psp-Pin-the-hugepage-memory-specified-by-mem.patch +Patch0433: 9pfs-fix-crash-on-Treaddir-request.patch +Patch0434: hw-nvme-fix-handling-of-over-committed-queues.patch +Patch0435: exec-memop-Remove-unused-memop_big_endian-helper.patch +Patch0436: qemu-bswap-Undefine-CPU_CONVERT-once-done.patch +Patch0437: next-kbd-convert-to-use-qemu_input_handler_register.patch +Patch0438: target-i386-csv-Add-CSV3-context.patch +Patch0439: target-i386-csv-Add-command-to-initialize-CSV3-conte.patch +Patch0440: target-i386-csv-Add-command-to-load-data-to-CSV3-gue.patch +Patch0441: target-i386-csv-Add-command-to-load-vmcb-to-CSV3-gue.patch +Patch0442: target-i386-cpu-Populate-CPUID-0x8000_001F-when-CSV3.patch +Patch0443: target-i386-csv-Do-not-register-unregister-guest-sec.patch +Patch0444: target-i386-csv-Load-initial-image-to-private-memory.patch +Patch0445: vga-Force-full-update-for-CSV3-guest.patch +Patch0446: vfio-Only-map-shared-region-for-CSV3-virtual-machine.patch +Patch0447: linux-headers-update-kernel-headers-to-include-CSV3-.patch +Patch0448: target-i386-csv-Add-support-to-migrate-the-outgoing-.patch +Patch0449: target-i386-csv-Add-support-to-migrate-the-incoming-.patch +Patch0450: target-i386-csv-Add-support-to-migrate-the-outgoing--new.patch +Patch0451: target-i386-csv-Add-support-to-migrate-the-incoming--new.patch +Patch0452: hw-arm-mps2-tz.c-fix-RX-TX-interrupts-order.patch +Patch0453: hw-i386-amd_iommu-Don-t-leak-memory-in-amdvi_update_.patch +Patch0454: hw-ppc-e500-Add-missing-device-tree-properties-to-i2.patch +Patch0455: hw-ppc-e500-Remove-unused-irqs-parameter.patch +Patch0456: sphinx-qapidoc-Fix-to-generate-doc-for-explicit-unbo.patch +Patch0457: 
hw-ppc-e500-Prefer-QOM-cast.patch +Patch0458: target-arm-Fix-FJCVTZS-vs-flush-to-zero.patch +Patch0459: ui-vnc-don-t-return-an-empty-SASL-mechlist-to-the-cl.patch +Patch0460: migration-Fix-file-migration-with-fdset.patch +Patch0461: tcg-loongarch64-Fix-tcg_out_movi-vs-some-pcrel-point.patch +Patch0462: accel-tcg-Fix-typo-causing-tb-page_addr-1-to-not-be-.patch +Patch0463: target-riscv-Fix-the-element-agnostic-function-probl.patch +Patch0464: qio-Inherit-follow_coroutine_ctx-across-TLS.patch +Patch0465: hw-intc-arm_gic-Fix-handling-of-NS-view-of-GICC_APR-.patch +Patch0466: hvf-arm-Fix-encodings-for-ID_AA64PFR1_EL1-and-debug-.patch +Patch0467: qemu-options-Fix-CXL-Fixed-Memory-Window-interleave-.patch +Patch0468: target-m68k-Map-FPU-exceptions-to-FPSR-register.patch +Patch0469: migration-fix-possible-int-overflow.patch +Patch0470: tcg-Allow-top-bit-of-SIMD_DATA_BITS-to-be-set-in-sim.patch +Patch0471: vdpa-dev-Fix-initialisation-order-to-restore-VDUSE-c.patch +Patch0472: hw-loongarch-Move-boot-functions-to-boot.c.patch +Patch0473: hw-loongarch-Add-load-initrd.patch +Patch0474: hw-loongarch-Add-slave-cpu-boot_code.patch +Patch0475: hw-loongarch-Add-init_cmdline.patch +Patch0476: hw-loongarch-Init-efi_system_table.patch +Patch0477: hw-loongarch-Init-efi_boot_memmap-table.patch +Patch0478: hw-loongarch-Init-efi_initrd-table.patch +Patch0479: hw-loongarch-Init-efi_fdt-table.patch +Patch0480: hw-loongarch-fdt-adds-cpu-interrupt-controller-node.patch +Patch0481: hw-loongarch-fdt-adds-Extend-I-O-Interrupt-Controlle.patch +Patch0482: hw-loongarch-fdt-adds-pch_pic-Controller.patch +Patch0483: hw-loongarch-fdt-adds-pch_msi-Controller.patch +Patch0484: hw-loongarch-fdt-adds-pcie-irq_map-node.patch +Patch0485: hw-loongarch-fdt-remove-unused-irqchip-node.patch +Patch0486: hw-loongarch-Add-cells-missing-from-uart-node.patch +Patch0487: hw-loongarch-Add-cells-missing-from-rtc-node.patch +Patch0488: loongarch-switch-boards-to-default-y.patch +Patch0489: 
hw-loongarch-move-memory-map-to-boot.c.patch +Patch0490: hw-loongarch-Rename-LOONGARCH_MACHINE-with-LOONGARCH.patch +Patch0491: hw-loongarch-Rename-LoongArchMachineState-with-Loong.patch +Patch0492: hw-loongarch-Refine-default-numa-id-calculation.patch +Patch0493: hw-loongarch-Add-VM-mode-in-IOCSR-feature-register-i.patch +Patch0494: hw-loongarch-Refine-acpi-srat-table-for-numa-memory.patch +Patch0495: hw-loongarch-Refine-fadt-memory-table-for-numa-memor.patch +Patch0496: hw-loongarch-Refine-fwcfg-memory-map.patch +Patch0497: hw-loongarch-Refine-system-dram-memory-region.patch +Patch0498: hw-loongarch-Remove-minimum-and-default-memory-size.patch +Patch0499: tests-libqos-Add-loongarch-virt-machine-node.patch +Patch0500: hw-loongarch-virt-Use-MemTxAttrs-interface-for-misc-.patch +Patch0501: hw-loongarch-boot.c-fix-out-of-bound-reading.patch +Patch0502: hw-loongarch-Change-the-tpm-support-by-default.patch +Patch0503: hw-loongarch-virt-Remove-unused-assignment.patch +Patch0504: hw-loongarch-Fix-length-for-lowram-in-ACPI-SRAT.patch +Patch0505: hw-loongarch-Remove-default-enable-with-VIRTIO_VGA-d.patch +Patch0506: hw-loongarch-virt-support-up-to-4-serial-ports.patch +Patch0507: hw-loongarch-virt-pass-random-seed-to-fdt.patch +Patch0508: hw-loongarch-Add-acpi-SPCR-table-support.patch +Patch0509: hw-loongarch-virt-Add-description-for-virt-machine-t.patch +Patch0510: hw-loongarch-virt-Add-FDT-table-support-with-acpi-ge.patch +Patch0511: hw-arm-virt-acpi-build.c-Migrate-SPCR-creation-to-co.patch +Patch0512: target-loongarch-Add-TCG-macro-in-structure-CPUArchS.patch +Patch0513: target-loongarch-Put-cpucfg-operation-before-CSR-reg.patch +Patch0514: target-loongarch-kvm-Fix-VM-recovery-from-disk-failu.patch +Patch0515: target-loongarch-Add-loongarch-vector-property-uncon.patch +Patch0516: target-loongarch-kvm-Add-software-breakpoint-support-sync-upstream.patch +Patch0517: target-loongarch-Remove-avail_64-in-trans_srai_w-and.patch +Patch0518: 
target-loongarch-Set-CSR_PRCFG1-and-CSR_PRCFG2-value.patch +Patch0519: target-loongarch-Fix-cpu_reset-set-wrong-CSR_CRMD.patch +Patch0520: target-loongarch-Add-compatible-support-about-VM-reb.patch +Patch0521: target-loongarch-kvm-Add-vCPU-reset-function.patch +Patch0522: target-loongarch-Support-QMP-dump-guest-memory.patch +Patch0523: target-loongarch-fix-Werror-maybe-uninitialized-fals.patch +Patch0524: target-loongarch-Use-explicit-little-endian-LD-ST-AP.patch +Patch0525: target-loongarch-Avoid-bits-shift-exceeding-width-of.patch +Patch0526: sync-loongarch-linux-headers.patch +Patch0527: target-loongarch-Add-loongson-binary-translation-fea.patch +Patch0528: target-loongarch-Implement-lbt-registers-save-restor.patch +Patch0529: target-loongarch-kvm-Implement-LoongArch-PMU-extensi.patch +Patch0530: linux-headers-loongarch-Add-kvm_para.h-and-unistd_64.patch +Patch0531: target-loongarch-Add-steal-time-support-on-migration.patch +Patch0532: accel-kvm-Extract-common-KVM-vCPU-creation-parking-c-sync-upstream.patch +Patch0533: hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c-sync-upstream.patch +Patch0534: hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho-sync-upstream.patch +Patch0535: hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change-sync-upstream.patch +Patch0536: physmem-Add-helper-function-to-destroy-CPU-AddressSp.patch +Patch0537: gdbstub-Add-helper-function-to-unregister-GDB-regist.patch +Patch0538: accel-kvm-kvm-all-Fixes-the-missing-break-in-vCPU-un.patch +Patch0539: hw-loongarch-virt-Add-CPU-topology-support.patch +Patch0540: hw-loongarch-virt-Add-basic-CPU-plug-support.patch +Patch0541: hw-loongarch-virt-Update-the-ACPI-table-for-hotplug-.patch +Patch0542: hw-loongarch-Add-KVM-IPI-device-support.patch +Patch0543: hw-loongarch-Add-KVM-extioi-device-support.patch +Patch0544: hw-loongarch-Add-KVM-pch-pic-device-support.patch +Patch0545: hw-loongarch-Add-KVM-pch-msi-device-support.patch +Patch0546: hw-loongarch-clean-code.patch +Patch0547: 
hw-loongarch-boot-Use-warn_report-when-no-kernel-fil.patch +Patch0548: hw-loongarch-fix-cpu-hotplug-reset.patch +Patch0549: fix-compile-error-on-loongarch.patch +Patch0550: Reserve-address-for-MSI-mapping-in-the-CVM-scenario.patch +Patch0551: linux-user-Honor-elf-alignment-when-placing-images.patch +Patch0552: accel-tcg-Fix-user-only-probe_access_internal-plugin.patch +Patch0553: linux-user-Tolerate-CONFIG_LSM_MMAP_MIN_ADDR.patch +Patch0554: acpi-tests-avocado-bits-wait-for-200-seconds-for-SHU.patch +Patch0555: audio-audio.c-remove-trailing-newline-in-error_setg.patch +Patch0556: Avoid-unaligned-fetch-in-ladr_match.patch +Patch0557: cpu-ensure-we-don-t-call-start_exclusive-from-cpu_ex.patch +Patch0558: target-i386-Fix-minor-typo-in-NO_NESTED_DATA_BP-feat.patch +Patch0559: hw-misc-mos6522-Fix-bad-class-definition-of-the-MOS6.patch +Patch0560: usb-hub-Fix-handling-port-power-control-messages.patch +Patch0561: target-riscv-Fix-vcompress-with-rvv_ta_all_1s.patch +Patch0562: hw-audio-virtio-snd-Always-use-little-endian-audio-f.patch +Patch0563: target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch +Patch0564: hw-timer-exynos4210_mct-fix-possible-int-overflow.patch +Patch0565: target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch +Patch0566: target-arm-Fix-A64-scalar-SQSHRN-and-SQRSHRN.patch +Patch0567: linux-user-Print-tid-not-pid-with-strace.patch +Patch0568: target-arm-Don-t-get-MDCR_EL2-in-pmu_counter_enabled.patch +Patch0569: target-arm-fix-exception-syndrome-for-AArch32-bkpt-i.patch +Patch0570: target-arm-Fix-incorrect-aa64_tidcp1-feature-check.patch +Patch0571: crypto-perform-runtime-check-for-hash-hmac-support-i.patch +Patch0572: hw-audio-hda-fix-memory-leak-on-audio-setup.patch +Patch0573: contrib-plugins-add-compat-for-g_memdup2.patch +Patch0574: target-i386-fix-hang-when-using-slow-path-for-ptw_se.patch +Patch0575: migration-Ensure-vmstate_save-sets-errp.patch +Patch0576: target-arm-Drop-user-only-special-case-in-sve_stN_r.patch +Patch0577: 
hw-intc-Don-t-clear-pending-bits-on-IRQ-lowering.patch +Patch0578: hw-pci-Remove-unused-pci_irq_pulse-method.patch +Patch0579: Change-vmstate_cpuhp_sts-vmstateDescription-version_.patch +Patch0580: crypto-fix-error-check-on-gcry_md_open.patch +Patch0581: target-arm-Fix-nregs-computation-in-do_-ld-st-_zpa.patch +Patch0582: target-arm-Fix-SVE-SME-gross-MTE-suppression-checks.patch +Patch0583: target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch +Patch0584: target-arm-Fix-VCMLA-Dd-Dn-Dm-idx.patch +Patch0585: Avoid-taking-address-of-out-of-bounds-array-index.patch +Patch0586: hw-misc-nrf51_rng-Don-t-use-BIT_MASK-when-we-mean-BI.patch +Patch0587: hvf-remove-unused-but-set-variable.patch +Patch0588: target-riscv-Avoid-bad-shift-in-riscv_cpu_do_interru.patch +Patch0589: target-arm-LDAPR-should-honour-SCTLR_ELx.nAA.patch +Patch0590: target-i386-cpu-Fix-notes-for-CPU-models.patch +Patch0591: target-arm-Reinstate-vfp-property-on-AArch32-CPUs.patch +Patch0592: target-arm-take-HSTR-traps-of-cp15-accesses-to-EL2-n.patch +Patch0593: target-arm-Use-float_status-copy-in-sme_fmopa_s.patch +Patch0594: Add-if-condition-to-avoid-assertion-failed-error-in-.patch +Patch0595: virtio-net-Fix-network-stall-at-the-host-side-waitin.patch +Patch0596: target-hexagon-don-t-look-for-static-glib.patch +Patch0597: target-riscv-vector_helper.c-set-vstart-0-in-GEN_VEX.patch +Patch0598: target-riscv-vector_helper.c-optimize-loops-in-ldst-.patch +Patch0599: target-riscv-vector_helper.c-fix-vmvr_v-memcpy-endia.patch +Patch0600: hw-usb-hcd-ehci-Fix-debug-printf-format-string.patch +Patch0601: backends-cryptodev-vhost-user-Fix-local_error-leaks.patch +Patch0602: parallels-fix-ext_off-assertion-failure-due-to-overf.patch +Patch0603: bakcend-VirtCCA-resolve-hugepage-memory-waste-issue-.patch +Patch0604: qapi-qom-target-i386-csv-guest-Introduce-secret-head.patch +Patch0605: target-i386-kvm-Support-to-get-and-enable-extensions.patch +Patch0606: target-i386-csv-Request-to-set-private-memory-of-CSV.patch 
+Patch0607: target-i386-csv-Support-load-kernel-hashes-for-CSV3-.patch +Patch0608: target-i386-csv-Support-inject-secret-for-CSV3-guest.patch +Patch0609: arm-VirtCCA-CVM-support-UEFI-boot.patch +Patch0610: arm-VirtCCA-qemu-uefi-boot-support-kae.patch +Patch0611: arm-VirtCCA-Compatibility-with-older-versions-of-TMM.patch +Patch0612: arm-VirtCCA-qemu-CoDA-support-UEFI-boot.patch +Patch0613: BUGFIX-Enforce-isolation-for-virtcca_shared_hugepage.patch +Patch0614: backends-VirtCCA-cvm_gpa_start-supports-both-1GB-and.patch +Patch0615: qga-Add-log-to-guest-fsfreeze-thaw-command.patch +Patch0616: qga-Don-t-daemonize-before-channel-is-initialized.patch +Patch0617: virtcca-add-kvm-isolation-when-get-tmi-version.patch +Patch0618: backends-cryptodev-Do-not-abort-for-invalid-session-.patch +Patch0619: backends-cryptodev-Do-not-ignore-throttle-backends-E.patch +Patch0620: hw-nvme-fix-invalid-check-on-mcl.patch +Patch0621: hw-nvme-fix-invalid-endian-conversion.patch +Patch0622: net-fix-build-when-libbpf-is-disabled-but-libxdp-is-.patch +Patch0623: target-i386-Add-more-features-enumerated-by-CPUID.7..patch +Patch0624: target-i386-fix-feature-dependency-for-WAITPKG.patch +Patch0625: target-i386-add-support-for-FRED-in-CPUID-enumeratio.patch +Patch0626: target-i386-mark-CR4.FRED-not-reserved.patch +Patch0627: vmxcap-add-support-for-VMX-FRED-controls.patch +Patch0628: target-i386-enumerate-VMX-nested-exception-support.patch +Patch0629: target-i386-Add-get-set-migrate-support-for-FRED-MSR.patch +Patch0630: target-i386-Delete-duplicated-macro-definition-CR4_F.patch +Patch0631: target-i386-Add-VMX-control-bits-for-nested-FRED-sup.patch +Patch0632: target-i386-Raise-the-highest-index-value-used-for-a.patch +Patch0633: target-i386-pass-X86CPU-to-x86_cpu_get_supported_fea.patch +Patch0634: i386-cpuid-Remove-subleaf-constraint-on-CPUID-leaf-1.patch +Patch0635: target-i386-Don-t-construct-a-all-zero-entry-for-CPU.patch +Patch0636: target-i386-Enable-fdp-excptn-only-and-zero-fcs-fds.patch 
+Patch0637: target-i386-Construct-CPUID-2-as-stateful-iff-times-.patch +Patch0638: target-i386-Make-invtsc-migratable-when-user-sets-ts.patch +Patch0639: hw-pci-host-designware-Fix-ATU_UPPER_TARGET-register.patch +Patch0640: hw-ufs-free-irq-on-exit.patch +Patch0641: hw-sd-sdhci-free-irq-on-exit.patch +Patch0642: target-s390x-Fix-a-typo-in-s390_cpu_class_init.patch +Patch0643: hw-misc-aspeed_hace-Fix-buffer-overflow-in-has_paddi.patch +Patch0644: hw-xen-Fix-xen_bus_realize-error-handling.patch +Patch0645: cryptodev-Fix-error-handling-in-cryptodev_lkcf_execu.patch +Patch0646: vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch +Patch0647: vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch +Patch0648: vfio-container-Switch-to-dma_map-unmap-API.patch +Patch0649: vfio-common-Introduce-vfio_container_init-destroy-he.patch +Patch0650: vfio-common-Move-giommu_list-in-base-container.patch +Patch0651: vfio-container-Move-space-field-to-base-container.patch +Patch0652: vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch +Patch0653: vfio-container-Move-per-container-device-list-in-bas.patch +Patch0654: vfio-container-Convert-functions-to-base-container.patch +Patch0655: vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch +Patch0656: vfio-container-Move-vrdl_list-to-base-container.patch +Patch0657: vfio-container-Move-listener-to-base-container.patch +Patch0658: vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch +Patch0659: vfio-container-Move-iova_ranges-to-base-container.patch +Patch0660: vfio-container-Implement-attach-detach_device.patch +Patch0661: vfio-spapr-Introduce-spapr-backend-and-target-interf.patch +Patch0662: vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch +Patch0663: vfio-spapr-Move-prereg_listener-into-spapr-container.patch +Patch0664: vfio-spapr-Move-hostwin_list-into-spapr-container.patch +Patch0665: backends-iommufd-Introduce-the-iommufd-object.patch +Patch0666: util-char_dev-Add-open_cdev.patch +Patch0667: 
vfio-common-return-early-if-space-isn-t-empty.patch +Patch0668: vfio-iommufd-Implement-the-iommufd-backend.patch +Patch0669: vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch +Patch0670: vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch +Patch0671: vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch +Patch0672: vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch +Patch0673: vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch +Patch0674: vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch +Patch0675: vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +Patch0676: vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch +Patch0677: vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch +Patch0678: vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch +Patch0679: vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch +Patch0680: vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch +Patch0681: vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +Patch0682: vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch +Patch0683: hw-arm-Activate-IOMMUFD-for-virt-machines.patch +Patch0684: kconfig-Activate-IOMMUFD-for-s390x-machines.patch +Patch0685: hw-i386-Activate-IOMMUFD-for-q35-machines.patch +Patch0686: vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch +Patch0687: vfio-platform-Move-VFIODevice-initializations-in-vfi.patch +Patch0688: vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch +Patch0689: vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch +Patch0690: vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch +Patch0691: docs-devel-Add-VFIO-iommufd-backend-documentation.patch +Patch0692: vfio-container-Introduce-vfio_legacy_setup-for-furth.patch +Patch0693: vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch +Patch0694: vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch +Patch0695: vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch 
+Patch0696: vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch +Patch0697: vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch +Patch0698: vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch +Patch0699: vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch +Patch0700: vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch +Patch0701: vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch +Patch0702: backends-Introduce-HostIOMMUDevice-abstract.patch +Patch0703: backends-host_iommu_device-Introduce-HostIOMMUDevice.patch +Patch0704: vfio-container-Introduce-TYPE_HOST_IOMMU_DEVICE_LEGA.patch +Patch0705: backends-iommufd-Introduce-TYPE_HOST_IOMMU_DEVICE_IO.patch +Patch0706: range-Introduce-range_get_last_bit.patch +Patch0707: vfio-container-Implement-HostIOMMUDeviceClass-realiz.patch +Patch0708: backends-iommufd-Introduce-helper-function-iommufd_b.patch +Patch0709: vfio-iommufd-Implement-HostIOMMUDeviceClass-realize-.patch +Patch0710: vfio-container-Implement-HostIOMMUDeviceClass-get_ca.patch +Patch0711: backends-iommufd-Implement-HostIOMMUDeviceClass-get_.patch +Patch0712: vfio-Create-host-IOMMU-device-instance.patch +Patch0713: hw-pci-Introduce-helper-function-pci_device_get_iomm.patch +Patch0714: hw-pci-Introduce-pci_device_-set-unset-_iommu_device.patch +Patch0715: vfio-pci-Pass-HostIOMMUDevice-to-vIOMMU.patch +Patch0716: intel_iommu-Extract-out-vtd_cap_init-to-initialize-c.patch +Patch0717: intel_iommu-Implement-set-unset-_iommu_device-callba.patch +Patch0718: intel_iommu-Check-compatibility-with-host-IOMMU-capa.patch +Patch0719: vfio-pci-Extract-mdev-check-into-an-helper.patch +Patch0720: vfio-iommufd-Don-t-initialize-nor-set-a-HOST_IOMMU_D.patch +Patch0721: backends-iommufd-Extend-iommufd_backend_get_device_i.patch +Patch0722: vfio-iommufd-Return-errno-in-iommufd_cdev_attach_ioa.patch +Patch0723: vfio-ap-Don-t-initialize-HOST_IOMMU_DEVICE-with-mdev.patch +Patch0724: vfio-ccw-Don-t-initialize-HOST_IOMMU_DEVICE-with-mde.patch +Patch0725: 
vfio-iommufd-Introduce-auto-domain-creation.patch +Patch0726: HostIOMMUDevice-Store-the-VFIO-VDPA-agent.patch +Patch0727: vfio-iommufd-container-Remove-caps-aw_bits.patch +Patch0728: vfio-iommufd-Add-hw_caps-field-to-HostIOMMUDeviceCap.patch +Patch0729: vfio-iommufd-container-Invoke-HostIOMMUDevice-realiz.patch +Patch0730: vfio-iommufd-Probe-and-request-hwpt-dirty-tracking-c.patch +Patch0731: vfio-iommufd-Implement-VFIOIOMMUClass-set_dirty_trac.patch +Patch0732: vfio-iommufd-Implement-VFIOIOMMUClass-query_dirty_bi.patch +Patch0733: vfio-migration-Don-t-block-migration-device-dirty-tr.patch +Patch0734: vfio-common-Allow-disabling-device-dirty-page-tracki.patch +Patch0735: Update-iommufd.h-header-for-vSVA.patch +Patch0736: backends-iommufd-Add-helpers-for-invalidating-user-m.patch +Patch0737: vfio-iommufd-Add-properties-and-handlers-to-TYPE_HOS.patch +Patch0738: HostIOMMUDevice-Introduce-realize_late-callback.patch +Patch0739: vfio-iommufd-Implement-HostIOMMUDeviceClass-realize_.patch +Patch0740: vfio-iommufd-Implement-at-de-tach_hwpt-handlers.patch +Patch0741: backends-iommufd-Introduce-iommufd_backend_alloc_vio.patch +Patch0742: backends-iommufd-Introduce-iommufd_vdev_alloc.patch +Patch0743: backends-iommufd-Introduce-iommufd_viommu_invalidate.patch +Patch0744: hw-arm-smmu-common-Add-a-nested-flag-to-SMMUState.patch +Patch0745: hw-arm-smmu-common-Bypass-emulated-IOTLB-for-a-neste.patch +Patch0746: hw-arm-smmu-common-Extract-smmu_get_sbus-and-smmu_ge.patch +Patch0747: hw-arm-smmu-common-Add-set-unset_iommu_device-callba.patch +Patch0748: hw-arm-smmu-common-Add-iommufd-helpers.patch +Patch0749: hw-arm-smmu-common-Return-sysmem-if-stage-1-is-bypas.patch +Patch0750: hw-arm-smmuv3-Ignore-IOMMU_NOTIFIER_MAP-for-nested-s.patch +Patch0751: hw-arm-smmuv3-Read-host-SMMU-device-info.patch +Patch0752: hw-arm-smmuv3-Check-idr-registers-for-STE_S1CDMAX-an.patch +Patch0753: hw-arm-smmuv3-Add-smmu_dev_install_nested_ste-for-CF.patch +Patch0754: 
hw-arm-smmuv3-Add-missing-STE-invalidation.patch +Patch0755: hw-arm-smmu-common-Replace-smmu_iommu_mr-with-smmu_f.patch +Patch0756: hw-arm-smmuv3-Forward-cache-invalidate-commands-via-.patch +Patch0757: tests-qtest-Allow-DSDT-acpi-tables-to-change.patch +Patch0758: acpi-gpex-Fix-PCI-Express-Slot-Information-function-.patch +Patch0759: tests-data-acpi-Update-DSDT-acpi-tables.patch +Patch0760: hw-pci-host-gpex-needs-kernel-fix-Allow-to-generate-.patch +Patch0761: hw-arm-virt-Add-an-SMMU_IO_LEN-macro.patch +Patch0762: hw-arm-smmuv3-Add-initial-support-for-SMMUv3-Nested-.patch +Patch0763: hw-arm-smmuv3-Associate-a-pci-bus-with-a-SMMUv3-Nest.patch +Patch0764: hw-arm-virt-acpi-build-Build-IORT-with-multiple-SMMU.patch +Patch0765: tests-qtest-Allow-IORT-acpi-table-to-change.patch +Patch0766: hw-arm-virt-acpi-build-Add-IORT-RMR-regions-to-handl.patch +Patch0767: tests-data-acpi-virt-Update-IORT-acpi-table.patch +Patch0768: iommufd.h-Updated-to-openeuler-olk-6.6-kernel.patch +Patch0769: hw-arm-smmuv3-Enable-sva-stall-IDR-features.patch +Patch0770: kvm-Translate-MSI-doorbell-address-only-if-it-is-val.patch +Patch0771: smmuv3-Add-support-for-page-fault-handling.patch +Patch0772: pci-Get-pasid-capability-from-vIOMMU.patch +Patch0773: backend-iommufd-Report-PASID-capability.patch +Patch0774: vfio-Synthesize-vPASID-capability-to-VM.patch +Patch0775: smmuv3-realize-get_pasid_cap-and-set-ssidsize-with-p.patch +Patch0776: smmu-common-Return-sysmem-address-space-only-for-vfi.patch +Patch0777: smmuv3-Change-arm-smmuv3-nested-name-to-arm-smmuv3-a.patch +Patch0778: smmuv3-Use-default-bus-for-arm-smmuv3-accel.patch +Patch0779: gpex-acpi-Remove-duplicate-DSM-5.patch +Patch0780: Revert-linux-user-Print-tid-not-pid-with-strace.patch +Patch0781: fw_cfg-Don-t-set-callback_opaque-NULL-in-fw_cfg_modi.patch +Patch0782: target-arm-Change-arm_cpu_mp_affinity-when-enabled-I.patch +Patch0783: vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch +Patch0784: 
target-loongarch-fix-vcpu-reset-command-word-issue.patch +Patch0785: target-loongarch-Fix-the-cpu-unplug-resource-leak.patch +Patch0786: hw-loongarch-boot-Adjust-the-loading-position-of-the.patch +Patch0787: hw-rtc-Fixed-loongson-rtc-emulation-errors.patch +Patch0788: hw-intc-Add-extioi-ability-of-256-vcpu-interrupt-rou.patch +Patch0789: backends-iommufd-Remove-check-on-number-of-backend-u.patch +Patch0790: backends-iommufd-Remove-mutex.patch +Patch0791: backends-iommufd-Fix-missing-ERRP_GUARD-for-error_pr.patch +Patch0792: backends-iommufd-Make-iommufd_backend_-return-bool.patch +Patch0793: backends-iommufd-Get-rid-of-qemu_open_old.patch +Patch0794: Kconfig-iommufd-VDPA-Update-IOMMUFD-module-configura.patch +Patch0795: vdpa-iommufd-support-associating-iommufd-backend-for.patch +Patch0796: vdpa-iommufd-Introduce-vdpa-iommufd-module.patch +Patch0797: vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch +Patch0798: target-i386-Introduce-SierraForest-v2-model.patch +Patch0799: target-i386-Export-BHI_NO-bit-to-guests.patch +Patch0800: docs-Add-GNR-SRF-and-CWF-CPU-models.patch +Patch0801: target-i386-add-sha512-sm3-sm4-feature-bits.patch +Patch0802: target-i386-Add-new-CPU-model-ClearwaterForest.patch +Patch0803: target-i386-csv-Release-CSV3-shared-pages-after-unma.patch +Patch0804: hw-arm-virt-support-the-HDBSS-feature.patch +Patch0805: migration-multifd-Fix-error-message-in-multifd_recv_.patch +Patch0806: migration-multifd-Simplify-multifd_channel_connect-i.patch +Patch0807: migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch +Patch0808: migration-multifd-Remove-error_setg-in-migration_ioc.patch +Patch0809: migration-Fix-migration_channel_read_peek-error-path.patch +Patch0810: migration-multifd-Remove-unnecessary-usage-of-local-.patch +Patch0811: migration-multifd-Remove-MultiFDPages_t-packet_num.patch +Patch0812: migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch +Patch0813: migration-multifd-Change-multifd_pages_init-argument.patch +Patch0814: 
migration-Report-error-in-incoming-migration.patch +Patch0815: tests-qtest-migration-Print-migration-incoming-error.patch +Patch0816: tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch +Patch0817: tests-qtest-migration-Use-the-new-migration_test_add.patch +Patch0818: tests-qtest-Re-enable-multifd-cancel-test.patch +Patch0819: docs-migration-Create-migration-directory.patch +Patch0820: docs-migration-Create-index-page.patch +Patch0821: docs-migration-Convert-virtio.txt-into-rST.patch +Patch0822: docs-migration-Split-Backwards-compatibility-separat.patch +Patch0823: docs-migration-Split-Debugging-and-Firmware.patch +Patch0824: docs-migration-Split-Postcopy.patch +Patch0825: docs-migration-Split-dirty-limit.patch +Patch0826: docs-migration-Organize-Postcopy-page.patch +Patch0827: docs-migration-Further-move-vfio-to-be-feature-of-mi.patch +Patch0828: docs-migration-Further-move-virtio-to-be-feature-of-.patch +Patch0829: migration-multifd-Drop-stale-comment-for-multifd-zer.patch +Patch0830: migration-multifd-multifd_send_kick_main.patch +Patch0831: migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch +Patch0832: migration-multifd-Postpone-reset-of-MultiFDPages_t.patch +Patch0833: migration-multifd-Drop-MultiFDSendParams.normal-arra.patch +Patch0834: migration-multifd-Separate-SYNC-request-with-normal-.patch +Patch0835: migration-multifd-Simplify-locking-in-sender-thread.patch +Patch0836: migration-multifd-Drop-pages-num-check-in-sender-thr.patch +Patch0837: migration-multifd-Rename-p-num_packets-and-clean-it-.patch +Patch0838: migration-multifd-Move-total_normal_pages-accounting.patch +Patch0839: migration-multifd-Move-trace_multifd_send-recv.patch +Patch0840: migration-multifd-multifd_send_prepare_header.patch +Patch0841: migration-multifd-Move-header-prepare-fill-into-send.patch +Patch0842: migration-multifd-Forbid-spurious-wakeups.patch +Patch0843: migration-multifd-Split-multifd_send_terminate_threa.patch +Patch0844: 
migration-multifd-Change-retval-of-multifd_queue_pag.patch +Patch0845: migration-multifd-Change-retval-of-multifd_send_page.patch +Patch0846: migration-multifd-Rewrite-multifd_queue_page.patch +Patch0847: migration-multifd-Cleanup-multifd_save_cleanup.patch +Patch0848: migration-multifd-Cleanup-multifd_load_cleanup.patch +Patch0849: migration-multifd-Stick-with-send-recv-on-function-n.patch +Patch0850: migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch +Patch0851: migration-multifd-Optimize-sender-side-to-be-lockles.patch +Patch0852: migration-Fix-logic-of-channels-and-transport-compat.patch +Patch0853: migration-multifd-Join-the-TLS-thread.patch +Patch0854: migration-multifd-Remove-p-running.patch +Patch0855: migration-multifd-Move-multifd_send_setup-error-hand.patch +Patch0856: migration-multifd-Move-multifd_send_setup-into-migra.patch +Patch0857: migration-multifd-Unify-multifd-and-TLS-connection-p.patch +Patch0858: migration-multifd-Add-a-synchronization-point-for-ch.patch +Patch0859: migration-multifd-Remove-p-quit-from-recv-side.patch +Patch0860: migration-multifd-Release-recv-sem_sync-earlier.patch +Patch0861: migration-multifd-Cleanup-TLS-iochannel-referencing.patch +Patch0862: migration-multifd-Drop-registered_yank.patch +Patch0863: migration-multifd-Make-multifd_channel_connect-retur.patch +Patch0864: migration-multifd-Cleanup-outgoing_args-in-state-des.patch +Patch0865: migration-multifd-Drop-unnecessary-helper-to-destroy.patch +Patch0866: migration-Properly-apply-migration-compression-level.patch +Patch0867: tests-migration-Set-compression-level-in-migration-t.patch +Patch0868: migration-multifd-Cleanup-multifd_recv_sync_main.patch +Patch0869: migration-multifd-Rename-MultiFDSend-RecvParams-data.patch +Patch0870: migration-multifd-Decouple-recv-method-from-pages.patch +Patch0871: migration-multifd-Allow-multifd-without-packets.patch +Patch0872: migration-multifd-Add-new-migration-option-zero-page.patch +Patch0873: 
migration-multifd-Implement-zero-page-transmission-o.patch +Patch0874: migration-multifd-Implement-ram_save_target_page_mul.patch +Patch0875: migration-multifd-solve-zero-page-causing-multiple-p.patch +Patch0876: docs-migration-add-qpl-compression-feature.patch +Patch0877: migration-multifd-put-IOV-initialization-into-compre.patch +Patch0878: configure-add-enable-qpl-build-option.patch +Patch0879: migration-multifd-add-qpl-compression-method.patch +Patch0880: migration-multifd-include-ram.h-in-multifd.h.patch +Patch0881: migration-multifd-implement-initialization-of-qpl-co.patch +Patch0882: migration-multifd-implement-qpl-compression-and-deco.patch +Patch0883: tests-migration-test-add-qpl-compression-test.patch +Patch0884: docs-migration-add-uadk-compression-feature.patch +Patch0885: configure-Add-uadk-option.patch +Patch0886: migration-multifd-add-uadk-compression-framework.patch +Patch0887: migration-multifd-Add-UADK-initialization.patch +Patch0888: migration-multifd-Add-UADK-based-compression-and-dec.patch +Patch0889: migration-multifd-Switch-to-no-compression-when-no-h.patch +Patch0890: tests-migration-test-add-uadk-compression-test.patch +Patch0891: migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch +Patch0892: docs-migration-add-qatzip-compression-feature.patch +Patch0893: meson-Introduce-qatzip-feature-to-the-build-system.patch +Patch0894: migration-Add-migration-parameters-for-QATzip.patch +Patch0895: migration-Introduce-qatzip-compression-method.patch +Patch0896: tests-migration-Add-integration-test-for-qatzip-comp.patch +Patch0897: migration-multifd-Fix-loop-conditions-in-multifd_zst.patch +Patch0898: migration-multifd-Fix-rb-receivedmap-cleanup-race.patch +Patch0899: migration-multifd-Ensure-packet-ramblock-is-null-ter.patch +Patch0900: migration-multifd-Zero-p-flags-before-starting-filli.patch +Patch0901: multifd-bugfix-for-migration-using-compression-metho.patch +Patch0902: multifd-bugfix-for-incorrect-migration-data-with-QPL.patch +Patch0903: 
multifd-bugfix-for-incorrect-migration-data-with-qat.patch +Patch0904: hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch +Patch0905: hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch +Patch0906: hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch +Patch0907: arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch +Patch0908: arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch +Patch0909: virtio-pci-Batch-processing-of-IRQFD-mapping-for-mul.patch +Patch0910: kvm-msi-Mark-whether-there-is-an-IRQ-route-table-upd.patch +Patch0911: virtio-irqfd-Batch-processing-of-irqfd-related-opera.patch +Patch0912: migration-Extand-the-fdtable-in-the-incoming-phase-o.patch +Patch0913: migration-memory-Optimize-unnecessary-memory-region-.patch +Patch0914: memory-eventfd-Introduce-ioeventfd-batch-processing-.patch +Patch0915: memory-Optimize-flatview-ioeventfd-processing.patch +Patch0916: vdpa-iommufd-All-vdpa-devices-perform-only-one-log_s.patch +Patch0917: Revert-target-arm-Change-arm_cpu_mp_affinity-when-en.patch +Patch0918: target-arm-support-the-IPIV-feature.patch +Patch0919: Fix-error-in-virtCCA-CoDA-scenario.patch +Patch0920: Revert-backends-iommufd-Make-iommufd_backend_-return.patch +Patch0921: qapi-misc-target-Add-Virtcca-capability-struct-and-q.patch +Patch0922: qapi-misc-target-Add-KVM-option-to-isolate-virtcca-d.patch +Patch0923: Add-stub-function-for-tmm_get_kae_num-if-CONFIG_KVM-.patch +Patch0924: sync-header-file-from-upstream.patch +Patch0925: backends-tpm-Avoid-using-g_alloca.patch +Patch0926: hw-virtio-virtio-pci-Support-shadow-device-for-virti.patch +Patch0927: smbios-add-processor-family-option.patch +Patch0928: smbios-function-to-set-default-processor-family.patch +Patch0929: target-riscv-SMBIOS-support-for-RISC-V-virt-machine.patch +Patch0930: qemu-options-enable-smbios-option-on-RISC-V.patch +Patch0931: qemu-options.hx-correct-formatting-smbios-type-4.patch +Patch0932: tests-unit-test-char-Avoid-using-g_alloca.patch +Patch0933: 
virtio-processes-indirect-descriptors-even-if-the-re.patch +Patch0934: hw-audio-cs4231a-fix-assertion-error-in-isa_bus_get_.patch +Patch0935: block-blkio-Make-s-mem_region_alignment-be-64-bits.patch +Patch0936: target-arm-Adjust-and-validate-mtedesc-sizem1.patch +Patch0937: block-io-accept-NULL-qiov-in-bdrv_pad_request.patch +Patch0938: target-arm-fix-qemu-arm-target-build-error.patch +Patch0939: target-i386-Add-new-Hygon-Chengdu-CPU-model.patch +Patch0940: hw-acpi-Fix-the-memory-leak-issue.patch +Patch0941: virtio-net-Fix-num_buffers-for-version-1.patch +Patch0942: hw-net-cadence_gem-fix-register-mask-initialization.patch +Patch0943: memory-Export-a-helper-to-get-intersection-of-a-Memo.patch +Patch0944: memory-Change-memory_region_set_ram_discard_manager-.patch +Patch0945: memory-Unify-the-definiton-of-ReplayRamPopulate-and-.patch +Patch0946: memory-Introduce-generic-state-change-parent-class-f.patch +Patch0947: memory-Introduce-PrivateSharedManager-Interface-as-c.patch +Patch0948: vfio-Add-the-support-for-PrivateSharedManager-Interf.patch +Patch0949: memory-Change-NotifyStateClear-definition-to-return-.patch +Patch0950: ram-block-attribute-Add-priority-listener-support-fo.patch +Patch0951: linux-headers-Add-KVM-Arm-RME-definitions-to-Linux-h.patch +Patch0952: kvm-Use-kvm_vm_check_extension-where-necessary.patch +Patch0953: include-qom-object.h-New-OBJECT_DEFINE_SIMPLE_TYPE-_.patch +Patch0954: target-arm-Add-confidential-guest-support.patch +Patch0955: target-arm-kvm-Return-immediately-on-error-in-kvm_ar.patch +Patch0956: KVM-track-whether-guest-state-is-encrypted.patch +Patch0957: target-arm-kvm-rme-Initialize-realm.patch +Patch0958: target-arm-kvm-Split-kvm_arch_get-put_registers.patch +Patch0959: target-arm-kvm-rme-Initialize-vCPU.patch +Patch0960: target-arm-kvm-Create-scratch-VM-as-Realm-if-necessa.patch +Patch0961: hw-core-loader-Add-ROM-loader-notifier.patch +Patch0962: target-arm-kvm-rme-Initialize-Realm-memory.patch +Patch0963: 
target-arm-kvm-rme-Add-Realm-Personalization-Value-p.patch +Patch0964: target-arm-kvm-rme-Add-measurement-algorithm-propert.patch +Patch0965: target-arm-cpu-Set-number-of-breakpoints-and-watchpo.patch +Patch0966: target-arm-cpu-Set-number-of-PMU-counters-in-KVM.patch +Patch0967: target-arm-cpu-Inform-about-reading-confidential-CPU.patch +Patch0968: hw-arm-virt-Add-support-for-Arm-RME.patch +Patch0969: hw-arm-virt-Disable-DTB-randomness-for-confidential-.patch +Patch0970: hw-arm-virt-Reserve-one-bit-of-guest-physical-addres.patch +Patch0971: hw-arm-boot-Mark-all-guest-memory-as-RIPAS_RAM.patch +Patch0972: target-arm-kvm-rme-Add-DMA-remapping-for-the-shared-.patch +Patch0973: hw-arm-virt-Move-virt_flash_create-to-machvirt_init.patch +Patch0974: hw-arm-virt-Use-RAM-instead-of-flash-for-confidentia.patch +Patch0975: docs-interop-firmware.json-Add-arm-rme-firmware-feat.patch +Patch0976: hw-arm-boot-Load-DTB-as-is-for-confidential-VMs.patch +Patch0977: hw-arm-boot-Skip-bootloader-for-confidential-guests.patch +Patch0978: hw-tpm-Add-TPM-event-log.patch +Patch0979: hw-core-loader-Add-fields-to-RomLoaderNotify.patch +Patch0980: target-arm-kvm-rme-Add-measurement-log.patch +Patch0981: hw-arm-virt-Add-measurement-log-for-confidential-boo.patch +Patch0982: On-the-Adaptation-of-CCA-and-virtCCA.patch +Patch0983: Bugfix-Fix-compile-error-in-aarch32.patch +Patch0984: target-i386-kvm-Refine-VMX-controls-setting-for-back.patch +Patch0985: Bugfix-Correctly-set-vms-bootinfo.confidential-in-vi.patch +Patch0986: hw-arm-virt-acpi-build.c-Migrate-fw_cfg-creation-to-.patch +Patch0987: hw-arm-virt-acpi-build.c-Migrate-virtio-creation-to-.patch +Patch0988: hw-i386-acpi-microvm.c-Use-common-function-to-add-vi.patch +Patch0989: hw-riscv-virt-Make-few-IMSIC-macros-and-functions-pu.patch +Patch0990: hw-riscv-virt-acpi-build.c-Add-AIA-support-in-RINTC.patch +Patch0991: hw-riscv-virt-acpi-build.c-Add-IMSIC-in-the-MADT.patch +Patch0992: hw-riscv-virt-acpi-build.c-Add-APLIC-in-the-MADT.patch 
+Patch0993: hw-riscv-virt-acpi-build.c-Add-CMO-information-in-RH.patch +Patch0994: hw-riscv-virt-acpi-build.c-Add-MMU-node-in-RHCT.patch +Patch0995: hw-pci-host-gpex-Define-properties-for-MMIO-ranges.patch +Patch0996: hw-riscv-virt-Update-GPEX-MMIO-related-properties.patch +Patch0997: hw-riscv-virt-acpi-build.c-Add-IO-controllers-and-de.patch +Patch0998: hw-riscv-virt-acpi-build.c-Add-PLIC-in-MADT.patch +Patch0999: hw-riscv-virt.c-fix-the-interrupts-extended-property.patch +Patch1000: hw-riscv-virt-acpi-build.c-Add-namespace-devices-for.patch +Patch1001: hw-riscv-virt-acpi-build.c-Update-the-HID-of-RISC-V-.patch +Patch1002: hw-riscv-virt-acpi-build.c-Generate-SPCR-table.patch +Patch1003: hw-riscv-virt-acpi-build.c-Add-SRAT-and-SLIT-ACPI-ta.patch +Patch1004: plugins-loader-fix-deadlock-when-resetting-uninstall.patch +Patch1005: smbios-Fix-buffer-overrun-when-using-path-option.patch -Patch0001: pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch -Patch0002: pl031-support-rtc-timer-property-for-pl031.patch -Patch0003: vhost-cancel-migration-when-vhost-user-restarted.patch -Patch0004: qcow2-fix-memory-leak-in-qcow2_read_extensions.patch -Patch0005: bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch -Patch0006: hw-arm-expose-host-CPU-frequency-info-to-guest.patch -Patch0007: smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch -Patch0008: tests-bios-tables-test-disable-this-testcase.patch -Patch0009: hw-arm-virt-Introduce-cpu-topology-support.patch -Patch0010: hw-arm64-add-vcpu-cache-info-support.patch -Patch0011: xhci-Fix-memory-leak-in-xhci_address_slot.patch -Patch0012: xhci-Fix-memory-leak-in-xhci_kick_epctx.patch -Patch0013: ehci-fix-queue-dev-null-ptr-dereference.patch -Patch0014: util-async-hold-AioContext-ref-to-prevent-use-after-free.patch -Patch0015: vhost-user-scsi-prevent-using-uninitialized-vqs.patch -Patch0016: cpu-add-Kunpeng-920-cpu-support.patch -Patch0017: cpu-parse-feature-to-avoid-failure.patch -Patch0018: 
cpu-add-Cortex-A72-processor-kvm-target-support.patch -Patch0019: pcie-disable-the-PCI_EXP_LINKSTA_DLLA-cap.patch -Patch0020: vnc-fix-memory-leak-when-vnc-disconnect.patch -Patch0021: linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch -Patch0022: intc-arm_gic-Support-IRQ-injection-for-more-than-256.patch -Patch0023: ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch -Patch0024: 9pfs-local-Fix-possible-memory-leak-in-local_link.patch -Patch0025: scsi-disk-define-props-in-scsi_block_disk-to-avoid-memleaks.patch -Patch0026: arm-translate-a64-fix-uninitialized-variable-warning.patch -Patch0027: nbd-fix-uninitialized-variable-warning.patch -Patch0028: xhci-Fix-memory-leak-in-xhci_kick_epctx-when-poweroff.patch -Patch0029: block-fix-memleaks-in-bdrv_refresh_filename.patch -Patch0030: iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch -Patch0031: tcp_emu-Fix-oob-access.patch -Patch0032: slirp-use-correct-size-while-emulating-IRC-commands.patch -Patch0033: slirp-use-correct-size-while-emulating-commands.patch -Patch0034: util-add-slirp_fmt-helpers.patch -Patch0035: tcp_emu-fix-unsafe-snprintf-usages.patch -Patch0036: block-iscsi-use-MIN-between-mx_sb_len-and-sb_len_wr.patch -Patch0037: monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch -Patch0038: memory-Align-MemoryRegionSections-fields.patch -Patch0039: memory-Provide-an-equality-function-for-MemoryRegion.patch -Patch0040: vhost-Fix-memory-region-section-comparison.patch -Patch0041: file-posix-Handle-undetectable-alignment.patch -Patch0042: block-backup-fix-max_transfer-handling-for-copy_rang.patch -Patch0043: block-backup-fix-backup_cow_with_offload-for-last-cl.patch -Patch0044: qcow2-Limit-total-allocation-range-to-INT_MAX.patch -Patch0045: mirror-Do-not-dereference-invalid-pointers.patch -Patch0046: COLO-compare-Fix-incorrect-if-logic.patch -Patch0047: qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch -Patch0048: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch -Patch0049: 
pcie-Compat-with-devices-which-do-not-support-Link-W.patch -Patch0050: aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch -Patch0051: async-use-explicit-memory-barriers.patch -Patch0052: dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch -Patch0053: Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch -Patch0054: pc-Don-t-make-die-id-mandatory-unless-necessary.patch -Patch0055: block-file-posix-Reduce-xfsctl-use.patch -Patch0056: pr-manager-Fix-invalid-g_free-crash-bug.patch -Patch0057: x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch -Patch0058: vpc-Return-0-from-vpc_co_create-on-success.patch -Patch0059: target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch -Patch0060: target-arm-Don-t-abort-on-M-profile-exception-return.patch -Patch0061: libvhost-user-fix-SLAVE_SEND_FD-handling.patch -Patch0062: qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch -Patch0063: block-nfs-tear-down-aio-before-nfs_close.patch -Patch0064: blockjob-update-nodes-head-while-removing-all-bdrv.patch -Patch0065: block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch -Patch0066: coroutine-Add-qemu_co_mutex_assert_locked.patch -Patch0067: qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch -Patch0068: hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch -Patch0069: make-release-pull-in-edk2-submodules-so-we-can-build.patch -Patch0070: roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch -Patch0071: block-snapshot-Restrict-set-of-snapshot-nodes.patch -Patch0072: vhost-user-save-features-if-the-char-dev-is-closed.patch -Patch0073: hw-core-loader-Fix-possible-crash-in-rom_copy.patch -Patch0074: ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch -Patch0075: virtio-new-post_load-hook.patch -Patch0076: virtio-net-prevent-offloads-reset-on-migration.patch -Patch0077: util-hbitmap-strict-hbitmap_reset.patch -Patch0078: hbitmap-handle-set-reset-with-zero-length.patch -Patch0079: 
target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch -Patch0080: scsi-lsi-exit-infinite-loop-while-executing-script-C.patch -Patch0081: virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch -Patch0082: qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch -Patch0083: util-iov-introduce-qemu_iovec_init_extended.patch -Patch0084: util-iov-improve-qemu_iovec_is_zero.patch -Patch0085: block-io-refactor-padding.patch -Patch0086: block-Make-wait-mark-serialising-requests-public.patch -Patch0087: block-Add-bdrv_co_get_self_request.patch -Patch0088: block-file-posix-Let-post-EOF-fallocate-serialize.patch -Patch0089: block-posix-Always-allocate-the-first-block.patch -Patch0090: block-create-Do-not-abort-if-a-block-driver-is-not-a.patch -Patch0091: mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch -Patch0092: target-arm-kvm-trivial-Clean-up-header-documentation.patch -Patch0093: target-arm-kvm64-kvm64-cpus-have-timer-registers.patch -Patch0094: target-arm-kvm-Implement-virtual-time-adjustment.patch -Patch0095: target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch -Patch0096: hw-acpi-Make-ACPI-IO-address-space-configurable.patch -Patch0097: hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch -Patch0098: hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch -Patch0099: hw-arm-virt-Add-memory-hotplug-framework.patch -Patch0100: hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch -Patch0101: hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch -Patch0102: hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch -Patch0103: hw-arm-Use-GED-for-system_powerdown-event.patch -Patch0104: docs-specs-Add-ACPI-GED-documentation.patch -Patch0105: tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch -Patch0106: tests-acpi-add-empty-files.patch -Patch0107: tests-allow-empty-expected-files.patch -Patch0108: tests-Add-bios-tests-to-arm-virt.patch -Patch0109: tests-document-how-to-update-acpi-tables.patch -Patch0110: 
hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch -Patch0111: bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch -Patch0112: Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch -Patch0113: acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch -Patch0114: acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch -Patch0115: arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch -Patch0116: acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch -Patch0117: acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch -Patch0118: arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch -Patch0119: arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch -Patch0120: arm-virt-Add-CPU-hotplug-framework.patch -Patch0121: arm-virt-Add-CPU-topology-support.patch -Patch0122: test-numa-Adjust-aarch64-numa-test.patch -Patch0123: hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch -Patch0124: hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch -Patch0125: arm-virt-gic-Construct-irqs-connection-from-create_g.patch -Patch0126: intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch -Patch0127: intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch -Patch0128: intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch -Patch0129: hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch -Patch0130: accel-kvm-Add-pre-park-vCPU-support.patch -Patch0131: intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch -Patch0132: acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch -Patch0133: arm-virt-Add-cpu_hotplug_enabled-field.patch -Patch0134: arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch -Patch0135: arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch -Patch0136: arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch -Patch0137: arm-virt-Start-up-CPU-hot-plug.patch -Patch0138: migration-always-initialise-ram_counters-for-a-new-m.patch -Patch0139: migration-add-qemu_file_update_transfer-interface.patch -Patch0140: 
migration-add-speed-limit-for-multifd-migration.patch -Patch0141: migration-update-ram_counters-for-multifd-sync-packe.patch -Patch0142: migration-Make-global-sem_sync-semaphore-by-channel.patch -Patch0143: migration-multifd-fix-nullptr-access-in-terminating-m.patch -Patch0144: migration-Maybe-VM-is-paused-when-migration-is-cance.patch -Patch0145: migration-multifd-fix-potential-wrong-acception-orde.patch -Patch0146: migration-multifd-fix-destroyed-mutex-access-in-term.patch -Patch0147: migration-multifd-fix-nullptr-access-in-multifd_send.patch -Patch0148: vtimer-compat-cross-version-migration-from-v4.0.1.patch -Patch0149: migration-ram-Do-error_free-after-migrate_set_error-.patch -Patch0150: migration-ram-fix-memleaks-in-multifd_new_send_chann.patch -Patch0151: migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch -Patch0152: arm-virt-Support-CPU-cold-plug.patch -Patch0153: ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch -Patch0154: ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch -Patch0155: slirp-tftp-restrict-relative-path-access.patch -Patch0156: ip_reass-Fix-use-after-free.patch -Patch0157: bt-use-size_t-type-for-length-parameters-instead-of-.patch -Patch0158: log-Add-some-logs-on-VM-runtime-path.patch -Patch0159: Revert-vtimer-compat-cross-version-migration-from-v4.patch -Patch0160: ARM64-record-vtimer-tick-when-cpu-is-stopped.patch -Patch0161: hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch -Patch0162: migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch -Patch0163: vtimer-Drop-vtimer-virtual-timer-adjust.patch -Patch0164: target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch -Patch0165: target-arm-Fix-PAuth-sbox-functions.patch -Patch0166: tests-Disalbe-filemonitor-testcase.patch -Patch0167: es1370-check-total-frame-count-against-current-frame.patch -Patch0168: exec-set-map-length-to-zero-when-returning-NULL.patch -Patch0169: ati-vga-check-mm_index-before-recursive-call-CVE-202.patch -Patch0170: 
megasas-use-unsigned-type-for-reply_queue_head-and-c.patch -Patch0171: megasas-avoid-NULL-pointer-dereference.patch -Patch0172: megasas-use-unsigned-type-for-positive-numeric-field.patch -Patch0173: hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch -Patch0174: hw-arm-acpi-enable-SHPC-native-hot-plug.patch -Patch0175: hw-tpm-rename-Error-parameter-to-more-common-errp.patch -Patch0176: tpm-ppi-page-align-PPI-RAM.patch -Patch0177: tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch -Patch0178: spapr-Implement-get_dt_compatible-callback.patch -Patch0179: delete-the-in-tpm.txt.patch -Patch0180: tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch -Patch0181: tpm_spapr-Support-suspend-and-resume.patch -Patch0182: hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch -Patch0183: docs-specs-tpm-reST-ify-TPM-documentation.patch -Patch0184: tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch -Patch0185: tpm-Use-TPMState-as-a-common-struct.patch -Patch0186: tpm-Separate-tpm_tis-common-functions-from-isa-code.patch -Patch0187: tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch -Patch0188: tpm-Add-the-SysBus-TPM-TIS-device.patch -Patch0189: hw-arm-virt-vTPM-support.patch -Patch0190: docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch -Patch0191: test-tpm-pass-optional-machine-options-to-swtpm-test.patch -Patch0192: test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch -Patch0193: test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch -Patch0194: build-smt-processor-structure-to-support-smt-topolog.patch -Patch0195: target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch -Patch0196: target-arm-Add-ID_AA64MMFR2_EL1.patch -Patch0197: target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch -Patch0198: target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch -Patch0199: target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch -Patch0200: target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch -Patch0201: target-arm-Stop-assuming-DBGDIDR-always-exists.patch 
-Patch0202: target-arm-Move-DBGDIDR-into-ARMISARegisters.patch -Patch0203: target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch -Patch0204: target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch -Patch0205: target-arm-Read-debug-related-ID-registers-from-KVM.patch -Patch0206: target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch -Patch0207: target-arm-monitor-query-cpu-model-expansion-crashed.patch -Patch0208: target-arm-convert-isar-regs-to-array.patch -Patch0209: target-arm-parse-cpu-feature-related-options.patch -Patch0210: target-arm-register-CPU-features-for-property.patch -Patch0211: target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch -Patch0212: target-arm-introduce-CPU-feature-dependency-mechanis.patch -Patch0213: target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch -Patch0214: target-arm-Add-CPU-features-to-query-cpu-model-expan.patch -Patch0215: target-arm-Update-ID-fields.patch -Patch0216: target-arm-Add-more-CPU-features.patch -Patch0217: hw-usb-core-fix-buffer-overflow.patch -Patch0218: target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch -Patch0219: Drop-bogus-IPv6-messages.patch -Patch0220: hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch -Patch0221: hw-xhci-check-return-value-of-usb_packet_map.patch -Patch0222: hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch -Patch0223: hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch -Patch0224: sm501-Convert-printf-abort-to-qemu_log_mask.patch -Patch0225: sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch -Patch0226: sm501-Use-BIT-x-macro-to-shorten-constant.patch -Patch0227: sm501-Clean-up-local-variables-in-sm501_2d_operation.patch -Patch0228: sm501-Replace-hand-written-implementation-with-pixma.patch -Patch0229: pci-check-bus-pointer-before-dereference.patch -Patch0230: hw-ide-check-null-block-before-_cancel_dma_sync.patch -Patch0231: elf2dmp-Fix-memory-leak-on-main-error-paths.patch -Patch0232: io-Don-t-use-flag-of-printf-format.patch -Patch0233: 
hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch -Patch0234: hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch -Patch0235: block-vvfat-Fix-bad-printf-format-specifiers.patch -Patch0236: block-Remove-unused-include.patch -Patch0237: ssi-Fix-bad-printf-format-specifiers.patch -Patch0238: net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch -Patch0239: ati-check-x-y-display-parameter-values.patch -Patch0240: migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch -Patch0241: migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch -Patch0242: migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch -Patch0243: migration-dirtyrate-Add-dirtyrate-statistics-series-.patch -Patch0244: migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch -Patch0245: migration-dirtyrate-Record-hash-results-for-each-sam.patch -Patch0246: migration-dirtyrate-Compare-page-hash-results-for-re.patch -Patch0247: migration-dirtyrate-skip-sampling-ramblock-with-size.patch -Patch0248: migration-dirtyrate-Implement-set_sample_page_period.patch -Patch0249: migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch -Patch0250: migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch -Patch0251: migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch -Patch0252: migration-dirtyrate-record-start_time-and-calc_time-.patch -Patch0253: migration-dirtyrate-present-dirty-rate-only-when-que.patch -Patch0254: migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch -Patch0255: migration-tls-save-hostname-into-MigrationState.patch -Patch0256: migration-tls-extract-migration_tls_client_create-fo.patch -Patch0257: migration-tls-add-tls_hostname-into-MultiFDSendParam.patch -Patch0258: migration-tls-extract-cleanup-function-for-common-us.patch -Patch0259: migration-tls-add-support-for-multifd-tls-handshake.patch -Patch0260: migration-tls-add-trace-points-for-multifd-tls.patch -Patch0261: qemu-file-Don-t-do-IO-after-shutdown.patch -Patch0262: 
multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch -Patch0263: migration-Don-t-send-data-if-we-have-stopped.patch -Patch0264: migration-Create-migration_is_running.patch -Patch0265: migration-fix-COLO-broken-caused-by-a-previous-commi.patch -Patch0266: migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch -Patch0267: multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch -Patch0268: net-remove-an-assert-call-in-eth_get_gso_type.patch -Patch0269: json-Fix-a-memleak-in-parse_pair.patch -Patch0270: Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch -Patch0271: slirp-check-pkt_len-before-reading-protocol-header.patch -Patch0272: hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch -Patch0273: hw-ehci-check-return-value-of-usb_packet_map.patch -Patch0274: hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch -Patch0275: hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch -Patch0276: hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch -Patch0277: target-arm-Fix-write-redundant-values-to-kvm.patch -Patch0278: memory-clamp-cached-translation-in-case-it-points-to.patch -Patch0279: scsi-bus-Refactor-the-code-that-retries-requests.patch -Patch0280: scsi-disk-Add-support-for-retry-on-errors.patch -Patch0281: qapi-block-core-Add-retry-option-for-error-action.patch -Patch0282: block-backend-Introduce-retry-timer.patch -Patch0283: block-backend-Add-device-specific-retry-callback.patch -Patch0284: block-backend-Enable-retry-action-on-errors.patch -Patch0285: block-backend-Add-timeout-support-for-retry.patch -Patch0286: block-Add-error-retry-param-setting.patch -Patch0287: virtio-blk-Refactor-the-code-that-processes-queued-r.patch -Patch0288: virtio-blk-On-restart-process-queued-requests-in-the.patch -Patch0289: virtio_blk-Add-support-for-retry-on-errors.patch -Patch0290: migration-Add-multi-thread-compress-method.patch -Patch0291: migration-Refactoring-multi-thread-compress-migratio.patch -Patch0292: 
migration-Add-multi-thread-compress-ops.patch -Patch0293: migration-Add-zstd-support-in-multi-thread-compressi.patch -Patch0294: migration-Add-compress_level-sanity-check.patch -Patch0295: doc-Update-multi-thread-compression-doc.patch -Patch0296: configure-Enable-test-and-libs-for-zstd.patch -Patch0297: ati-use-vga_read_byte-in-ati_cursor_define.patch -Patch0298: sd-sdhci-assert-data_count-is-within-fifo_buffer.patch -Patch0299: msix-add-valid.accepts-methods-to-check-address.patch -Patch0300: ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch -Patch0301: block-backend-Stop-retrying-when-draining.patch -Patch0302: migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch -Patch0303: migration-tls-fix-inverted-semantics-in-multifd_chan.patch -Patch0304: migration-tls-add-error-handling-in-multifd_tls_hand.patch -Patch0305: net-vmxnet3-validate-configuration-values-during-act.patch -Patch0306: block-Add-sanity-check-when-setting-retry-parameters.patch -Patch0307: hw-pci-host-add-pci-intack-write-method.patch -Patch0308: pci-host-add-pcie-msi-read-method.patch -Patch0309: vfio-add-quirk-device-write-method.patch -Patch0310: prep-add-ppc-parity-write-method.patch -Patch0311: nvram-add-nrf51_soc-flash-read-method.patch -Patch0312: spapr_pci-add-spapr-msi-read-method.patch -Patch0313: tz-ppc-add-dummy-read-write-methods.patch -Patch0314: imx7-ccm-add-digprog-mmio-write-method.patch -Patch0315: util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch -Patch0316: arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch -Patch0317: blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch -Patch0318: vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch -Patch0319: vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch -Patch0320: vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch -Patch0321: vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch -Patch0322: vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch -Patch0323: 
vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch -Patch0324: vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch -Patch0325: ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch -Patch0326: hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch -Patch0327: usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch -Patch0328: hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch -Patch0329: x86-Intel-AVX512_BF16-feature-enabling.patch -Patch0330: i386-Add-MSR-feature-bit-for-MDS-NO.patch -Patch0331: i386-Add-macro-for-stibp.patch -Patch0332: i386-Add-new-CPU-model-Cooperlake.patch -Patch0333: target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch -Patch0334: target-i386-Add-missed-security-features-to-Cooperla.patch -Patch0335: target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch -Patch0336: target-i386-Export-TAA_NO-bit-to-guests.patch -Patch0337: target-i386-Introduce-Denverton-CPU-model.patch -Patch0338: target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch -Patch0339: i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch -Patch0340: crypto-add-support-for-nettle-s-native-XTS-impl.patch -Patch0341: crypto-add-support-for-gcrypt-s-native-XTS-impl.patch -Patch0342: tests-benchmark-crypto-with-fixed-data-size-not-time.patch -Patch0343: tests-allow-filtering-crypto-cipher-benchmark-tests.patch -Patch0344: target-i386-handle-filtered_features-in-a-new-functi.patch -Patch0345: target-i386-introduce-generic-feature-dependency-mec.patch -Patch0346: target-i386-expand-feature-words-to-64-bits.patch -Patch0347: target-i386-add-VMX-definitions.patch -Patch0348: vmxcap-correct-the-name-of-the-variables.patch -Patch0349: target-i386-add-VMX-features.patch -Patch0350: target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch -Patch0351: target-i386-add-VMX-features-to-named-CPU-models.patch -Patch0352: target-i386-add-two-missing-VMX-features-for-Skylake.patch -Patch0353: target-i386-disable-VMX-features-if-nested-0.patch -Patch0354: 
i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch -Patch0355: target-i386-do-not-set-unsupported-VMX-secondary-exe.patch -Patch0356: migration-fix-multifd_send_pages-next-channel.patch -Patch0357: migration-Make-sure-that-we-don-t-call-write-in-case.patch -Patch0358: virtio-don-t-enable-notifications-during-polling.patch -Patch0359: usbredir-Prevent-recursion-in-usbredir_write.patch -Patch0360: xhci-recheck-slot-status.patch -Patch0361: vhost-Add-names-to-section-rounded-warning.patch -Patch0362: vhost-user-Print-unexpected-slave-message-types.patch -Patch0363: contrib-libvhost-user-Protect-slave-fd-with-mutex.patch -Patch0364: libvhost-user-Fix-some-memtable-remap-cases.patch -Patch0365: xics-Don-t-deassert-outputs.patch -Patch0366: i386-Resolve-CPU-models-to-v1-by-default.patch -Patch0367: block-curl-HTTP-header-fields-allow-whitespace-aroun.patch -Patch0368: block-curl-HTTP-header-field-names-are-case-insensit.patch -Patch0369: backup-Improve-error-for-bdrv_getlength-failure.patch -Patch0370: mirror-Make-sure-that-source-and-target-size-match.patch -Patch0371: iotests-143-Create-socket-in-SOCK_DIR.patch -Patch0372: nbd-server-Avoid-long-error-message-assertions-CVE-2.patch -Patch0373: block-Call-attention-to-truncation-of-long-NBD-expor.patch -Patch0374: qemu-img-convert-Don-t-pre-zero-images.patch -Patch0375: qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch -Patch0376: mirror-Wait-only-for-in-flight-operations.patch -Patch0377: virtio-net-delete-also-control-queue-when-TX-RX-dele.patch -Patch0378: target-i386-enable-monitor-and-ucode-revision-with-c.patch -Patch0379: target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch -Patch0380: target-i386-kvm-initialize-feature-MSRs-very-early.patch -Patch0381: target-i386-add-a-ucode-rev-property.patch -Patch0382: migration-use-migration_is_active-to-represent-activ.patch -Patch0383: migration-Rate-limit-inside-host-pages.patch -Patch0384: hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch 
-Patch0385: qapi-block-core-Introduce-BackupCommon.patch -Patch0386: drive-backup-create-do_backup_common.patch -Patch0387: blockdev-backup-utilize-do_backup_common.patch -Patch0388: qapi-add-BitmapSyncMode-enum.patch -Patch0389: block-backup-Add-mirror-sync-mode-bitmap.patch -Patch0390: block-backup-add-never-policy-to-bitmap-sync-mode.patch -Patch0391: block-backup-loosen-restriction-on-readonly-bitmaps.patch -Patch0392: block-backup-hoist-bitmap-check-into-QMP-interface.patch -Patch0393: block-backup-deal-with-zero-detection.patch -Patch0394: mirror-Fix-bdrv_has_zero_init-use.patch -Patch0395: blockdev-fix-coding-style-issues-in-drive_backup_pre.patch -Patch0396: blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch -Patch0397: blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch -Patch0398: blockdev-honor-bdrv_try_set_aio_context-context-requ.patch -Patch0399: blockdev-Return-bs-to-the-proper-context-on-snapshot.patch -Patch0400: block-Fix-cross-AioContext-blockdev-snapshot.patch -Patch0401: vl-Don-t-mismatch-g_strsplit-g_free.patch -Patch0402: seqlock-fix-seqlock_write_unlock_impl-function.patch -Patch0403: target-i386-kvm-initialize-microcode-revision-from-K.patch -Patch0404: target-i386-check-for-availability-of-MSR_IA32_UCODE.patch -Patch0405: hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch -Patch0406: Fixed-integer-overflow-in-e1000e.patch -Patch0407: migration-fix-cleanup_bh-leak-on-resume.patch -Patch0408: qmp-fix-leak-on-callbacks-that-return-both-value-and.patch -Patch0409: qga-commands-posix-fix-use-after-free-of-local_err.patch -Patch0410: file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch -Patch0411: object-return-self-in-object_ref.patch -Patch0412: lm32-do-not-leak-memory-on-object_new-object_unref.patch -Patch0413: cris-do-not-leak-struct-cris_disasm_data.patch -Patch0414: hppa-fix-leak-from-g_strdup_printf.patch -Patch0415: mcf5208-fix-leak-from-qemu_allocate_irqs.patch -Patch0416: microblaze-fix-leak-of-fdevice-tree-blob.patch 
-Patch0417: ide-fix-leak-from-qemu_allocate_irqs.patch -Patch0418: make-check-unit-use-after-free-in-test-opts-visitor.patch -Patch0419: xhci-fix-valid.max_access_size-to-access-address-reg.patch -Patch0420: qga-fix-assert-regression-on-guest-shutdown.patch -Patch0421: char-fix-use-after-free-with-dup-chardev-reconnect.patch -Patch0422: migration-Count-new_dirty-instead-of-real_dirty.patch -Patch0423: qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch -Patch0424: chardev-tcp-Fix-error-message-double-free-error.patch -Patch0425: colo-compare-Fix-memory-leak-in-packet_enqueue.patch -Patch0426: hw-block-nvme-fix-pin-based-interrupt-behavior.patch -Patch0427: hw-block-nvme-fix-pci-doorbell-size-calculation.patch -Patch0428: virtio-pci-fix-queue_enable-write.patch -Patch0429: hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch -Patch0430: linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch -Patch0431: migration-rdma-cleanup-rdma-context-before-g_free-to.patch -Patch0432: pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch -Patch0433: block-qcow2-do-free-crypto_opts-in-qcow2_close.patch -Patch0434: qemu-img-free-memory-before-re-assign.patch -Patch0435: block-qcow2-threads-fix-qcow2_decompress.patch -Patch0436: block-Avoid-memleak-on-qcow2-image-info-failure.patch -Patch0437: block-bdrv_set_backing_bs-fix-use-after-free.patch -Patch0438: hmp-vnc-Fix-info-vnc-list-leak.patch -Patch0439: migration-colo-fix-use-after-free-of-local_err.patch -Patch0440: migration-ram-fix-use-after-free-of-local_err.patch -Patch0441: block-mirror-fix-use-after-free-of-local_err.patch -Patch0442: block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch -Patch0443: virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch -Patch0444: virtio-blk-delete-vqs-on-the-error-path-in-realize.patch -Patch0445: fix-vhost_user_blk_watch-crash.patch -Patch0446: vhost-user-blk-delay-vhost_user_blk_disconnect.patch -Patch0447: usbredir-fix-buffer-overflow-on-vmload.patch 
-Patch0448: display-bochs-display-fix-memory-leak.patch -Patch0449: audio-fix-integer-overflow.patch -Patch0450: migration-multifd-clean-pages-after-filling-packet.patch -Patch0451: migration-multifd-not-use-multifd-during-postcopy.patch -Patch0452: migration-Define-VMSTATE_INSTANCE_ID_ANY.patch -Patch0453: migration-Change-SaveStateEntry.instance_id-into-uin.patch -Patch0454: apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch -Patch0455: virtio-add-ability-to-delete-vq-through-a-pointer.patch -Patch0456: virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch -Patch0457: virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch -Patch0458: vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch -Patch0459: vhost-user-blk-convert-to-new-virtio_delete_queue.patch -Patch0460: block-nbd-extract-the-common-cleanup-code.patch -Patch0461: virtio-gracefully-handle-invalid-region-caches.patch -Patch0462: migration-savevm-release-gslist-after-dump_vmstate_j.patch -Patch0463: virtio-input-fix-memory-leak-on-unrealize.patch -Patch0464: target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch -Patch0465: target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch -Patch0466: target-arm-Update-the-ID-registers-of-Kunpeng-920.patch -Patch0467: hw-net-fix-vmxnet3-live-migration.patch -Patch0468: include-Make-headers-more-self-contained.patch -Patch0469: migration-register_savevm_live-doesn-t-need-dev.patch -Patch0470: vmstate-add-qom-interface-to-get-id.patch -Patch0471: linux-headers-Update-against-Add-migration-support-f.patch -Patch0472: vfio-Add-function-to-unmap-VFIO-region.patch -Patch0473: vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch -Patch0474: vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch -Patch0475: vfio-Add-migration-region-initialization-and-finaliz.patch -Patch0476: vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch -Patch0477: vfio-Add-migration-state-change-notifier.patch -Patch0478: 
vfio-Register-SaveVMHandlers-for-VFIO-device.patch -Patch0479: vfio-Add-save-state-functions-to-SaveVMHandlers.patch -Patch0480: vfio-Add-load-state-functions-to-SaveVMHandlers.patch -Patch0481: memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch -Patch0482: vfio-Get-migration-capability-flags-for-container.patch -Patch0483: vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch -Patch0484: vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch -Patch0485: vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch -Patch0486: vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch -Patch0487: vfio-Make-vfio-pci-device-migration-capable.patch -Patch0488: qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch -Patch0489: vfio-Move-the-saving-of-the-config-space-to-the-righ.patch -Patch0490: vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch -Patch0491: vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch -Patch0492: kvm-split-too-big-memory-section-on-several-memslots.patch -Patch0493: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch -Patch0494: accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch -Patch0495: memory-Skip-dirty-tracking-for-un-migratable-memory-.patch -Patch0496: Fix-use-after-free-in-vfio_migration_probe.patch -Patch0497: vfio-Make-migration-support-experimental.patch -Patch0498: vfio-Change-default-dirty-pages-tracking-behavior-du.patch -Patch0499: vfio-Fix-vfio_listener_log_sync-function-name-typo.patch -Patch0500: vfio-Support-host-translation-granule-size.patch -Patch0501: vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch -Patch0502: vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch -Patch0503: migration-ram-Reduce-unnecessary-rate-limiting.patch -Patch0504: migration-ram-Optimize-ram_save_host_page.patch -Patch0505: qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch -Patch0506: linux-headers-update-against-5.10-and-manual-clear-v.patch -Patch0507: 
vfio-Maintain-DMA-mapping-range-for-the-container.patch -Patch0508: vfio-migration-Add-support-for-manual-clear-vfio-dir.patch -Patch0509: hw-arm-smmuv3-Support-16K-translation-granule.patch -Patch0510: hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch -Patch0511: hw-vfio-common-trace-vfio_connect_container-operatio.patch -Patch0512: update-linux-headers-Import-iommu.h.patch -Patch0513: vfio.h-and-iommu.h-header-update-against-5.10.patch -Patch0514: memory-Add-new-fields-in-IOTLBEntry.patch -Patch0515: hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch -Patch0516: hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch -Patch0517: memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch -Patch0518: memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch -Patch0519: memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch -Patch0520: iommu-Introduce-generic-header.patch -Patch0521: pci-introduce-PCIPASIDOps-to-PCIDevice.patch -Patch0522: vfio-Force-nested-if-iommu-requires-it.patch -Patch0523: vfio-Introduce-hostwin_from_range-helper.patch -Patch0524: vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch -Patch0525: vfio-Set-up-nested-stage-mappings.patch -Patch0526: vfio-Pass-stage-1-MSI-bindings-to-the-host.patch -Patch0527: vfio-Helper-to-get-IRQ-info-including-capabilities.patch -Patch0528: vfio-pci-Register-handler-for-iommu-fault.patch -Patch0529: vfio-pci-Set-up-the-DMA-FAULT-region.patch -Patch0530: vfio-pci-Implement-the-DMA-fault-handler.patch -Patch0531: hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch -Patch0532: hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch -Patch0533: hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch -Patch0534: hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch -Patch0535: hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch -Patch0536: hw-arm-smmuv3-Implement-fault-injection.patch -Patch0537: hw-arm-smmuv3-Allow-MAP-notifiers.patch -Patch0538: 
pci-Add-return_page_response-pci-ops.patch -Patch0539: vfio-pci-Implement-return_page_response-page-respons.patch -Patch0540: vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch -Patch0541: vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch -Patch0542: vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch -Patch0543: vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch -Patch0544: vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch -Patch0545: hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch BuildRequires: flex BuildRequires: gcc +BuildRequires: make BuildRequires: bison BuildRequires: texinfo BuildRequires: perl-podlators -BuildRequires: kernel BuildRequires: chrpath BuildRequires: gettext BuildRequires: python-sphinx +BuildRequires: ninja-build BuildRequires: zlib-devel BuildRequires: zstd-devel @@ -588,7 +1054,6 @@ BuildRequires: libattr-devel BuildRequires: libcurl-devel BuildRequires: libjpeg-devel BuildRequires: libpng-devel -BuildRequires: brlapi-devel BuildRequires: pixman-devel BuildRequires: libusbx-devel BuildRequires: bzip2-devel @@ -599,16 +1064,29 @@ BuildRequires: libudev-devel BuildRequires: pam-devel BuildRequires: perl-Test-Harness BuildRequires: python3-devel +%if %{with rbd} BuildRequires: librbd-devel +%endif BuildRequires: krb5-devel BuildRequires: libssh-devel BuildRequires: glib2 -BuildRequires: spice-server-devel >= 0.12.5 -BuildRequires: spice-protocol >= 0.12.3 -%ifarch aarch64 BuildRequires: libfdt-devel BuildRequires: virglrenderer-devel +BuildRequires: libslirp-devel +BuildRequires: liburing-devel +%ifarch loongarch64 +BuildRequires: spice-server-devel %endif +%ifarch x86_64 +BuildRequires: qatzip-devel +BuildRequires: intel-qpl-devel +%endif + +BuildRequires: glibc-static glib2-static zlib-static libatomic-static + +# for upgrade from qemu-kvm +Provides: qemu-kvm +Obsoletes: qemu-kvm < 11:8.2.0 Requires(post): /usr/bin/getent Requires(post): /usr/sbin/groupadd @@ -617,6 +1095,10 @@ 
Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units Requires(postun): qemu-block-iscsi +Requires(postun): qemu-block-curl +Requires(postun): qemu-hw-usb-host +Requires: libgcc +Requires: liburing %description @@ -654,10 +1136,12 @@ Summary: QEMU command line tool for manipulating disk images %description img This package provides a command line tool for manipulating disk images +%if %{with rbd} %package block-rbd Summary: Qemu-block-rbd %description block-rbd This package provides RBD support for Qemu +%endif %package block-ssh Summary: Qemu-block-ssh @@ -669,6 +1153,16 @@ Summary: Qemu-block-iscsi %description block-iscsi This package provides block-iscsi support for Qemu +%package block-curl +Summary: Qemu-block-curl +%description block-curl +This package provides block-curl support for Qemu + +%package hw-usb-host +Summary: Qemu-hw-usb-host +%description hw-usb-host +This package provides hw-usb-host support for Qemu + %ifarch %{ix86} x86_64 %package seabios Summary: QEMU seabios @@ -676,6 +1170,63 @@ Summary: QEMU seabios This package include bios-256k.bin and bios.bin of seabios %endif +%package system-aarch64 +Summary: Qemu-system-aarch64 +Requires: qemu +%description system-aarch64 +This package provides the QEMU system emulator for AArch64. + +%package system-arm +Summary: Qemu-system-arm +Requires: qemu +%description system-arm +This package provides the QEMU system emulator for ARM. + +%package system-x86_64 +Summary: Qemu-system-x86_64 +Requires: qemu +%description system-x86_64 +This package provides the QEMU system emulator for x86_64. + +%package system-ppc64 +Summary: Qemu-system-ppc64 +Requires: qemu +%description system-ppc64 +This package provides the QEMU system emulator for ppc64le. + +%package system-riscv +Summary: Qemu-system-riscv32, Qemu-system-riscv64 +Requires: qemu +%description system-riscv +This package provides the QEMU system emulator for riscv. 
+ +%package system-loongarch64 +Summary: Qemu-system-loongarch64 +Requires: qemu +Requires: spice-gtk +%description system-loongarch64 +This package provides the QEMU system emulator for loongarch64. + +%package user +Summary: Qemu-user +Requires: qemu +%description user +This package provides the QEMU user emulator for multi-arch. + +%package user-static +Summary: Qemu-user-static +%description user-static +This package provides the statically-linked QEMU user emulator for multi-arch. +These static emulators are particularly useful for container builds. + +%package user-binfmt +Summary: QEMU user mode emulation of qemu-user-static +Requires: systemd >= 197 +Requires(post): systemd +Requires(postun): systemd +%description user-binfmt +System level emulation of foreign architectures + %prep %setup -q -n qemu-%{version}%{?rcstr} %autopatch -p1 @@ -683,20 +1234,58 @@ This package include bios-256k.bin and bios.bin of seabios %build %ifarch x86_64 buildarch="x86_64-softmmu" +targetarch="aarch64-softmmu arm-softmmu riscv32-softmmu riscv64-softmmu \ + aarch64-linux-user arm-linux-user riscv32-linux-user riscv64-linux-user" +usermode_emulator="qemu-aarch64 qemu-arm qemu-riscv32 qemu-riscv64" +usermode_static="qemu-aarch64-static qemu-arm-static qemu-riscv32-static qemu-riscv64-static" %endif %ifarch aarch64 buildarch="aarch64-softmmu" +targetarch="x86_64-softmmu arm-softmmu riscv32-softmmu riscv64-softmmu \ + arm-linux-user riscv32-linux-user riscv64-linux-user" +usermode_emulator="qemu-arm qemu-riscv32 qemu-riscv64" +usermode_static="qemu-arm-static qemu-riscv32-static qemu-riscv64-static" +%endif + +%ifarch ppc64le +buildarch="ppc64-softmmu" +targetarch="x86_64-softmmu aarch64-softmmu arm-softmmu riscv32-softmmu riscv64-softmmu \ + aarch64-linux-user arm-linux-user riscv32-linux-user riscv64-linux-user" +usermode_emulator="qemu-aarch64 qemu-arm qemu-riscv32 qemu-riscv64" +usermode_static="qemu-aarch64-static qemu-arm-static qemu-riscv32-static qemu-riscv64-static" 
+%endif + +%ifarch loongarch64 +buildarch="loongarch64-softmmu" +targetarch="x86_64-softmmu aarch64-softmmu arm-softmmu riscv32-softmmu riscv64-softmmu \ + aarch64-linux-user arm-linux-user riscv32-linux-user riscv64-linux-user" +usermode_emulator="qemu-aarch64 qemu-arm qemu-riscv32 qemu-riscv64" +usermode_static="qemu-aarch64-static qemu-arm-static qemu-riscv32-static qemu-riscv64-static" +%endif + +%ifarch riscv64 +buildarch="riscv64-softmmu" +targetarch="x86_64-softmmu aarch64-softmmu arm-softmmu riscv32-softmmu \ + aarch64-linux-user arm-linux-user riscv32-linux-user" +usermode_emulator="qemu-aarch64 qemu-arm qemu-riscv32" +usermode_static="qemu-aarch64-static qemu-arm-static qemu-riscv32-static" %endif buildldflags="VL_LDFLAGS=-Wl,--build-id" +qemubuilddir="build" + +tar xf %{SOURCE4} +cd BinDir/ +\cp -r -a * ../ +cd ../ ./configure \ --prefix=%{_prefix} \ - --target-list=${buildarch} \ - --extra-cflags="%{optflags} -fPIE -DPIE -fPIC" \ - --extra-ldflags="-Wl,--build-id -pie -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack" \ + --target-list="${buildarch} ${targetarch}" \ + --extra-cflags="%{optflags} -fPIE -DPIE -fPIC -ftls-model=initial-exec" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack" \ --datadir=%{_datadir} \ - --docdir=%{_docdir}/%{name} \ + --docdir=%{_docdir}/ \ --libdir=%{_libdir} \ --libexecdir=%{_libexecdir} \ --localstatedir=%{_localstatedir} \ @@ -705,7 +1294,8 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --firmwarepath=%{_datadir}/%{name} \ --with-pkgversion=%{name}-%{version}-%{release} \ --python=/usr/bin/python3 \ - --disable-slirp \ + --enable-slirp \ + --enable-slirp-smbd \ --enable-gtk \ --enable-docs \ --enable-guest-agent \ @@ -718,33 +1308,84 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-tcg \ --enable-rdma \ --enable-linux-aio \ + --enable-linux-io-uring \ --enable-cap-ng \ --enable-vhost-user \ + --enable-vhost-net \ + --enable-vhost-kernel \ + --enable-vhost-user-blk-server \ + --enable-vhost-vdpa \ 
--enable-tpm \ --enable-modules \ --enable-libssh \ - --enable-spice \ -%ifarch aarch64 --enable-fdt \ --enable-virglrenderer \ -%endif --enable-cap-ng \ --enable-libusb \ - --disable-bluez \ +%if %{with rbd} + --enable-rbd \ +%else + --disable-rbd \ +%endif --disable-dmg \ --disable-qcow1 \ --disable-vdi \ --disable-vvfat \ --disable-qed \ --disable-parallels \ - --disable-sheepdog \ --disable-capstone \ --disable-smartcard \ - --enable-zstd + --enable-zstd \ + --disable-brlapi \ + --disable-plugins \ + --enable-debug make %{?_smp_mflags} $buildldflags V=1 -cp -a ${buildarch}/qemu-system-* qemu-kvm + +cp ${qemubuilddir}/${buildarch}/qemu-system-* qemu-kvm + +# Independent static build for user mode emulators +# Dynamically generate static_targets from usermode_static +static_targets="" +for binary in $usermode_static; do + arch=$(echo $binary | sed 's/qemu-//' | sed 's/-static//') + static_targets="$static_targets $arch-linux-user" +done +# Remove leading space +static_targets=$(echo $static_targets | sed 's/^ *//') + +# Independent static build directory +mkdir -p static_builddir +cd static_builddir + +../configure \ + --prefix=%{_prefix} \ + --enable-attr \ + --enable-linux-user \ + --enable-pie \ + --enable-tcg \ + --disable-install-blobs \ + --target-list="$static_targets" \ + --static + +make %{?_smp_mflags} V=1 +cd .. 
+ +# Generate file lists for user packages +for i in ${usermode_emulator}; do + echo "%{_bindir}/${i}" >> %{name}.user +done + +for binary in ${usermode_static}; do + echo "%{_bindir}/${binary}" >> %{name}.user-static +done + +# Create list of static binfmt configurations for %files section +for binary in ${usermode_static}; do + arch=$(echo $binary | sed 's/qemu-//' | sed 's/-static//') + echo "%{_exec_prefix}/lib/binfmt.d/qemu-${arch}-static.conf" >> %{name}.user-static-binfmt +done %install @@ -755,7 +1396,39 @@ make %{?_smp_mflags} DESTDIR=%{buildroot} \ install -m 0755 qemu-kvm %{buildroot}%{_libexecdir}/ ln -s %{_libexecdir}/qemu-kvm %{buildroot}/%{_bindir}/qemu-kvm -rm %{buildroot}/%{_bindir}/qemu-system-* +# Install user-static binaries +%define static_buildroot %{buildroot}/static/ +mkdir -p %{static_buildroot} + +pushd static_builddir +make DESTDIR=%{static_buildroot} install + +# Duplicates what the main build installs and we don't +# need second copy with a -static suffix +rm -f %{static_buildroot}%{_bindir}/qemu-trace-stap +popd +# back to root build directory + +# Rename all QEMU user emulators to have a -static suffix +for src in %{static_buildroot}%{_bindir}/qemu-*; do + mv $src %{buildroot}%{_bindir}/$(basename $src)-static; done + +rm -rf %{static_buildroot} + +# Install binfmt configuration files +%global binfmt_dir %{buildroot}%{_exec_prefix}/lib/binfmt.d +mkdir -p %{binfmt_dir} + +# Generate binfmt configuration for dynamic emulators +./scripts/qemu-binfmt-conf.sh --systemd ALL --exportdir %{binfmt_dir} --qemu-path %{_bindir} +for i in %{binfmt_dir}/*; do mv $i $(echo $i | sed 's/.conf/-dynamic.conf/'); done + +# Generate binfmt configuration for static emulators from dynamic ones +for regularfmt in %{binfmt_dir}/*; do + staticfmt="$(echo $regularfmt | sed 's/-dynamic/-static/g')" + cat $regularfmt | tr -d '\n' | sed "s/:$/-static:F/" > $staticfmt +done + install -D -p -m 0644 contrib/systemd/qemu-pr-helper.service 
%{buildroot}%{_unitdir}/qemu-pr-helper.service install -D -p -m 0644 contrib/systemd/qemu-pr-helper.socket %{buildroot}%{_unitdir}/qemu-pr-helper.socket install -D -p -m 0644 qemu.sasl %{buildroot}%{_sysconfdir}/sasl2/qemu.conf @@ -772,63 +1445,78 @@ touch %{buildroot}%{_localstatedir}/log/qga-fsfreeze-hook.log # For qemu docs package %global qemudocdir %{_docdir}/%{name} rm -rf %{buildroot}%{qemudocdir}/specs -install -D -p -m 0644 -t %{buildroot}%{qemudocdir} Changelog README COPYING COPYING.LIB LICENSE +rm -rf %{buildroot}%{qemudocdir}/.buildinfo +rm -rf %{buildroot}%{qemudocdir}/objects.inv +rm -rf %{buildroot}%{qemudocdir}/genindex.html +rm -rf %{buildroot}%{qemudocdir}/index.html +install -D -p -m 0644 -t %{buildroot}%{qemudocdir} README.rst COPYING COPYING.LIB LICENSE chmod -x %{buildroot}%{_mandir}/man1/* - -%ifarch aarch64 -rm -rf %{buildroot}%{_datadir}/%{name}/vgabios*bin -rm -rf %{buildroot}%{_datadir}/%{name}/bios*.bin -rm -rf %{buildroot}%{_datadir}/%{name}/linuxboot.bin -rm -rf %{buildroot}%{_datadir}/%{name}/kvmvapic.bin -rm -rf %{buildroot}%{_datadir}/%{name}/sgabios.bin -rm -rf %{buildroot}%{_datadir}/%{name}/multiboot.bin -rm -rf %{buildroot}%{_datadir}/%{name}/linuxboot_dma.bin -rm -rf %{buildroot}%{_datadir}/%{name}/pvh.bin -%endif -%ifarch x86_64 rm -rf %{buildroot}%{_datadir}/%{name}/vgabios-ati.bin -%endif +rm -rf %{buildroot}%{_datadir}/%{name}/bios-microvm.bin rm -rf %{buildroot}%{_datadir}/%{name}/openbios-* -rm -rf %{buildroot}%{_datadir}/%{name}/slof.bin rm -rf %{buildroot}%{_datadir}/%{name}/QEMU,*.bin rm -rf %{buildroot}%{_datadir}/%{name}/bamboo.dtb rm -rf %{buildroot}%{_datadir}/%{name}/canyonlands.dtb rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware.img rm -rf %{buildroot}%{_datadir}/%{name}/palcode-clipper rm -rf %{buildroot}%{_datadir}/%{name}/petalogix-* -rm -rf %{buildroot}%{_datadir}/%{name}/ppc_* rm -rf %{buildroot}%{_datadir}/%{name}/qemu_vga.ndrv rm -rf %{buildroot}%{_datadir}/%{name}/s390-* +%ifnarch ppc64le +rm -rf 
%{buildroot}%{_datadir}/%{name}/slof.bin rm -rf %{buildroot}%{_datadir}/%{name}/skiboot.lid rm -rf %{buildroot}%{_datadir}/%{name}/spapr-* +rm -rf %{buildroot}%{_datadir}/%{name}/ppc_* +%endif rm -rf %{buildroot}%{_datadir}/%{name}/u-boot* +rm -rf %{buildroot}%{_datadir}/%{name}/core3-hmcode +rm -rf %{buildroot}%{_datadir}/%{name}/core3-reset +rm -rf %{buildroot}%{_datadir}/%{name}/uefi-bios-sw rm -rf %{buildroot}%{_bindir}/ivshmem* rm -f %{buildroot}%{_datadir}/%{name}/edk2* rm -rf %{buildroot}%{_datadir}/%{name}/firmware -rm -rf %{buildroot}%{_datadir}/%{name}/opensbi* rm -rf %{buildroot}%{_datadir}/%{name}/qemu-nsis.bmp -rm -rf %{buildroot}%{_libdir}/%{name}/audio-oss.so rm -rf %{buildroot}%{_libdir}/%{name}/audio-pa.so -rm -rf %{buildroot}%{_libdir}/%{name}/block-curl.so rm -rf %{buildroot}%{_libdir}/%{name}/block-gluster.so +rm -rf %{buildroot}%{_libdir}/%{name}/ui-sdl.so +rm -rf %{buildroot}%{_libdir}/%{name}/chardev-baum.so +%ifnarch loongarch64 +rm -rf %{buildroot}%{_libdir}/%{name}/audio-oss.so +rm -rf %{buildroot}%{_libdir}/%{name}/audio-spice.so rm -rf %{buildroot}%{_libdir}/%{name}/ui-curses.so rm -rf %{buildroot}%{_libdir}/%{name}/ui-gtk.so -rm -rf %{buildroot}%{_libdir}/%{name}/ui-sdl.so +rm -rf %{buildroot}%{_libdir}/%{name}/chardev-spice.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-qxl.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-usb-redirect.so +rm -rf %{buildroot}%{_libdir}/%{name}/ui-opengl.so +rm -rf %{buildroot}%{_libdir}/%{name}/ui-spice-app.so +rm -rf %{buildroot}%{_libdir}/%{name}/ui-spice-core.so +%endif + rm -rf %{buildroot}%{_libexecdir}/vhost-user-gpu rm -rf %{buildroot}%{_datadir}/%{name}/vhost-user/50-qemu-gpu.json +%ifarch ppc64le +%endif +%if %{with rbd} strip %{buildroot}%{_libdir}/%{name}/block-rbd.so +%endif strip %{buildroot}%{_libdir}/%{name}/block-iscsi.so +strip %{buildroot}%{_libdir}/%{name}/block-curl.so strip 
%{buildroot}%{_libdir}/%{name}/block-ssh.so +strip %{buildroot}%{_libdir}/%{name}/hw-usb-host.so for f in %{buildroot}%{_bindir}/* %{buildroot}%{_libdir}/* \ %{buildroot}%{_libexecdir}/*; do - if file $f | grep -q ELF | grep -q -i shared; then chrpath --delete $f; fi + if file $f | grep ELF | grep -q -i shared || readelf -d $f | grep -q -i rpath; then chrpath --delete $f; fi done %check -make check V=1 +echo "#define CONFIG_DISABLE_QEMU_LOG" >> build/config-host.h +make %{?_smp_mflags} $buildldflags V=1 +make check V=1 %{?_smp_mflags} %pre getent group kvm >/dev/null || groupadd -g 36 -r kvm @@ -848,6 +1536,23 @@ getent passwd qemu >/dev/null || \ %dir %{_datadir}/%{name}/ %{_libexecdir}/qemu-kvm %{_bindir}/qemu-kvm +%{_libdir}/%{name}/accel-qtest-*.so +%ifarch x86_64 +%{_libdir}/%{name}/accel-tcg-*.so +%{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%{_libdir}/%{name}/hw-display-virtio-vga.so +%endif + +%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +%{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +%{_libdir}/%{name}/hw-display-virtio-gpu-pci.so +%{_libdir}/%{name}/hw-display-virtio-gpu.so +%{_libdir}/%{name}/audio-dbus.so +%{_libdir}/%{name}/ui-dbus.so +%{_libdir}/%{name}/ui-egl-headless.so +%{_docdir}/%{name}/dbus-dbusindex.html +%{_datadir}/%{name}/vof-nvram.bin +%{_datadir}/%{name}/vof.bin %{_datadir}/%{name}/efi-virtio.rom %{_datadir}/%{name}/efi-e1000.rom %{_datadir}/%{name}/efi-e1000e.rom @@ -862,6 +1567,7 @@ getent passwd qemu >/dev/null || \ %{_datadir}/%{name}/pxe-pcnet.rom %{_datadir}/%{name}/pxe-rtl8139.rom %{_datadir}/%{name}/pxe-eepro100.rom +%{_datadir}/%{name}/qboot.rom %{_datadir}/%{name}/trace-events-all %{_datadir}/applications/qemu.desktop %{_datadir}/icons/hicolor/*/apps/* @@ -870,7 +1576,7 @@ getent passwd qemu >/dev/null || \ %{_bindir}/qemu-edid %{_bindir}/qemu-keymap %{_bindir}/qemu-pr-helper -%{_bindir}/virtfs-proxy-helper +%{_libexecdir}/virtfs-proxy-helper %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket 
%attr(4755, root, root) %{_libexecdir}/qemu-bridge-helper @@ -896,34 +1602,104 @@ getent passwd qemu >/dev/null || \ %{_datadir}/%{name}/linuxboot_dma.bin %{_datadir}/%{name}/pvh.bin %{_datadir}/%{name}/multiboot.bin +%{_datadir}/%{name}/multiboot_dma.bin +%{_datadir}/%{name}/kvmvapic.bin +%endif + + +%files system-aarch64 +%{_bindir}/qemu-system-aarch64 + +%files system-arm +%{_bindir}/qemu-system-arm +%{_datadir}/%{name}/npcm7xx_bootrom.bin + +%files system-x86_64 +%{_bindir}/qemu-system-x86_64 +%ifnarch x86_64 +%{_libdir}/%{name}/accel-tcg-*.so +%{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%{_libdir}/%{name}/hw-display-virtio-vga.so +%{_datadir}/%{name}/bios.bin +%{_datadir}/%{name}/bios-256k.bin +%{_datadir}/%{name}/vgabios.bin +%{_datadir}/%{name}/vgabios-cirrus.bin +%{_datadir}/%{name}/vgabios-qxl.bin +%{_datadir}/%{name}/vgabios-stdvga.bin +%{_datadir}/%{name}/vgabios-vmware.bin +%{_datadir}/%{name}/vgabios-virtio.bin +%{_datadir}/%{name}/vgabios-ramfb.bin +%{_datadir}/%{name}/vgabios-bochs-display.bin +%{_datadir}/%{name}/linuxboot.bin +%{_datadir}/%{name}/linuxboot_dma.bin +%{_datadir}/%{name}/pvh.bin +%{_datadir}/%{name}/multiboot.bin +%{_datadir}/%{name}/multiboot_dma.bin %{_datadir}/%{name}/kvmvapic.bin -%{_datadir}/%{name}/sgabios.bin +%endif + +%ifarch ppc64le +%files system-ppc64 +%{_bindir}/qemu-system-ppc64 +%{_datadir}/%{name}/slof.bin +%{_datadir}/%{name}/skiboot.lid +%endif + +%files system-riscv +%{_bindir}/qemu-system-riscv32 +%{_bindir}/qemu-system-riscv64 +%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* +%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* +%ifnarch ppc64le +%endif + +%ifarch loongarch64 +%files system-loongarch64 +%{_bindir}/qemu-system-loongarch64 +%{_libdir}/%{name}/audio-oss.so +%{_libdir}/%{name}/ui-curses.so +%{_libdir}/%{name}/ui-gtk.so +%{_libdir}/%{name}/audio-spice.so +%{_libdir}/%{name}/chardev-spice.so +%{_libdir}/%{name}/hw-display-qxl.so +%{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so 
+%{_libdir}/%{name}/hw-usb-redirect.so +%{_libdir}/%{name}/ui-opengl.so +%{_libdir}/%{name}/ui-spice-app.so +%{_libdir}/%{name}/ui-spice-core.so +%endif + +%ifnarch loongarch64 %endif %files help %dir %{qemudocdir} -%doc %{qemudocdir}/qemu-doc.html -%doc %{qemudocdir}/qemu-doc.txt -%doc %{qemudocdir}/qemu-ga-ref.html -%doc %{qemudocdir}/qemu-ga-ref.txt -%doc %{qemudocdir}/qemu-qmp-ref.html -%doc %{qemudocdir}/qemu-qmp-ref.txt +%doc %{qemudocdir}/about +%doc %{qemudocdir}/devel %doc %{qemudocdir}/interop -%doc %{qemudocdir}/README -%doc %{qemudocdir}/Changelog +%doc %{qemudocdir}/search* +%doc %{qemudocdir}/_static +%doc %{qemudocdir}/system +%doc %{qemudocdir}/tools +%doc %{qemudocdir}/user +%doc %{qemudocdir}/README.rst %{_mandir}/man1/qemu.1* +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man1/qemu-storage-daemon.1* %{_mandir}/man1/virtfs-proxy-helper.1* %{_mandir}/man7/qemu-block-drivers.7* %{_mandir}/man7/qemu-cpu-models.7* %{_mandir}/man7/qemu-ga-ref.7* %{_mandir}/man7/qemu-qmp-ref.7* -%{_mandir}/man1/qemu-img.1* +%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* +%{_mandir}/man8/qemu-ga.8* %{_mandir}/man8/qemu-nbd.8* +%{_mandir}/man8/qemu-pr-helper.8* + %files guest-agent %defattr(-,root,root,-) %{_bindir}/qemu-ga -%{_mandir}/man8/qemu-ga.8* %{_unitdir}/qemu-guest-agent.service %{_udevdir}/99-qemu-guest-agent.rules %ghost %{_localstatedir}/log/qga-fsfreeze-hook.log @@ -932,9 +1708,12 @@ getent passwd qemu >/dev/null || \ %{_bindir}/qemu-img %{_bindir}/qemu-io %{_bindir}/qemu-nbd +%{_bindir}/qemu-storage-daemon +%if %{with rbd} %files block-rbd %{_libdir}/%{name}/block-rbd.so +%endif %files block-ssh %{_libdir}/%{name}/block-ssh.so @@ -942,669 +1721,1185 @@ getent passwd qemu >/dev/null || \ %files block-iscsi %{_libdir}/%{name}/block-iscsi.so +%files block-curl +%{_libdir}/%{name}/block-curl.so + +%files hw-usb-host +%{_libdir}/%{name}/hw-usb-host.so + %ifarch %{ix86} x86_64 %files seabios %{_datadir}/%{name}/bios-256k.bin %{_datadir}/%{name}/bios.bin %endif 
-%changelog -* Wed Aug 04 2021 Chen Qun -- vfio: Support host translation granule size -- vfio/migrate: Move switch of dirty tracking into vfio_memory_listener -- vfio: Fix unregister SaveVMHandler in vfio_migration_finalize -- migration/ram: Reduce unnecessary rate limiting -- migration/ram: Optimize ram_save_host_page() -- qdev/monitors: Fix reundant error_setg of qdev_add_device -- linux-headers: update against 5.10 and manual clear vfio dirty log series -- vfio: Maintain DMA mapping range for the container -- vfio/migration: Add support for manual clear vfio dirty log -- hw/arm/smmuv3: Support 16K translation granule -- hw/arm/smmuv3: Set the restoration priority of the vSMMUv3 explicitly -- hw/vfio/common: trace vfio_connect_container operations -- update-linux-headers: Import iommu.h -- vfio.h and iommu.h header update against 5.10 -- memory: Add new fields in IOTLBEntry -- hw/arm/smmuv3: Improve stage1 ASID invalidation -- hw/arm/smmu-common: Allow domain invalidation for NH_ALL/NSNH_ALL -- memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region attribute -- memory: Add IOMMU_ATTR_MSI_TRANSLATE IOMMU memory region attribute -- memory: Introduce IOMMU Memory Region inject_faults API -- iommu: Introduce generic header -- pci: introduce PCIPASIDOps to PCIDevice -- vfio: Force nested if iommu requires it -- vfio: Introduce hostwin_from_range helper -- vfio: Introduce helpers to DMA map/unmap a RAM section -- vfio: Set up nested stage mappings -- vfio: Pass stage 1 MSI bindings to the host -- vfio: Helper to get IRQ info including capabilities -- vfio/pci: Register handler for iommu fault -- vfio/pci: Set up the DMA FAULT region -- vfio/pci: Implement the DMA fault handler -- hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute -- hw/arm/smmuv3: Store the PASID table GPA in the translation config -- hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA invalidation -- hw/arm/smmuv3: Fill the IOTLBEntry leaf field on NH_VA invalidation -- hw/arm/smmuv3: Pass stage 1 
configurations to the host -- hw/arm/smmuv3: Implement fault injection -- hw/arm/smmuv3: Allow MAP notifiers -- pci: Add return_page_response pci ops -- vfio/pci: Implement return_page_response page response callback -- vfio/common: Avoid unmap ram section at vfio_listener_region_del() in nested mode -- vfio: Introduce helpers to mark dirty pages of a RAM section -- vfio: Add vfio_prereg_listener_log_sync in nested stage -- vfio: Add vfio_prereg_listener_log_clear to re-enable mark dirty pages -- vfio: Add vfio_prereg_listener_global_log_start/stop in nested stage -- hw/arm/smmuv3: Post-load stage 1 configurations to the host - -* Tue Aug 03 2021 Chen Qun -- kvm: split too big memory section on several memslots -- kvm: Reallocate dirty_bmap when we change a slot -- accel: kvm: Fix memory waste under mismatch page size -- memory: Skip dirty tracking for un-migratable memory regions -- Fix use after free in vfio_migration_probe -- vfio: Make migration support experimental -- vfio: Change default dirty pages tracking behavior during migration -- vfio: Fix vfio_listener_log_sync function name typo - -* Thu Jul 29 2021 Chen Qun -- vfio: Move the saving of the config space to the right place in VFIO migration -- vfio: Set the priority of the VFIO VM state change handler explicitly -- vfio: Avoid disabling and enabling vectors repeatedly in VFIO migration - -* Thu Jul 29 2021 imxcc -- hw/net: fix vmxnet3 live migration -- include: Make headers more self-contained -- migration: register_savevm_live doesn't need dev -- vmstate: add qom interface to get id -- linux headers: Update against "Add migration support for VFIO devices" -- vfio: Add function to unmap VFIO region -- vfio: Add vfio_get_object callback to VFIODeviceOps -- vfio: Add save and load functions for VFIO PCI devices -- vfio: Add migration region initialization and finalize function -- vfio: Add VM state change handler to know state of VM -- vfio: Add migration state change notifier -- vfio: Register 
SaveVMHandlers for VFIO device -- vfio: Add save state functions to SaveVMHandlers -- vfio: Add load state functions to SaveVMHandlers -- memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled -- vfio: Get migration capability flags for container -- vfio: Add function to start and stop dirty pages tracking -- vfio: Add vfio_listener_log_sync to mark dirty pages -- vfio: Dirty page tracking when vIOMMU is enabled -- vfio: Add ioctl to get dirty pages bitmap during dma unmap -- vfio: Make vfio-pci device migration capable -- qapi: Add VFIO devices migration stats in Migration stats - -* Wed Jul 28 2021 imxcc -- object: return self in object_ref() -- file-posix: Fix leaked fd in raw_open_common() error path -- qga/commands-posix: fix use after free of local_err -- qmp: fix leak on callbacks that return both value and error -- migration: fix cleanup_bh leak on resume -- Fixed integer overflow in e1000e -- lm32-do-not-leak-memory-on-object_new-object_unref.patch -- cris-do-not-leak-struct-cris_disasm_data.patch -- hppa-fix-leak-from-g_strdup_printf.patch -- mcf5208-fix-leak-from-qemu_allocate_irqs.patch -- microblaze-fix-leak-of-fdevice-tree-blob.patch -- ide-fix-leak-from-qemu_allocate_irqs.patch -- make-check-unit-use-after-free-in-test-opts-visitor.patch -- virtio-pci: fix queue_enable write -- hw/block/nvme: fix pci doorbell size calculation -- hw/block/nvme: fix pin-based interrupt behavior -- colo-compare: Fix memory leak in packet_enqueue() -- chardev/tcp: Fix error message double free error -- qga: Plug unlikely memory leak in guest-set-memory-blocks -- migration: Count new_dirty instead of real_dirty -- char: fix use-after-free with dup chardev & reconnect -- qga: fix assert regression on guest-shutdown -- xhci: fix valid.max_access_size to access address registers -- block/qcow2: do free crypto_opts in qcow2_close() -- qemu-img: free memory before re-assign -- block/qcow2-threads: fix qcow2_decompress -- block: Avoid memleak on qcow2 image info failure -- 
block: bdrv_set_backing_bs: fix use-after-free -- hmp/vnc: Fix info vnc list leak -- migration/colo: fix use after free of local_err -- migration/ram: fix use after free of local_err -- block/mirror: fix use after free of local_err -- block: fix bdrv_root_attach_child forget to unref child_bs -- virtio-serial-bus: Plug memory leak on realize() error paths -- virtio-blk: delete vqs on the error path in realize() -- fix vhost_user_blk_watch crash -- vhost-user-blk: delay vhost_user_blk_disconnect -- hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch -- linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch -- migration-rdma-cleanup-rdma-context-before-g_free-to.patch -- pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch -- usbredir-fix-buffer-overflow-on-vmload.patch -- apic: Use 32bit APIC ID for migration instance-ID -- audio: fix integer overflow -- display/bochs-display: fix memory leak -- migration: Change SaveStateEntry.instance_id into uint32_t -- migration: Define VMSTATE_INSTANCE_ID_ANY -- migration/multifd: clean pages after filling packet -- migration/multifd: not use multifd during postcopy -- virtio: add ability to delete vq through a pointer -- virtio-pmem: do delete rq_vq in virtio_pmem_unrealize -- virtio-crypto: do delete ctrl_vq in virtio_crypto_device_unrealize -- vhost-user-blk: delete virtioqueues in unrealize to fix memleaks -- vhost-user-blk: convert to new virtio_delete_queue -- block/nbd: extract the common cleanup code -- virtio: gracefully handle invalid region caches -- migration/savevm: release gslist after dump_vmstate_json -- virtio-input: fix memory leak on unrealize -- target/arm: only set ID_PFR1_EL1.GIC for AArch32 guest -- target/arm: clear EL2 and EL3 only when kvm is not enabled -- target/arm: Update the ID registers of Kunpeng-920 - -* Fri Jul 23 2021 imxcc -- hw/arm/virt: Init PMU for hotplugged vCPU - -* Fri Jul 23 2021 Chen Qun -- vl: Don't mismatch g_strsplit()/g_free() -- seqlock: fix 
seqlock_write_unlock_impl function -- target/i386: kvm: initialize microcode revision from KVM -- target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR - -* Thu Jul 22 2021 Chen Qun -- qapi/block-core: Introduce BackupCommon -- drive-backup: create do_backup_common -- blockdev-backup: utilize do_backup_common -- qapi: add BitmapSyncMode enum -- block/backup: Add mirror sync mode 'bitmap' -- block/backup: add 'never' policy to bitmap sync mode -- block/backup: loosen restriction on readonly bitmaps -- block/backup: hoist bitmap check into QMP interface -- block/backup: deal with zero detection -- mirror: Fix bdrv_has_zero_init() use -- blockdev: fix coding style issues in drive_backup_prepare -- blockdev: unify qmp_drive_backup and drive-backup transaction paths -- blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths -- blockdev: honor bdrv_try_set_aio_context() context requirements -- blockdev: Return bs to the proper context on snapshot abort -- block: Fix cross-AioContext blockdev-snapshot - -* Thu Jul 22 2021 Chen Qun -- hw/pci/pcie: Move hot plug capability check to pre_plug callback - -* Thu Jul 22 2021 Chen Qun -- migration: use migration_is_active to represent active state -- migration: Rate limit inside host pages - -* Thu Jul 22 2021 Chen Qun -- virtio-net: delete also control queue when TX/RX deleted -- target/i386: enable monitor and ucode revision with -cpu max -- target/i386: set the CPUID level to 0x14 on old machine-type -- target/i386: kvm: initialize feature MSRs very early -- target/i386: add a ucode-rev property - -* Thu Jul 22 2021 Chen Qun -- qcow2: Fix qcow2_alloc_cluster_abort() for external data file -- mirror: Wait only for in-flight operations - -* Wed Jul 21 2021 Chen Qun -- block/curl: HTTP header fields allow whitespace around values -- block/curl: HTTP header field names are case insensitive -- backup: Improve error for bdrv_getlength() failure -- mirror: Make sure that source and target size 
match -- iotests/143: Create socket in $SOCK_DIR -- nbd/server: Avoid long error message assertions CVE-2020-10761 -- block: Call attention to truncation of long NBD exports -- qemu-img convert: Don't pre-zero images - -* Wed Jul 21 2021 Chen Qun -- virtio: don't enable notifications during polling -- usbredir: Prevent recursion in usbredir_write -- xhci: recheck slot status -- vhost: Add names to section rounded warning -- vhost-user: Print unexpected slave message types -- contrib/libvhost-user: Protect slave fd with mutex -- libvhost-user: Fix some memtable remap cases -- xics: Don't deassert outputs -- i386: Resolve CPU models to v1 by default - -* Wed Jul 21 2021 imxcc -- target/i386: handle filtered_features in a new function mark_unavailable_features -- target/i386: introduce generic feature dependency mechanism -- target/i386: expand feature words to 64 bits -- target/i386: add VMX definitions -- vmxcap: correct the name of the variables -- target/i386: add VMX features -- target/i386: work around KVM_GET_MSRS bug for secondary execution controls -- target/i386: add VMX features to named CPU models -- target/i386: add two missing VMX features for Skylake and CascadeLake Server -- target/i386: disable VMX features if nested=0 -- i386/cpu: Don't add unavailable_features to env->user_features -- target/i386: do not set unsupported VMX secondary execution controls -- migration: fix multifd_send_pages() next channel -- migration: Make sure that we don't call write() in case of error - -* Tue Jul 20 2021 Chen Qun -- crypto: add support for nettle's native XTS impl -- crypto: add support for gcrypt's native XTS impl -- tests: benchmark crypto with fixed data size, not time period -- tests: allow filtering crypto cipher benchmark tests - -* Tue Jul 20 2021 Chen Qun -- target/i386: Introduce Denverton CPU model -- target/i386: Add Snowridge-v2 (no MPX) CPU model -- i386: Add CPUID bit for CLZERO and XSAVEERPTR - -* Mon Jul 19 2021 Chen Qun -- x86: Intel AVX512_BF16 
feature enabling -- i386: Add MSR feature bit for MDS-NO -- i386: Add macro for stibp -- i386: Add new CPU model Cooperlake -- target/i386: Add new bit definitions of MSR_IA32_ARCH_CAPABILITIES -- target/i386: Add missed security features to Cooperlake CPU model -- target/i386: add PSCHANGE_NO bit for the ARCH_CAPABILITIES MSR -- target/i386: Export TAA_NO bit to guests - -* Mon Jul 19 2021 Chen Qun -- hw/net/rocker_of_dpa: fix double free bug of rocker device - -* Mon Jun 28 2021 imxcc -- spec: add gcc buildrequire - -* Mon Jun 21 2021 Chen Qun -- ide: ahci: add check to avoid null dereference (CVE-2019-12067) -- hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register -- usb: limit combined packets to 1 MiB (CVE-2021-3527) - -* Tue Jun 15 2021 Chen Qun -- vhost-user-gpu: fix resource leak in 'vg_resource_create_2d' (CVE-2021-3544) -- vhost-user-gpu: fix memory leak in vg_resource_attach_backing (CVE-2021-3544) -- vhost-user-gpu: fix memory leak while calling 'vg_resource_unref' (CVE-2021-3544) -- vhost-user-gpu: fix memory leak in 'virgl_cmd_resource_unref' (CVE-2021-3544) -- vhost-user-gpu: fix memory leak in 'virgl_resource_attach_backing' (CVE-2021-3544) -- vhost-user-gpu: fix memory disclosure in virgl_cmd_get_capset_info (CVE-2021-3545) -- vhost-user-gpu: fix OOB write in 'virgl_cmd_get_capset' (CVE-2021-3546) - -* Fri May 28 2021 Chen Qun -- blockjob: Fix crash with IOthread when block commit after snapshot - -* Thu 20 May 2021 zhouli57 -- arm/cpu: Fixed function undefined error at compile time under arm - -* Wed May 19 2021 Ming Yang -- add strip for block-iscsi.so, block-rbd.so and block-ssh.so. 
- -* Wed 19 May 2021 zhouli57 -- util/cacheinfo: fix crash when compiling with uClibc - -* Fri Mar 26 2021 Chen Qun -- hw/pci-host: add pci-intack write method -- pci-host: add pcie-msi read method -- vfio: add quirk device write method -- prep: add ppc-parity write method -- nvram: add nrf51_soc flash read method -- spapr_pci: add spapr msi read method -- tz-ppc: add dummy read/write methods -- imx7-ccm: add digprog mmio write method - -* Thu Mar 18 2021 Chen Qun -- block: Add sanity check when setting retry parameters - -* Wed Mar 17 2021 Huawei Technologies Co., Ltd -- qemu.spec: enable strip for qemu-block-rbd.so and qemu-block-ssh.so - -* Fri Mar 12 2021 Chen Qun -- net: vmxnet3: validate configuration values during activate (CVE-2021-20203) +%files user -f %{name}.user -* Fri Mar 12 2021 Chen Qun -- migration: fix memory leak in qmp_migrate_set_parameters -- migration/tls: fix inverted semantics in multifd_channel_connect -- migration/tls: add error handling in multifd_tls_handshake_thread +%files user-static -f %{name}.user-static +%license COPYING COPYING.LIB LICENSE +# Include static binfmt configurations +%{_exec_prefix}/lib/binfmt.d/qemu-*-static.conf -* Thu Mar 11 2021 Huawei Technologies Co., Ltd -- qemu.spec: add iscsi rpm package requirement - -* Wed Mar 10 2021 Huawei Technologies Co., Ltd -- qemu.spec: make iscsi rpm package - -* Tue Mar 02 2021 Huawei Technologies Co., Ltd -- qemu.spec: Add --enable-zstd compile parameter - -* Fri Feb 26 2021 Huawei Technologies Co., Ltd -- block-backend: Stop retrying when draining +%files user-binfmt +%defattr(-,root,root,-) +# user-binfmt provides systemd registration for binfmt_misc +%{_exec_prefix}/lib/binfmt.d/qemu-*-dynamic.conf -* Fri Feb 26 2021 Huawei Technologies Co., Ltd -- ide:atapi: check io_buffer_index in ide_atapi_cmd_reply_end +%post user-binfmt +/bin/systemctl try-restart systemd-binfmt.service &>/dev/null || : -* Fri Feb 19 2021 Huawei Technologies Co., Ltd -- ati: use vga_read_byte in 
ati_cursor_define -- sd: sdhci: assert data_count is within fifo_buffer -- msix: add valid.accepts methods to check address +%postun user-binfmt +/bin/systemctl try-restart systemd-binfmt.service &>/dev/null || : -* Thu Feb 04 2021 Huawei Technologies Co., Ltd -- migration: Add multi-thread compress method -- migration: Refactoring multi-thread compress migration -- migration: Add multi-thread compress ops -- migration: Add zstd support in multi-thread compression -- migration: Add compress_level sanity check +%changelog +* Tue Oct 28 2025 huangyan - 11:8.2.0-42 +- Add qemu-user package for arm, aarch64, riscv32, riscv64 targets +- Add user-static, user-binfmt package for statically-linked QEMU user emulator + +* Tue Aug 26 2025 Pengrui Zhang - 11:8.2.0-41 +- target/i386/kvm: Refine VMX controls setting for backward compatibility +- Bugfix: Correctly set vms->bootinfo.confidential in virtCCA senarios. +- hw/arm/virt-acpi-build.c: Migrate fw_cfg creation to common location +- hw/arm/virt-acpi-build.c: Migrate virtio creation to common location +- hw/i386/acpi-microvm.c: Use common function to add virtio in DSDT +- hw/riscv: virt: Make few IMSIC macros and functions public +- hw/riscv/virt-acpi-build.c: Add AIA support in RINTC +- hw/riscv/virt-acpi-build.c: Add IMSIC in the MADT +- hw/riscv/virt-acpi-build.c: Add APLIC in the MADT +- hw/riscv/virt-acpi-build.c: Add CMO information in RHCT +- hw/riscv/virt-acpi-build.c: Add MMU node in RHCT +- hw/pci-host/gpex: Define properties for MMIO ranges +- hw/riscv/virt: Update GPEX MMIO related properties +- hw/riscv/virt-acpi-build.c: Add IO controllers and devices +- hw/riscv/virt-acpi-build.c: Add PLIC in MADT +- hw/riscv/virt.c: fix the interrupts-extended property format of PLIC +- hw/riscv/virt-acpi-build.c: Add namespace devices for PLIC and APLIC +- hw/riscv/virt-acpi-build.c: Update the HID of RISC-V UART +- hw/riscv/virt-acpi-build.c: Generate SPCR table +- hw/riscv/virt-acpi-build.c: Add SRAT and SLIT ACPI tables 
+- plugins/loader: fix deadlock when resetting/uninstalling a plugin +- smbios: Fix buffer overrun when using path= option + + +* Tue Aug 26 2025 Pengrui Zhang - 11:8.2.0-40 +- hw/acpi: Fix the memory leak issue +- virtio-net: Fix num_buffers for version 1 +- hw/net/cadence_gem: fix register mask initialization +- memory: Export a helper to get intersection of a MemoryRegionSection with a given range +- memory: Change memory_region_set_ram_discard_manager() to return the result +- memory: Unify the definiton of ReplayRamPopulate() and ReplayRamDiscard() +- memory: Introduce generic state change parent class for RamDiscardManager +- memory: Introduce PrivateSharedManager Interface as child of GenericStateManager +- memory: Add the support for PrivateSharedManager Interface +- vfio: Add the support for PrivateSharedManager Interface +- memory: Change NotifyStateClear() definition to return the result +- ram-block-attribute: Add priority listener support for PrivateSharedListener +- linux-headers: Add KVM Arm RME definitions to Linux headers +- kvm: Use kvm_vm_check_extension() where necessary +- include/qom/object.h: New OBJECT_DEFINE_SIMPLE_TYPE{, _WITH_INTERFACES} macros +- target/arm: Add confidential guest support +- KVM: track whether guest state is encrypted +- target/arm/kvm: Return immediately on error in kvm_arch_init() +- target/arm/kvm: Split kvm_arch_get/put_registers +- target/arm/kvm: Create scratch VM as Realm if necessary +- hw/core/loader: Add ROM loader notifier +- target/arm/kvm-rme: Initialize realm +- target/arm/kvm-rme: Initialize vCPU +- target/arm/kvm-rme: Initialize Realm memory +- target/arm/kvm-rme: Add Realm Personalization Value parameter +- target/arm/kvm-rme: Add measurement algorithm property +- target/arm/cpu: Set number of breakpoints and watchpoints in KVM +- target/arm/cpu: Set number of PMU counters in KVM +- target/arm/cpu: Inform about reading confidential CPU registers +- target/arm/kvm-rme: Initialize Realm memory +- 
target/arm/kvm-rme: Add Realm Personalization Value parameter +- target/arm/kvm-rme: Add measurement algorithm property +- hw/arm/virt: Add support for Arm RME +- hw/arm/virt: Disable DTB randomness for confidential VMs +- hw/arm/virt: Reserve one bit of guest-physical address for RME +- hw/arm/virt: Move virt_flash_create() to machvirt_init() +- hw/arm/virt: Use RAM instead of flash for confidential guest firmware +- hw/core/loader: Add ROM loader notifier +- target/arm/kvm-rme: Initialize Realm memory +- target/arm/kvm-rme: Add measurement algorithm property +- target/arm/kvm-rme: Add Realm Personalization Value parameter +- target/arm/kvm-rme: Initialize Realm memory +- hw/arm/boot: Mark all guest memory as RIPAS_RAM. +- target/arm/kvm-rme: Add DMA remapping for the shared memory region +- hw/arm/virt: Move virt_flash_create() to machvirt_init() +- hw/arm/virt: Use RAM instead of flash for confidential guest firmware +- hw/core/loader: Add fields to RomLoaderNotify +- docs/interop/firmware.json: Add arm-rme firmware feature +- hw/arm/boot: Load DTB as is for confidential VMs +- hw/arm/boot: Skip bootloader for confidential guests +- hw/tpm: Add TPM event log +- hw/core/loader: Add fields to RomLoaderNotify +- hw/arm/virt: Use RAM instead of flash for confidential guest firmware +- hw/arm/virt: Reserve one bit of guest-physical address for RME +- hw/arm/virt: Disable DTB randomness for confidential VMs +- hw/arm/virt: Add support for Arm RME +- target/arm/cpu: Inform about reading confidential CPU registers +- target/arm/cpu: Set number of PMU counters in KVM +- target/arm/cpu: Set number of breakpoints and watchpoints in KVM +- target/arm/kvm-rme: Add measurement algorithm property +- target/arm/kvm-rme: Add Realm Personalization Value parameter +- target/arm/kvm-rme: Initialize Realm memory +- hw/core/loader: Add ROM loader notifier +- target/arm/kvm: Create scratch VM as Realm if necessary +- target/arm/kvm-rme: Initialize vCPU +- target/arm/kvm: Split 
kvm_arch_get/put_registers +- target/arm/kvm-rme: Initialize realm +- KVM: track whether guest state is encrypted +- target/arm/kvm: Return immediately on error in kvm_arch_init() +- target/arm: Add confidential guest support +- include/qom/object.h: New OBJECT_DEFINE_SIMPLE_TYPE{, _WITH_INTERFACES} macros +- kvm: Use kvm_vm_check_extension() where necessary +- linux-headers: Add KVM Arm RME definitions to Linux headers +- docs/interop/firmware.json: Add arm-rme firmware feature +- hw/arm/boot: Load DTB as is for confidential VMs +- hw/arm/boot: Skip bootloader for confidential guests +- hw/tpm: Add TPM event log +- hw/core/loader: Add fields to RomLoaderNotify +- hw/arm/virt: Use RAM instead of flash for confidential guest firmware +- hw/arm/virt: Move virt_flash_create() to machvirt_init() +- target/arm/kvm-rme: Add DMA remapping for the shared memory region +- hw/arm/boot: Mark all guest memory as RIPAS_RAM. +- hw/arm/virt: Reserve one bit of guest-physical address for RME +- hw/arm/virt: Disable DTB randomness for confidential VMs +- hw/arm/virt: Add support for Arm RME +- target/arm/cpu: Inform about reading confidential CPU registers +- target/arm/cpu: Set number of PMU counters in KVM +- target/arm/cpu: Set number of breakpoints and watchpoints in KVM +- target/arm/kvm-rme: Add measurement algorithm property +- target/arm/kvm-rme: Add Realm Personalization Value parameter +- target/arm/kvm-rme: Initialize Realm memory +- hw/core/loader: Add ROM loader notifier +- target/arm/kvm: Create scratch VM as Realm if necessary +- target/arm/kvm-rme: Initialize vCPU +- target/arm/kvm: Split kvm_arch_get/put_registers +- target/arm/kvm-rme: Initialize realm +- KVM: track whether guest state is encrypted +- target/arm/kvm: Return immediately on error in kvm_arch_init() +- target/arm: Add confidential guest support +- include/qom/object.h: New OBJECT_DEFINE_SIMPLE_TYPE{, _WITH_INTERFACES} macros +- kvm: Use kvm_vm_check_extension() where necessary +- linux-headers: Add 
KVM Arm RME definitions to Linux headers +- Bugfix: Fix compile error in aarch32. + + +* Fri Jul 25 2025 Pengrui Zhang - 11:8.2.0-39 +- hw/audio/cs4231a: fix assertion error in isa_bus_get_irq +- block/blkio: Make s->mem_region_alignment be 64 bits +- target/arm: Adjust and validate mtedesc sizem1 +- block/io: accept NULL qiov in bdrv_pad_request +- target-arm: fix qemu-arm target build error +- target/i386: Add new Hygon 'Chengdu' CPU model + +* Fri Jul 18 2025 Pengrui Zhang - 11:8.2.0-38 +- sync header file from upstream +- backends/tpm: Avoid using g_alloca() +- hw/virtio/virtio-pci:Support shadow device for virtio-net/blk/scsi devices +- smbios: add processor-family option +- smbios: function to set default processor family +- target/riscv: SMBIOS support for RISC-V virt machine +- qemu-options: enable -smbios option on RISC-V +- qemu-options.hx: correct formatting -smbios type=4 +- tests/unit/test-char: Avoid using g_alloca() +- virtio processes indirect descriptors even if the respected + +* Wed Jun 18 2025 Panhengchang - 11:8.2.0-37 +- Add stub function for 'tmm_get_kae_num' if 'CONFIG_KVM' is not set. +- qapi/misc-target: Add KVM option to isolate virtcca detection interface. +- qapi/misc-target: Add Virtcca capability struct and query command. + +* Mon Jun 16 2025 Pengrui Zhang - 11:8.2.0-36 +- Fix error in virtCCA CoDA scenario. +- Revert "backends/iommufd: Make iommufd_backend_*() return bool" + +* Wed Jun 04 2025 Jason Zeng - 11:8.2.0-35 +- Enable Intel qatzip and qpl acceleration for multifd live migration. + +* Wed May 28 2025 Jiabo Feng - 11:8.2.0-34 +- target/arm: support the IPIV feature +- Revert "target/arm: Change arm_cpu_mp_affinity when enabled IPIV feature" +- vdpa/iommufd: All vdpa devices perform only one log_sync each time. 
+- memory:Optimize flatview ioeventfd processing +- memory/eventfd:Introduce ioeventfd batch processing to reduce the time required to update ioeventfd +- migration/memory:Optimize unnecessary memory region updates during live migration +- migration:Extand the fdtable in the incoming phase of migration +- virtio/irqfd: Batch processing of irqfd related operations during virtio device startup +- kvm/msi: Mark whether there is an IRQ route table update through changes +- virtio-pci:Batch processing of IRQFD mapping for multi queue Virtio devices + +* Wed May 28 2025 Pengrui Zhang - 11:8.2.0-33 +- arm: cvm: fix arm-softmmu build on x86 platform +- arm: VirtCCA: fix arm-softmmu build on x86 platform +- hw/arm/virt: HDBSS: fix arm-softmmu build on x86 platform +- hw/arm/virt: decouple migrate_hdbss_buffer_size() with kvm_update_hdbss_cap() +- hw/arm/virt: only support the HDBSS feature in aarch64 +- multifd: bugfix for incorrect migration data with qatzip compression +- multifd: bugfix for incorrect migration data with QPL compression +- multifd: bugfix for migration using compression methods +- migration/multifd: Zero p->flags before starting filling a packet +- migration/multifd: Ensure packet->ramblock is null-terminated +- migration/multifd: Fix rb->receivedmap cleanup race +- migration/multifd: Fix loop conditions in multifd_zstd_send_prepare and multifd_zstd_recv +- tests/migration: Add integration test for 'qatzip' compression method +- migration: Introduce 'qatzip' compression method +- migration: Add migration parameters for QATzip +- meson: Introduce 'qatzip' feature to the build system +- docs/migration: add qatzip compression feature +- migration/multifd: Fix p->iov leak in multifd-uadk.c +- tests/migration-test: add uadk compression test +- migration/multifd: Switch to no compression when no hardware support +- migration/multifd: Add UADK based compression and decompression +- migration/multifd: Add UADK initialization +- migration/multifd: add uadk 
compression framework +- configure: Add uadk option +- docs/migration: add uadk compression feature +- configure: add --enable-qpl build option +- migration/multifd: implement qpl compression and decompression +- migration/multifd: implement initialization of qpl compression +- migration/multifd: include ram.h in multifd.h +- migration/multifd: add qpl compression method +- migration/multifd: put IOV initialization into compression method +- docs/migration: add qpl compression feature +- migration/multifd: solve zero page causing multiple page faults +- migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page. +- migration/multifd: Implement zero page transmission on the multifd thread +- migration/multifd: Add new migration option zero-page-detection +- migration/multifd: Allow multifd without packets +- migration/multifd: Rename MultiFDSend|RecvParams::data to compress_data +- migration/multifd: Cleanup multifd_recv_sync_main +- tests/migration: Set compression level in migration tests +- migration: Properly apply migration compression level parameters +- migration/multifd: Drop unnecessary helper to destroy IOC +- migration/multifd: Cleanup outgoing_args in state destroy +- migration/multifd: Make multifd_channel_connect() return void +- migration/multifd: Drop registered_yank +- migration/multifd: Cleanup TLS iochannel referencing +- migration/multifd: Release recv sem_sync earlier +- migration/multifd: Remove p->quit from recv side +- migration/multifd: Add a synchronization point for channel creation +- migration/multifd: Unify multifd and TLS connection paths +- migration/multifd: Move multifd_send_setup into migration thread +- migration/multifd: Move multifd_send_setup error handling in to the function +- migration/multifd: Remove p->running +- migration/multifd: Join the TLS thread +- migration: Fix logic of channels and transport compatibility check +- migration/multifd: Optimize sender 
side to be lockless +- migration/multifd: Stick with send/recv on function names +- migration/multifd: Cleanup multifd_load_cleanup() +- migration/multifd: Cleanup multifd_save_cleanup() +- migration/multifd: Rewrite multifd_queue_page() +- migration/multifd: Change retval of multifd_send_pages() +- migration/multifd: Change retval of multifd_queue_page() +- migration/multifd: Split multifd_send_terminate_threads() +- migration/multifd: Forbid spurious wakeups +- migration/multifd: Move header prepare/fill into send_prepare() +- migration/multifd: Move trace_multifd_send|recv() +- migration/multifd: Move total_normal_pages accounting +- migration/multifd: Rename p->num_packets and clean it up +- migration/multifd: Drop pages->num check in sender thread +- migration/multifd: Simplify locking in sender thread +- migration/multifd: Separate SYNC request with normal jobs +- migration/multifd: Drop MultiFDSendParams.normal[] array +- migration/multifd: Postpone reset of MultiFDPages_t +- migration/multifd: Drop MultiFDSendParams.quit, cleanup error paths +- migration/multifd: multifd_send_kick_main() +- migration/multifd: Drop stale comment for multifd zero copy +- docs/migration: Further move virtio to be feature of migration +- docs/migration: Further move vfio to be feature of migration +- docs/migration: Organize "Postcopy" page +- docs/migration: Split "dirty limit" +- docs/migration: Split "Postcopy" +- docs/migration: Split "Debugging" and "Firmware" +- docs/migration: Split "Backwards compatibility" separately +- docs/migration: Convert virtio.txt into rST +- docs/migration: Create index page +- docs/migration: Create migration/ directory +- tests/qtest: Re-enable multifd cancel test +- tests/qtest/migration: Use the new migration_test_add +- tests/qtest/migration: Add a wrapper to print test names +- tests/qtest/migration: Print migration incoming errors +- migration: Report error in incoming migration +- migration/multifd: Change multifd_pages_init argument +- 
migration/multifd: Remove QEMUFile from where it is not needed +- migration/multifd: Remove MultiFDPages_t::packet_num +- migration/multifd: Remove unnecessary usage of local Error +- migration: Fix migration_channel_read_peek() error path +- migration/multifd: Remove error_setg() in migration_ioc_process_incoming() +- migration/multifd: Fix leaking of Error in TLS error flow +- migration/multifd: Simplify multifd_channel_connect() if else statement +- migration/multifd: Fix error message in multifd_recv_initial_packet() +- hw/arm/virt: support the HDBSS feature + +* Thu May 15 2025 Jiabo Feng - 11:8.2.0-32 +- target/i386: csv: Release CSV3 shared pages after unmapping DMA +- target/i386: Add new CPU model ClearwaterForest +- target/i386: add sha512, sm3, sm4 feature bits +- docs: Add GNR, SRF and CWF CPU models +- target/i386: Export BHI_NO bit to guests +- target/i386: Introduce SierraForest-v2 model +- vdpa/iommufd:Implement DMA mapping through the iommufd interface +- vdpa/iommufd:Introduce vdpa-iommufd module +- vdpa/iommufd:support associating iommufd backend for vDPA devices +- Kconfig/iommufd/VDPA: Update IOMMUFD module configuration dependencies The vDPA module can also use IOMMUFD like the VFIO module. 
+- backends/iommufd: Get rid of qemu_open_old() +- backends/iommufd: Make iommufd_backend_*() return bool +- backends/iommufd: Fix missing ERRP_GUARD() for error_prepend() +- backends/iommufd: Remove mutex +- backends/iommufd: Remove check on number of backend users +- hw/intc: Add extioi ability of 256 vcpu interrupt routing +- hw/rtc: Fixed loongson rtc emulation errors +- hw/loongarch/boot: Adjust the loading position of the initrd +- target/loongarch: Fix the cpu unplug resource leak +- target/loongarch: fix vcpu reset command word issue +- vdpa:Fix dirty page bitmap synchronization not done after suspend for vdpa devices + +* Thu Apr 24 2025 Jiabo Feng - 11:8.2.0-31 +- target/arm: Change arm_cpu_mp_affinity when enabled IPIV feature +- fw_cfg: Don't set callback_opaque NULL in fw_cfg_modify_bytes_read() + +* Tue Apr 22 2025 Jiabo Feng - 11:8.2.0-30 +- Revert "linux-user: Print tid not pid with strace" +- gpex-acpi: Remove duplicate DSM #5 +- smmuv3: Use default bus for arm-smmuv3-accel +- smmuv3: Change arm-smmuv3-nested name to arm-smmuv3-accel +- smmu-common: Return sysmem address space only for vfio-pci +- smmuv3: realize get_pasid_cap and set ssidsize with pasid +- vfio: Synthesize vPASID capability to VM +- backend/iommufd: Report PASID capability +- pci: Get pasid capability from vIOMMU +- smmuv3: Add support for page fault handling +- kvm: Translate MSI doorbell address only if it is valid +- hw/arm/smmuv3: Enable sva/stall IDR features +- iommufd.h: Updated to openeuler olk-6.6 kernel +- tests/data/acpi/virt: Update IORT acpi table +- hw/arm/virt-acpi-build: Add IORT RMR regions to handle MSI nested binding +- tests/qtest: Allow IORT acpi table to change +- hw/arm/virt-acpi-build: Build IORT with multiple SMMU nodes +- hw/arm/smmuv3: Associate a pci bus with a SMMUv3 Nested device +- hw/arm/smmuv3: Add initial support for SMMUv3 Nested device +- hw/arm/virt: Add an SMMU_IO_LEN macro +- hw/pci-host/gpex: [needs kernel fix] Allow to generate preserve 
boot config DSM #5 +- tests/data/acpi: Update DSDT acpi tables +- acpi/gpex: Fix PCI Express Slot Information function 0 returned value +- tests/qtest: Allow DSDT acpi tables to change +- hw/arm/smmuv3: Forward cache invalidate commands via iommufd +- hw/arm/smmu-common: Replace smmu_iommu_mr with smmu_find_sdev +- hw/arm/smmuv3: Add missing STE invalidation +- hw/arm/smmuv3: Add smmu_dev_install_nested_ste() for CFGI_STE +- hw/arm/smmuv3: Check idr registers for STE_S1CDMAX and STE_S1STALLD +- hw/arm/smmuv3: Read host SMMU device info +- hw/arm/smmuv3: Ignore IOMMU_NOTIFIER_MAP for nested-smmuv3 +- hw/arm/smmu-common: Return sysmem if stage-1 is bypassed +- hw/arm/smmu-common: Add iommufd helpers +- hw/arm/smmu-common: Add set/unset_iommu_device callback +- hw/arm/smmu-common: Extract smmu_get_sbus and smmu_get_sdev helpers +- hw/arm/smmu-common: Bypass emulated IOTLB for a nested SMMU +- hw/arm/smmu-common: Add a nested flag to SMMUState +- backends/iommufd: Introduce iommufd_viommu_invalidate_cache +- backends/iommufd: Introduce iommufd_vdev_alloc +- backends/iommufd: Introduce iommufd_backend_alloc_viommu +- vfio/iommufd: Implement [at|de]tach_hwpt handlers +- vfio/iommufd: Implement HostIOMMUDeviceClass::realize_late() handler +- HostIOMMUDevice: Introduce realize_late callback +- vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD +- backends/iommufd: Add helpers for invalidating user-managed HWPT +- Update iommufd.h header for vSVA +- vfio/common: Allow disabling device dirty page tracking +- vfio/migration: Don't block migration device dirty tracking is unsupported +- vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support +- vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support +- vfio/iommufd: Probe and request hwpt dirty tracking capability +- vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device() +- vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps +- 
vfio/{iommufd,container}: Remove caps::aw_bits +- HostIOMMUDevice: Store the VFIO/VDPA agent +- vfio/iommufd: Introduce auto domain creation +- vfio/ccw: Don't initialize HOST_IOMMU_DEVICE with mdev +- vfio/ap: Don't initialize HOST_IOMMU_DEVICE with mdev +- vfio/iommufd: Return errno in iommufd_cdev_attach_ioas_hwpt() +- backends/iommufd: Extend iommufd_backend_get_device_info() to fetch HW capabilities +- vfio/iommufd: Don't initialize nor set a HOST_IOMMU_DEVICE with mdev +- vfio/pci: Extract mdev check into an helper +- intel_iommu: Check compatibility with host IOMMU capabilities +- intel_iommu: Implement [set|unset]_iommu_device() callbacks +- intel_iommu: Extract out vtd_cap_init() to initialize cap/ecap +- vfio/pci: Pass HostIOMMUDevice to vIOMMU +- hw/pci: Introduce pci_device_[set|unset]_iommu_device() +- hw/pci: Introduce helper function pci_device_get_iommu_bus_devfn() +- vfio: Create host IOMMU device instance +- backends/iommufd: Implement HostIOMMUDeviceClass::get_cap() handler +- vfio/container: Implement HostIOMMUDeviceClass::get_cap() handler +- vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler +- backends/iommufd: Introduce helper function iommufd_backend_get_device_info() +- vfio/container: Implement HostIOMMUDeviceClass::realize() handler +- range: Introduce range_get_last_bit() +- backends/iommufd: Introduce TYPE_HOST_IOMMU_DEVICE_IOMMUFD[_VFIO] devices +- vfio/container: Introduce TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO device +- backends/host_iommu_device: Introduce HostIOMMUDeviceCaps +- backends: Introduce HostIOMMUDevice abstract +- vfio/iommufd: Remove CONFIG_IOMMUFD usage +- vfio/spapr: Extend VFIOIOMMUOps with a release handler +- vfio/spapr: Only compile sPAPR IOMMU support when needed +- vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM interface +- vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface +- vfio/container: Intoduce a new VFIOIOMMUClass::setup handler +- vfio/container: Introduce a VFIOIOMMU legacy QOM interface 
+- vfio/container: Introduce a VFIOIOMMU QOM interface +- vfio/container: Initialize VFIOIOMMUOps under vfio_init_container() +- vfio/container: Introduce vfio_legacy_setup() for further cleanups +- docs/devel: Add VFIO iommufd backend documentation +- vfio: Introduce a helper function to initialize VFIODevice +- vfio/ccw: Move VFIODevice initializations in vfio_ccw_instance_init +- vfio/ap: Move VFIODevice initializations in vfio_ap_instance_init +- vfio/platform: Move VFIODevice initializations in vfio_platform_instance_init +- vfio/pci: Move VFIODevice initializations in vfio_instance_init +- hw/i386: Activate IOMMUFD for q35 machines +- kconfig: Activate IOMMUFD for s390x machines +- hw/arm: Activate IOMMUFD for virt machines +- vfio: Make VFIOContainerBase poiner parameter const in VFIOIOMMUOps callbacks +- vfio/ccw: Make vfio cdev pre-openable by passing a file handle +- vfio/ccw: Allow the selection of a given iommu backend +- vfio/ap: Make vfio cdev pre-openable by passing a file handle +- vfio/ap: Allow the selection of a given iommu backend +- vfio/platform: Make vfio cdev pre-openable by passing a file handle +- vfio/platform: Allow the selection of a given iommu backend +- vfio/pci: Make vfio cdev pre-openable by passing a file handle +- vfio/pci: Allow the selection of a given iommu backend +- vfio/iommufd: Enable pci hot reset through iommufd cdev interface +- vfio/pci: Introduce a vfio pci hot reset interface +- vfio/pci: Extract out a helper vfio_pci_get_pci_hot_reset_info +- vfio/iommufd: Add support for iova_ranges and pgsizes +- vfio/iommufd: Relax assert check for iommufd backend +- vfio/iommufd: Implement the iommufd backend +- vfio/common: return early if space isn't empty +- util/char_dev: Add open_cdev() +- backends/iommufd: Introduce the iommufd object +- vfio/spapr: Move hostwin_list into spapr container +- vfio/spapr: Move prereg_listener into spapr container +- vfio/spapr: switch to spapr IOMMU BE add/del_section_window +- vfio/spapr: 
Introduce spapr backend and target interface +- vfio/container: Implement attach/detach_device +- vfio/container: Move iova_ranges to base container +- vfio/container: Move dirty_pgsizes and max_dirty_bitmap_size to base container +- vfio/container: Move listener to base container +- vfio/container: Move vrdl_list to base container +- vfio/container: Move pgsizes and dma_max_mappings to base container +- vfio/container: Convert functions to base container +- vfio/container: Move per container device list in base container +- vfio/container: Switch to IOMMU BE set_dirty_page_tracking/query_dirty_bitmap API +- vfio/container: Move space field to base container +- vfio/common: Move giommu_list in base container +- vfio/common: Introduce vfio_container_init/destroy helper +- vfio/container: Switch to dma_map|unmap API +- vfio/container: Introduce a empty VFIOIOMMUOps +- vfio: Introduce base object for VFIOContainer and targeted interface +- cryptodev: Fix error handling in cryptodev_lkcf_execute_task() +- hw/xen: Fix xen_bus_realize() error handling +- hw/misc/aspeed_hace: Fix buffer overflow in has_padding function +- target/s390x: Fix a typo in s390_cpu_class_init() +- hw/sd/sdhci: free irq on exit +- hw/ufs: free irq on exit +- hw/pci-host/designware: Fix ATU_UPPER_TARGET register access +- target/i386: Make invtsc migratable when user sets tsc-khz explicitly +- target/i386: Construct CPUID 2 as stateful iff times > 1 +- target/i386: Enable fdp-excptn-only and zero-fcs-fds +- target/i386: Don't construct a all-zero entry for CPUID[0xD 0x3f] +- i386/cpuid: Remove subleaf constraint on CPUID leaf 1F +- target/i386: pass X86CPU to x86_cpu_get_supported_feature_word +- target/i386: Raise the highest index value used for any VMCS encoding +- target/i386: Add VMX control bits for nested FRED support +- target/i386: Delete duplicated macro definition CR4_FRED_MASK +- target/i386: Add get/set/migrate support for FRED MSRs +- target/i386: enumerate VMX nested-exception 
support +- vmxcap: add support for VMX FRED controls +- target/i386: mark CR4.FRED not reserved +- target/i386: add support for FRED in CPUID enumeration +- target/i386: fix feature dependency for WAITPKG +- target/i386: Add more features enumerated by CPUID.7.2.EDX +- net: fix build when libbpf is disabled, but libxdp is enabled +- hw/nvme: fix invalid endian conversion +- hw/nvme: fix invalid check on mcl +- backends/cryptodev: Do not ignore throttle/backends Errors +- backends/cryptodev: Do not abort for invalid session ID +- virtcca: add kvm isolation when get tmi version. +- qga: Don't daemonize before channel is initialized +- qga: Add log to guest-fsfreeze-thaw command +- backends: VirtCCA: cvm_gpa_start supports both 1GB and 3GB +- BUGFIX: Enforce isolation for virtcca_shared_hugepage +- arm: VirtCCA: qemu CoDA support UEFI boot +- arm: VirtCCA: Compatibility with older versions of TMM and the kernel +- arm: VirtCCA: qemu uefi boot support kae +- arm: VirtCCA: CVM support UEFI boot + +* Fri Feb 21 2025 Jiabo Feng - 11:8.2.0-29 +- target/i386: csv: Support inject secret for CSV3 guest only if the extension is enabled +- target/i386: csv: Support load kernel hashes for CSV3 guest only if the extension is enabled +- target/i386: csv: Request to set private memory of CSV3 guest if the extension is enabled +- target/i386: kvm: Support to get and enable extensions for Hygon CoCo guest +- qapi/qom,target/i386: csv-guest: Introduce secret-header-file=str and secret-file=str options +- bakcend: VirtCCA:resolve hugepage memory waste issue in vhost-user scenario +- parallels: fix ext_off assertion failure due to overflow +- backends/cryptodev-vhost-user: Fix local_error leaks +- hw/usb/hcd-ehci: Fix debug printf format string +- target/riscv/vector_helper.c: fix 'vmvr_v' memcpy endianess +- target/riscv/vector_helper.c: optimize loops in ldst helpers +- target/riscv/vector_helper.c: set vstart = 0 in GEN_VEXT_VSLIDEUP_VX() +- target/hexagon: don't look for static glib 
+- virtio-net: Fix network stall at the host side waiting for kick +- Add if condition to avoid assertion failed error in blockdev_init +- target/arm: Use float_status copy in sme_fmopa_s +- target/arm: take HSTR traps of cp15 accesses to EL2, not EL1 +- target/arm: Reinstate "vfp" property on AArch32 CPUs +- target/i386/cpu: Fix notes for CPU models +- target/arm: LDAPR should honour SCTLR_ELx.nAA +- target/riscv: Avoid bad shift in riscv_cpu_do_interrupt() +- hvf: remove unused but set variable +- hw/misc/nrf51_rng: Don't use BIT_MASK() when we mean BIT() +- Avoid taking address of out-of-bounds array index +- target/arm: Fix VCMLA Dd, Dn, Dm[idx] +- target/arm: Fix UMOPA/UMOPS of 16-bit values +- target/arm: Fix SVE/SME gross MTE suppression checks +- target/arm: Fix nregs computation in do_{ld,st}_zpa +- crypto: fix error check on gcry_md_open +- Change vmstate_cpuhp_sts vmstateDescription version_id +- hw/pci: Remove unused pci_irq_pulse() method +- hw/intc: Don't clear pending bits on IRQ lowering +- target/arm: Drop user-only special case in sve_stN_r +- migration: Ensure vmstate_save() sets errp +- target/i386: fix hang when using slow path for ptw_setl +- contrib/plugins: add compat for g_memdup2 +- hw/audio/hda: fix memory leak on audio setup +- crypto: perform runtime check for hash/hmac support in gcrypt +- target/arm: Fix incorrect aa64_tidcp1 feature check +- target/arm: fix exception syndrome for AArch32 bkpt insn +- target/arm: Don't get MDCR_EL2 in pmu_counter_enabled() before checking ARM_FEATURE_PMU +- linux-user: Print tid not pid with strace +- target/arm: Fix A64 scalar SQSHRN and SQRSHRN +- target/arm: Don't assert for 128-bit tile accesses when SVL is 128 +- hw/timer/exynos4210_mct: fix possible int overflow +- target/arm: Avoid shifts by -1 in tszimm_shr() and tszimm_shl() +- hw/audio/virtio-snd: Always use little endian audio format +- target/riscv: Fix vcompress with rvv_ta_all_1s +- usb-hub: Fix handling port power control messages + +* 
Fri Feb 21 2025 Jiabo Feng - 11:8.2.0-28 +- hw/misc/mos6522: Fix bad class definition of the MOS6522 device +- target/i386: Fix minor typo in NO_NESTED_DATA_BP feature bit +- cpu: ensure we don't call start_exclusive from cpu_exec +- Avoid unaligned fetch in ladr_match() +- audio/audio.c: remove trailing newline in error_setg +- acpi/tests/avocado/bits: wait for 200 seconds for SHUTDOWN event from bits VM +- linux-user: Tolerate CONFIG_LSM_MMAP_MIN_ADDR +- accel/tcg: Fix user-only probe_access_internal plugin +- linux-user: Honor elf alignment when placing images +- Reserve address for MSI mapping in the CVM scenario. + +* Fri Dec 13 2024 Xianglai Li - 11:8.2.0-27 +- fix compile error on loongarch +- hw/loongarch: fix cpu hotplug reset +- hw/loongarch/boot: Use warn_report when no kernel filename +- hw/loongarch: clean code +- hw/loongarch: Add KVM pch msi device support +- hw/loongarch: Add KVM pch pic device support +- hw/loongarch: Add KVM extioi device support +- hw/loongarch: Add KVM IPI device support +- hw/loongarch/virt: Update the ACPI table for hotplug cpu +- hw/loongarch/virt: Add basic CPU plug support +- hw/loongarch/virt: Add CPU topology support +- accel/kvm/kvm-all: Fixes the missing break in vCPU unpark logic +- gdbstub: Add helper function to unregister GDB register space +- physmem: Add helper function to destroy CPU AddressSpace +- hw/acpi: Update CPUs AML with cpu-(ctrl)dev change +- hw/acpi: Update ACPI GED framework to support vCPU Hotplug +- hw/acpi: Move CPU ctrl-dev MMIO region len macro to common header file +- accel/kvm: Extract common KVM vCPU {creation,parking} code +- target/loongarch: Add steal time support on migration +- linux-headers: loongarch: Add kvm_para.h and unistd_64.h +- target/loongarch/kvm: Implement LoongArch PMU extension +- target/loongarch: Implement lbt registers save/restore function +- target/loongarch: Add loongson binary translation feature +- sync loongarch linux-headers +- target/loongarch: Avoid bits shift 
exceeding width of bool type +- target/loongarch: Use explicit little-endian LD/ST API +- target/loongarch: fix -Werror=maybe-uninitialized false-positive +- target/loongarch: Support QMP dump-guest-memory +- target/loongarch/kvm: Add vCPU reset function +- target/loongarch: Add compatible support about VM reboot +- target/loongarch: Fix cpu_reset set wrong CSR_CRMD +- target/loongarch: Set CSR_PRCFG1 and CSR_PRCFG2 values +- target/loongarch: Remove avail_64 in trans_srai_w() and simplify it +- target/loongarch/kvm: Add software breakpoint support +- target/loongarch: Add loongarch vector property unconditionally +- target/loongarch/kvm: Fix VM recovery from disk failures +- target/loongarch: Put cpucfg operation before CSR register +- target/loongarch: Add TCG macro in structure CPUArchState +- hw/arm/virt-acpi-build.c: Migrate SPCR creation to common location +- hw/loongarch/virt: Add FDT table support with acpi ged pm register +- hw/loongarch/virt: Add description for virt machine type +- hw/loongarch: Add acpi SPCR table support +- hw/loongarch: virt: pass random seed to fdt +- hw/loongarch: virt: support up to 4 serial ports +- hw/loongarch: Remove default enable with VIRTIO_VGA device +- hw/loongarch: Fix length for lowram in ACPI SRAT +- hw/loongarch/virt: Remove unused assignment +- hw/loongarch: Change the tpm support by default +- hw/loongarch/boot.c: fix out-of-bound reading +- hw/loongarch/virt: Use MemTxAttrs interface for misc ops +- tests/libqos: Add loongarch virt machine node +- hw/loongarch: Remove minimum and default memory size +- hw/loongarch: Refine system dram memory region +- hw/loongarch: Refine fwcfg memory map +- hw/loongarch: Refine fadt memory table for numa memory +- hw/loongarch: Refine acpi srat table for numa memory +- hw/loongarch: Add VM mode in IOCSR feature register in kvm mode +- hw/loongarch: Refine default numa id calculation +- hw/loongarch: Rename LoongArchMachineState with LoongArchVirtMachineState +- hw/loongarch: Rename 
LOONGARCH_MACHINE with LOONGARCH_VIRT_MACHINE +- hw/loongarch: move memory map to boot.c +- loongarch: switch boards to "default y" +- hw/loongarch: Add cells missing from rtc node +- hw/loongarch: Add cells missing from uart node +- hw/loongarch: fdt remove unused irqchip node +- hw/loongarch: fdt adds pcie irq_map node +- hw/loongarch: fdt adds pch_msi Controller +- hw/loongarch: fdt adds pch_pic Controller +- hw/loongarch: fdt adds Extend I/O Interrupt Controller +- hw/loongarch: fdt adds cpu interrupt controller node +- hw/loongarch: Init efi_fdt table +- hw/loongarch: Init efi_initrd table +- hw/loongarch: Init efi_boot_memmap table +- hw/loongarch: Init efi_system_table +- hw/loongarch: Add init_cmdline +- hw/loongarch: Add slave cpu boot_code +- hw/loongarch: Add load initrd +- hw/loongarch: Move boot functions to boot.c + +* Thu Dec 12 2024 Jiabo Feng - 11:8.2.0-26 +- vdpa-dev: Fix initialisation order to restore VDUSE compatibility +- tcg: Allow top bit of SIMD_DATA_BITS to be set in simd_desc() +- migration: fix-possible-int-overflow +- target/m68k: Map FPU exceptions to FPSR register +- qemu-options: Fix CXL Fixed Memory Window interleave-granularity typo +- hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System registers +- hw/intc/arm_gic: Fix handling of NS view of GICC_APR +- qio: Inherit follow_coroutine_ctx across TLS +- target/riscv: Fix the element agnostic function problem +- accel/tcg: Fix typo causing tb->page_addr[1] to not be recorded +- tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers +- migration: Fix file migration with fdset +- ui/vnc: don't return an empty SASL mechlist to the client +- target/arm: Fix FJCVTZS vs flush-to-zero +- hw/ppc/e500: Prefer QOM cast +- sphinx/qapidoc: Fix to generate doc for explicit, unboxed arguments +- hw/ppc/e500: Remove unused "irqs" parameter +- hw/ppc/e500: Add missing device tree properties to i2c controller node +- hw/i386/amd_iommu: Don't leak memory in amdvi_update_iotlb() +- 
hw/arm/mps2-tz.c: fix RX/TX interrupts order +- target/i386: csv: Add support to migrate the incoming context for CSV3 guest +- target/i386: csv: Add support to migrate the outgoing context for CSV3 guest +- target/i386: csv: Add support to migrate the incoming page for CSV3 guest +- target/i386: csv: Add support to migrate the outgoing page for CSV3 guest +- linux-headers: update kernel headers to include CSV3 migration cmds +- vfio: Only map shared region for CSV3 virtual machine +- vga: Force full update for CSV3 guest +- target/i386: csv: Load initial image to private memory for CSV3 guest +- target/i386: csv: Do not register/unregister guest secure memory for CSV3 guest +- target/i386: cpu: Populate CPUID 0x8000_001F when CSV3 is active +- target/i386: csv: Add command to load vmcb to CSV3 guest memory +- target/i386: csv: Add command to load data to CSV3 guest memory +- target/i386: csv: Add command to initialize CSV3 context +- target/i386: csv: Add CSV3 context +- next-kbd: convert to use qemu_input_handler_register() +- qemu/bswap: Undefine CPU_CONVERT() once done +- exec/memop: Remove unused memop_big_endian() helper +- hw/nvme: fix handling of over-committed queues +- 9pfs: fix crash on 'Treaddir' request +- hw/misc/psp: Pin the hugepage memory specified by mem2 during use for psp +- hw/misc: support tkm use mem2 memory +- hw/i386: add mem2 option for qemu +- kvm: add support for guest physical bits +- target/i386: add guest-phys-bits cpu property + +* Sat Nov 30 2024 Jiabo Feng - 11:8.2.0-25 +- hw/arm/virt:Keep Guest L1 cache type consistent with KVM +- cvm : Add support for TEE-based national encryption acceleration. 
+- Add virtCCA Coda annotation Adjust the position of the security device +- target/i386: sev: Add support for reuse ASID for different CSV guests +- target/i386: sev: Fix incompatibility between SEV and CSV on the GET_ID API +- hw/cxl: Ensure there is enough data for the header in cmd_ccls_set_lsa() +- hw/pci: Add parenthesis to PCI_BUILD_BDF macro +- hw/audio/hda: free timer on exit +- meson.build: Remove ncurses workaround for OpenBSD +- ui/console-vc: Silence warning about sprintf() on OpenBSD +- ui: remove break after g_assert_not_reached() +- s390x/sclp: Simplify get_sclp_device() +- hw/vfio/hct: qemu startup terminate once error happened in hct +- hw/vfio/hct: fix ccp_index error caused by uninitialized buf +- hw/vfio/hct: update support ccp count to 48. +- hw/vfio: add device hct based on vfio. + +* Sat Nov 30 2024 Jiabo Feng - 11:8.2.0-24 +- ppc/xive: Fix ESB length overflow on 32-bit hosts +- target/hppa: Fix PSW V-bit packaging in cpu_hppa_get for hppa64 +- target/ppc: Fix migration of CPUs with TLB_EMB TLB type +- target/arm: Clear high SVE elements in handle_vec_simd_wshli +- module: Prevent crash by resetting local_err in module_load_qom_all() +- tests/docker: update debian i686 and mipsel images to bookworm +- target/arm: Fix SVE SDOT/UDOT/USDOT (4-way, indexed) +- docs/sphinx/depfile.py: Handle env.doc2path() returning a Path not a str +- block/blkio: use FUA flag on write zeroes only if supported +- virtio-pci: Fix the use of an uninitialized irqfd +- hw/cxl: Ensure there is enough data to read the input header in cmd_get_physical_port_state() +- intel_iommu: Send IQE event when setting reserved bit in IQT_TAIL +- virtio-net: Avoid indirection_table_mask overflow +- Fix calculation of minimum in colo_compare_tcp +- target/riscv/csr.c: Fix an access to VXSAT +- linux-user: Clean up unused header +- raw-format: Fix error message for invalid offset/size +- hw/loongarch/virt: Remove unnecessary 'cpu.h' inclusion +- tests: Wait for migration completion 
on destination QEMU to avoid failures +- acpi: ged: Add macro for acpi sleep control register +- hw/intc/openpic: Improve errors for out of bounds property values +- hw/pci-bridge: Add a Kconfig switch for the normal PCI bridge +- docs/tools/qemu-img.rst: fix typo (sumarizes) +- audio/pw: Report more accurate error when connecting to PipeWire fails +- audio/pw: Report more accurate error when connecting to PipeWire fails +- dma: Fix function names in documentation Ensure the function names match. +- edu: fix DMA range upper bound check +- platform-bus: fix refcount leak +- hw/net/can/sja1000: fix bug for single acceptance filter and standard frame +- tests/avocado: fix typo in replay_linux +- util/userfaultfd: Remove unused uffd_poll_events +- Consider discard option when writing zeros +- crypto: factor out conversion of QAPI to gcrypt constants +- crypto: drop gnutls debug logging support +- crypto: use consistent error reporting pattern for unsupported cipher modes +- hw/gpio/aspeed_gpio: Avoid shift into sign bit + +* Thu Nov 28 2024 fangyi - 11:8.2.0-23 +- vdpa: fix vdpa device migrate rollback wrong when suspend device failed. 
+- vdpa: support resizing virtio-blk capacity online for kernel vdpa +- Revert "vdpa: add vhost_vdpa_suspend" +- Revert "vdpa: add vhost_vdpa->suspended parameter" +- Revert "vdpa: block migration if SVQ does not admit a feature" +- vdpa: remove memory listener unregister in vhost_vdpa_reset_status + +* Wed Nov 27 2024 fangyi - 11:8.2.0-22 +- seabios: enable virtio device mmio access and wait until virtio device reset done + +* Thu Nov 7 2024 Jiabo Feng - 11:8.2.0-21 +- tests: bump QOS_PATH_MAX_ELEMENT_SIZE again +- softmmu/physmem: fix memory leak in dirty_memory_extend() +- crypto: run qcrypto_pbkdf2_count_iters in a new thread +- hw/audio/virtio-sound: fix heap buffer overflow +- hw/intc/arm_gic: fix spurious level triggered interrupts +- ui/sdl2: set swap interval explicitly when OpenGL is enabled +- target/riscv/kvm: tolerate KVM disable ext errors +- virtio: remove virtio_tswap16s() call in vring_packed_event_read() +- block: fix -Werror=maybe-uninitialized false-positive +- hw/remote/vfio-user: Fix config space access byte order +- hw/loongarch/virt: Fix memory leak +- hw/intc/riscv_aplic: APLICs should add child earlier than realize +- stdvga: fix screen blanking +- ui/gtk: Draw guest frame at refresh cycle +- target/i386: fix size of EBP writeback in gen_enter() +- virtio-net: drop too short packets early +- target/ppc: Fix lxv/stxv MSR facility check +- target/ppc: Fix lxvx/stxvx facility check +- virtio-snd: add max size bounds check in input cb(CVE-2024-7730) + +* Mon Oct 21 2024 Jiabo Feng - 11:8.2.0-20 +- Added CoDA feature support in the context of CVM. When virtcca cvm is enabled, the iommu is tagged as secure. 
+- hw/block: fix uint32 overflow +- hw/ufs: add basic info of query response upiu +- crypto: avoid leak of ctx when bad cipher mode is given Fixes: Coverity CID 1546884 + +* Mon Oct 14 2024 Jiabo Feng - 11:8.2.0-19 +- mac_dbdma: Remove leftover `dma_memory_unmap` calls(CVE-2024-8612) +- softmmu: Support concurrent bounce buffers(CVE-2024-8612) +- system/physmem: Per-AddressSpace bounce buffering +- system/physmem: Propagate AddressSpace to MapClient helpers + +* Wed Sep 18 2024 Jiabo Feng - 11:8.2.0-18 +- hw/loongarch/virt: Fix FDT memory node address width +- hw/loongarch: Fix fdt memory node wrong 'reg' +- load_elf: fix iterator's type for elf file processing +- migration/colo: Fix bdrv_graph_rdlock_main_loop: Assertion `!qemu_in_… +- target/i386: no single-step exception after MOV or POP SS +- char-stdio: Restore blocking mode of stdout on exit +- backends/cryptodev-builtin: Fix local_error leaks +- target/loongarch: fix a wrong print in cpu dump +- virtio-pci: fix use of a released vector +- target/arm: Disable SVE extensions when SVE is disabled +- hw/misc/bcm2835_property: Fix handling of FRAMEBUFFER_SET_PALETTE +- target/i386: Introduce SapphireRapids-v3 to add missing features +- virtio-net: Ensure queue index fits with RSS (CVE-2024-6505) +- nbd/server: CVE-2024-7409: Avoid use-after-free when closing server +- update io/trace-events. Parameters should remain consistent. +- update docs/tools/virtfs-proxy-helper.rst. This place is spelled wrong. 
+- kvm: Add support for CSV2 reboot +- target/i386/kvm: Fix the resettable info when emulate Hygon CSV2 guest +- target/i386: get/set/migrate GHCB state +- target/i386: csv: Add support for migrate VMSA for CSV2 guest +- migration/ram: Accelerate the loading of CSV guest's encrypted pages +- migration/ram: Accelerate the transmission of CSV guest's encrypted pages +- target/i386: csv: add support to load incoming encrypted pages queued in the CMD list +- target/i386: csv: add support to queue the incoming page into a list +- target/i386: csv: add support to encrypt the outgoing pages in the list queued before. +- target/i386: csv: add support to queue the outgoing page into a list +- target/i386: csv: Read cert chain from file when prepared for CSV live migration +- target/i386: Introduce header file csv.h +- migration/ram: Fix calculation of gfn correpond to a page in ramblock +- target/i386: sev: Clear shared_regions_list when reboot CSV Guest +- migration/ram: Force encrypted status for VGA vram +- target/i386: sev: Return 0 if sev_send_get_packet_len() fails +- kvm: Add support for userspace MSR filtering and handling of MSR_KVM_MIGRATION_CONTROL. +- migration/ram: Force encrypted status for flash0 & flash1 devices. +- migration/ram: add support to send encrypted pages +- migration: add support to migrate shared regions list +- kvm: Add support for SEV shared regions list and KVM_EXIT_HYPERCALL. 
+- target/i386: sev: add support to load incoming encrypted page +- target/i386: sev: add support to encrypt the outgoing page +- target/i386: sev: do not create launch context for an incoming guest +- target/i386: sev: provide callback to setup outgoing context +- confidential guest support: introduce ConfidentialGuestMemoryEncryptionOps for encrypted VMs +- migration.json: add AMD SEV specific migration parameters +- doc: update AMD SEV to include Live migration flow +- crypto/tlscredspsk: Free username on finalize +- hw/nvme: fix leak of uninitialized memory in io_mgmt_recv +- hw/display/vhost-user-gpu.c: fix vhost_user_gpu_chr_read() +- cvm : Implement command blacklist for cvm security enhancement +- crypto: Introduce SM3 hash hmac pbkdf algorithm +- virtio-net: Use virtual time for RSC timers +- vvfat: Fix bug in writing to middle of file +- hw/core/ptimer: fix timer zero period condition for freq > 1GHz +- hw/misc: support vpsp + +* Thu Sep 5 2024 Jiabo Feng - 11:8.2.0-17 +- cvm : bug fix for undefined reference to 'virtcca_cvm_allowed' while compiling +- cvm : bug-fix for incorrect device name check for vhost-user-fs +- target/i386: add control bits support for LAM +- target/i386: add support for LAM in CPUID enumeration +- Add support for the virtcca cvm feature. 
+- target/sparc: use signed denominator in sdiv helper +- crypto: Introduce SM4 symmetric cipher algorithm +- ppc/vof: Fix unaligned FDT property access +- vl: fix "type is NULL" in -vga help +- hw/display/bcm2835_fb: fix fb_use_offsets condition +- aspeed/smc: Fix possible integer overflow +- hw/nvme: fix number of PIDs for FDP RUH update +- hw/nvme: fix memory leak in nvme_dsm +- hvf: arm: Do not advance PC when raising an exception +- physmem: Bail out qemu_ram_block_from_host() for invalid ram addrs + +* Tue Aug 13 2024 Jiabo Feng - 11:8.2.0-16 +- nbd/server: CVE-2024-7409: Close stray clients at server-stop +- nbd/server: CVE-2024-7409: Drop non-negotiating clients +- nbd/server: CVE-2024-7409: Cap default max-connections to 100 +- nbd/server: Plumb in new args to nbd_client_add() +- nbd: Minor style and typo fixes + +* Thu Jul 11 2024 Jiabo Feng - 11:8.2.0-15 +- block: Parse filenames only when explicitly requested (CVE-2024-4467) +- iotests/270: Don't store data-file with json: prefix in image (CVE-2024-4467) +- iotests/244: Don't store data-file with protocol in image (CVE-2024-4467) +- qcow2: Don't open data_file with BDRV_O_NO_IO (CVE-2024-4467) +- migration/dirtyrate: Fix segmentation fault +- target/hexagon: idef-parser fix leak of init_list + +* Sat Jun 15 2024 Jiabo Feng - 11:8.2.0-14 +- target/riscv/cpu.c: fix Zvkb extension config +- target/i386: Add new Hygon 'Dharma' CPU model +- target/i386: Add Hygon Dhyana-v3 CPU model +- ui/gtk: Fix mouse/motion event scaling issue with GTK display backend +- hw/ufs: Fix buffer overflow bug +- arm/virt: Set vcpus_count of CPU as 1 to compatible with libvirt +- ppc/pnv: I2C controller is not user creatable + +* Mon May 20 2024 Song Gao - 11:8.2.0-13 +- target/loongarch: Fix qemu-system-loongarch64 assert +- target/loongarch: Fix qemu-loongarch64 hang when executing 'll.d $t0, $t0, 0' +- target/loongarch: Fix tlb huge page loading issue +- target/loongarch/kvm: Add software breakpoint support +- 
target/loongarch/kvm: sync kernel header files +- hw/intc/loongarch_extioi: Add virt extension support +- target/loongarch/kvm: Add pmu support +- target/loongarch/kvm: Fix vm restore failed +- target/loongarch/kvm: Add pv steal time support +- target/loongarch/kvm: fpu save the vreg registers high + +* Fri May 10 2024 zhangxianting - 11:8.2.0-12 +- target/i386: Export RFDS bit to guests +- target/i386: Add new CPU model SierraForest +- target/i386: Introduce Icelake-Server-v7 to enable TSX +- hw/isa/vt82c686: Keep track of PIRQ/PINT pins separately +- kvm/arm: Fix compatibility of cold-plug CPU with SVE +- arm/virt/acpi: Extend cpufreq to support max_cpus +- kvm/arm: Fix SVE related logic for vcpu hotplug feature +- arm/virt: Don't modify smp.max_cpus when vcpu hotplug disabled +- acpi/cpu: Fix detection of present cpu + +* Mon Apr 22 2024 Jiabo Feng - 11:8.2.0-11 +- hw/sd/sdhci: Do not update TRNMOD when Command Inhibit (DAT) is set(CVE-2024-3447) +- hw/virtio/virtio-crypto: Protect from DMA re-entrancy bugs(CVE-2024-3446) +- hw/char/virtio-serial-bus: Protect from DMA re-entrancy bugs(CVE-2024-3446) +- hw/display/virtio-gpu: Protect from DMA re-entrancy bugs(CVE-2024-3446) +- hw/virtio: Introduce virtio_bh_new_guarded() helper +- hw/net/net_tx_pkt: Fix overrun in update_sctp_checksum() +- hw/nvme: fix -Werror=maybe-uninitialized +- block/virtio-blk: Fix memory leak from virtio_blk_zone_report +- hw/net/virtio-net: fix qemu set used ring flag even vhost started +- hw/scsi/scsi-generic: Fix io_timeout property not applying +- tests: bios-tables-test: Rename smbios type 4 related test functions + +* Thu Apr 18 2024 Tao Yang - 11:8.2.0-10 +- add '--enable-slirp' compilation options + +* Wed Apr 17 2024 zhangxianting - 11:8.2.0-9 +- remove chrpath + +* Wed Apr 17 2024 Jiabo Feng - 11:8.2.0-8 +- include/ui/rect.h: fix qemu_rect_init() mis-assignment + +* Tue Apr 16 2024 Keqian Zhu - 11:8.2.0-7 +- arm/virt: Use max_cpus to calculate redist1_count +- arm/virt: Use 
separate filed to identify cpu-hotplug enable + +* Wed Apr 10 2024 Jiabo Feng - 11:8.2.0-6 +- coro: support live patch for libcare +- tests/acpi: Update expected ACPI tables for vcpu hotplug(Update BinDir) +- arm/virt: Require mc->has_hotpluggable_cpus for cold-plugged vcpu +- arm/virt: Consider has_ged when set mc->has_hotpluggable_cpus +- arm/virt-acpi: Require possible_cpu_arch_ids for build_cpus_aml() +- acpi/ged: Remove cpuhp field of ged +- acpi/ged: Init cpu hotplug only when machine support it +- intc/gicv3: Fixes for vcpu hotplug +- arm/kvm: Set psci smccc filter only with vcpu hotplug +- accel/kvm: Use correct id for parked vcpu +- arm/virt: Fix adjudgement of core_id for vcpu hotplugged +- arm/virt.c: Convey local_err when set psci-conduit +- system/cpus: Fix resume_all_vcpus() under vCPU hotplug condition +- system/cpus: Fix pause_all_vcpus() under concurrent environment +- acpi/cpu: Fix cpu_hotplug_hw_init() +- arm/cpu: Some fixes for arm_cpu_unrealizefn() +- system/physmem: Fix possible double free when destroy cpu as +- hw/arm/virt: Expose cold-booted CPUs as MADT GICC Enabled +- tcg/mttcg: enable threads to unregister in tcg_ctxs[] +- hw/arm: Support hotplug capability check using _OSC method +- target/arm/kvm,tcg: Register/Handle SMCCC hypercall exits to VMM/Qemu +- target/arm/kvm: Write CPU state back to KVM on reset +- target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug +- physmem,gdbstub: Common helping funcs/changes to *unrealize* vCPU +- hw/arm: Changes required for reset and to support next boot +- arm/virt: Update the guest(via GED) about CPU hot-(un)plug events +- hw/intc/arm-gicv3*: Changes required to (re)init the vCPU register info +- hw/arm,gicv3: Changes to update GIC with vCPU hot-plug notification +- arm/virt: Changes to (un)wire GICC<->vCPU IRQs during hot-(un)plug +- arm/virt: Add/update basic hot-(un)plug framework +- hw/acpi: Update ACPI GED framework to support vCPU Hotplug +- arm/virt: Release objects for 
*disabled* possible vCPUs after init +- hw/acpi: Make _MAT method optional +- hw/arm: MADT Tbl change to size the guest with possible vCPUs +- hw/acpi: Update GED _EVT method AML with cpu scan +- hw/acpi: ACPI/AML Changes to reflect the correct _STA.{PRES,ENA} Bits to Guest +- arm/virt: Make ARM vCPU *present* status ACPI *persistent* +- arm/virt/acpi: Build CPUs AML with CPU Hotplug support +- tests/acpi/bios-tables-test: Allow changes to virt/DSDT file +- acpi/cpu: Add cpu_cppc building support +- arm/virt/acpi: Factor out CPPC building from DSDT CPU aml +- hw/acpi: Update CPUs AML with cpu-(ctrl)dev change +- arm/virt: Create GED dev before *disabled* CPU Objs are destroyed +- arm/virt: Add cpu hotplug events to GED during creation +- hw/acpi: Init GED framework with cpu hotplug events +- hw/acpi: Use qemu_present_cpu() API in ACPI CPU hotplug init +- hw/acpi: Add ACPI CPU hotplug init stub +- arm/acpi: Enable ACPI support for vcpu hotplug +- hw/acpi: Move CPU ctrl-dev MMIO region len macro to common header file +- arm/virt: Init PMU at host for all possible vcpus +- arm/virt,gicv3: Changes to pre-size GIC with possible vcpus @machine init +- arm/virt,kvm: Pre-create disabled possible vCPUs @machine init +- accel/kvm: Extract common KVM vCPU {creation,parking} code +- arm/virt,target/arm: Machine init time change common to vCPU {cold|hot}-plug +- hw/arm/virt: Move setting of common CPU properties in a function +- cpus-common: Add common CPU utility for possible vCPUs +- arm/virt,target/arm: Add new ARMCPU {socket,cluster,core,thread}-id property + +* Sun Apr 7 2024 Jiabo Feng - 11:8.2.0-5 +- vfio/migration: Add support for manual clear vfio dirty log +- vfio: Maintain DMA mapping range for the container +- linux-headers: update against 5.10 and manual clear vfio dirty log series +- arm/acpi: Fix when make qemu-system-aarch64 at x86_64 host bios_tables_test fail reason: __aarch64__ macro let build_pptt at x86_64 and aarch64 host build different function that let 
bios_tables_test fail. +- pl031: support rtc-timer property for pl031 +- feature: Add logs for vm start and destroy +- feature: Add log for each modules +- log: Add log at boot & cpu init for aarch64 +- bugfix: irq: Avoid covering object refcount of qemu_irq +- i386: cache passthrough: Update AMD 8000_001D.EAX[25:14] based on vCPU topo +- freeclock: set rtc_date_diff for X86 +- freeclock: set rtc_date_diff for arm +- freeclock: add qmp command to get time offset of vm in seconds +- tests: Disable filemonitor testcase +- shadow_dev: introduce shadow dev for virtio-net device +- pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff +- tests: virt: Update expected ACPI tables for virt test(Update BinDir) +- arm64: Add the cpufreq device to show cpufreq info to guest +- hw/arm64: add vcpu cache info support +- tests: virt: Allow changes to PPTT test table +- cpu: add Cortex-A72 processor kvm target support +- cpu: add Kunpeng-920 cpu support +- net: eepro100: validate various address valuesi(CVE-2021-20255) +- ide: ahci: add check to avoid null dereference (CVE-2019-12067) +- vdpa: set vring enable only if the vring address has already been set +- docs: Add generic vhost-vdpa device documentation +- vdpa: don't suspend/resume device when vdpa device not started +- vdpa: correct param passed in when unregister save +- vdpa: suspend function return 0 when the vdpa device is stopped +- vdpa: support vdpa device suspend/resume +- vdpa: move memory listener to the realize stage +- vdpa: implement vdpa device migration +- vhost: implement migration state notifier for vdpa device +- vhost: implement post resume bh +- vhost: implement savevm_handler for vdpa device +- vhost: implement vhost_vdpa_device_suspend/resume +- vhost: implement vhost-vdpa suspend/resume +- vhost: add vhost_dev_suspend/resume_op +- vhost: introduce bytemap for vhost backend logging +- vhost-vdpa: add migration log ops for VhostOps +- vhost-vdpa: add VHOST_BACKEND_F_BYTEMAPLOG +- hw/usb: reduce the 
vcpu cost of UHCI when VNC disconnect +- virtio-net: update the default and max of rx/tx_queue_size +- virtio-net: set the max of queue size to 4096 +- virtio-net: fix max vring buf size when set ring num +- virtio-net: bugfix: do not delete netdev before virtio net +- monitor: Discard BLOCK_IO_ERROR event when VM rebooted +- vhost-user: add unregister_savevm when vhost-user cleanup +- vhost-user: add vhost_set_mem_table when vm load_setup at destination +- vhost-user: quit infinite loop while used memslots is more than the backend limit +- fix qemu-core when vhost-user-net config with server mode +- vhost-user: Add support reconnect vhost-user socket +- vhost-user: Set the acked_features to vm's feature +- i6300esb watchdog: bugfix: Add a runstate transition +- hw/net/rocker_of_dpa: fix double free bug of rocker device +- net/dump.c: Suppress spurious compiler warning +- pcie: Add pcie-root-port fast plug/unplug feature +- pcie: Compat with devices which do not support Link Width, such as ioh3420 +- qdev/monitors: Fix redundant error_setg of qdev_add_device +- qemu-nbd: set timeout to qemu-nbd socket +- qemu-nbd: make native as the default aio mode +- nbd/server.c: fix invalid read after client was already free +- virtio-scsi: bugfix: fix qemu crash for hotplug scsi disk with dataplane +- virtio: bugfix: check the value of caches before accessing it +- virtio: print the guest virtio_net features that host does not support +- virtio: bugfix: add rcu_read_lock when vring_avail_idx is called +- virtio: check descriptor numbers +- migration: report multiFd related thread pid to libvirt +- migration: report migration related thread pid to libvirt +- cpu/features: fix bug for memory leakage - doc: Update multi-thread compression doc -- configure: Enable test and libs for zstd - -* Sat Jan 30 2021 Huawei Technologies Co., Ltd -- scsi-bus: Refactor the code that retries requests +- migration: Add compress_level sanity check +- migration: Add zstd support in multi-thread
compression +- migration: Add multi-thread compress ops +- migration: Refactoring multi-thread compress migration +- migration: Add multi-thread compress method +- migration: skip cache_drop for bios bootloader and nvram template +- oslib-posix: optimise vm startup time for 1G hugepage +- monitor/qmp: drop inflight rsp if qmp client broken +- ps2: fix oob in ps2 kbd +- Currently, while kvm and qemu can not handle some kvm exit, qemu will do vm_stop, which will make vm in pause state. This action make vm unrecoverable, so send guest panic to libvirt instead. +- vhost: cancel migration when vhost-user restarted during migration + +* Mon Apr 1 2024 Jiabo Feng - 11:8.2.0-4 +- migration: Skip only empty block devices +- iotests: adapt to output change for recently introduced 'detached hea… +- travis-ci: Rename SOFTMMU -> SYSTEM +- block: disallow block jobs when there is a BDRV_O_INACTIVE flag +- [backup] memory: backup hugepages: hugepages files maybe leftover +- memory: [backup] Modify the VM's physical bits value set policy.
+- ui/clipboard: mark type as not available when there is no data (CVE-2023-6683) +- virtio-net: correctly copy vnet header when flushing TX (CVE-2023-6693) + +* Wed Mar 27 2024 Jiabo Feng - 11:8.2.0-3 +- disable keyring option +- loongarch: Change the UEFI loading mode to loongarch +- target/loongarch: Fix qtest test-hmp error when KVM-only build +- target/loongarch/kvm: Enable LSX/LASX extension +- target/loongarch: Set cpuid CSR register only once with kvm mode +- configure: Add linux header compile support for LoongArch +- hw/intc/loongarch_extioi: Add vmstate post_load support +- hw/intc/loongarch_extioi: Add dynamic cpu number support +- hw/loongarch/virt: Set iocsr address space per-board rather than percpu +- hw/intc/loongarch_ipi: Use MemTxAttrs interface for ipi ops +- target/loongarch: Add loongarch kvm into meson build +- target/loongarch: Implement set vcpu intr for kvm +- target/loongarch: Restrict TCG-specific code +- target/loongarch: Implement kvm_arch_handle_exit +- target/loongarch: Implement kvm_arch_init_vcpu +- target/loongarch: Implement kvm_arch_init function +- target/loongarch: Implement kvm get/set registers +- target/loongarch: Supplement vcpu env initial when vcpu reset +- target/loongarch: Define some kvm_arch interfaces +- linux-headers: Synchronize linux headers from linux v6.7.0-rc8 +- linux-headers: Update to Linux v6.7-rc5 +- target/loongarch: move translate modules to tcg/ +- target/loongarch/meson: move gdbstub.c to loongarch.ss +- target/loongarch: Add timer information dump support +- hw/loongarch/virt: Align high memory base address with super page size + +* Sat Mar 23 2024 Jiabo Feng - 11:8.2.0-2 +- block: bugfix: Don't pause vm when NOSPACE EIO happened +- block: enable cache mode of empty cdrom +- block/mirror: fix file-system went to read-only after block-mirror +- scsi-bus: fix incorrect call for blk_error_retry_reset_timeout() +- scsi-bus: fix unmatched object_unref() +- block: Add sanity check when setting retry 
parameters +- block-backend: Stop retrying when draining - scsi-disk: Add support for retry on errors -- qapi/block-core: Add retry option for error action -- block-backend: Introduce retry timer -- block-backend: Add device specific retry callback -- block-backend: Enable retry action on errors -- block-backend: Add timeout support for retry -- block: Add error retry param setting -- virtio-blk: Refactor the code that processes queued requests -- virtio-blk: On restart, process queued requests in the proper context +- scsi-bus: Refactor the code that retries requests - virtio_blk: Add support for retry on errors - -* Mon Jan 18 2021 Huawei Technologies Co., Ltd -- feature: enable spice protocol - -* Mon Jan 18 2021 Huawei Technologies Co., Ltd -- reorder changelog in desceding order - -* Fri Jan 15 2021 Huawei Technologies Co., Ltd -- memory: clamp cached translation in case it points to an MMIO region - -* Wed Dec 9 2020 Huawei Technologies Co., Ltd -- target/arm: Fix write redundant values to kvm - -* Fri Dec 11 2020 Huawei Technologies Co., Ltd -- hostmem: Fix up free host_nodes list right after visited - -* Fri Dec 25 2020 Huawei Technologies Co., Ltd -- add qemu-block-rbd package -- add qemu-block-ssh package - -* Fri Dec 11 2020 Huawei Technologies Co., Ltd -- hostmem: Fix up free host_nodes list right after visited - -* Fri Dec 11 2020 Huawei Technologies Co., Ltd -- slirp: check pkt_len before reading protocol header for fixing CVE-2020-29129 and CVE-2020-29130 - -* Wed Dec 9 2020 Huawei Technologies Co., Ltd -- target/arm: Fix write redundant values to kvm - -* Wed Dec 2 2020 Huawei Technologies Co., Ltd -- migration/tls: save hostname into MigrationState -- migration/tls: extract migration_tls_client_create for common-use -- migration/tls: add tls_hostname into MultiFDSendParams -- migration/tls: extract cleanup function for common-use -- migration/tls: add support for multifd tls-handshake -- migration/tls: add trace points for multifd-tls -- qemu-file: 
Don't do IO after shutdown -- multifd: Make sure that we don't do any IO after an error -- migration: Don't send data if we have stopped -- migration: Create migration_is_running() -- migration: fix COLO broken caused by a previous commit -- migration/multifd: fix hangup with TLS-Multifd due to blocking handshake -- multifd/tls: fix memoryleak of the QIOChannelSocket object when cancelling migration - -* Wed Nov 18 2020 Huawei Technologies Co., Ltd -- ati: check x y display parameter values - -* Fri Nov 13 2020 Huawei Technologies Co., Ltd -- json: Fix a memleak in parse_pair() - -* Wed Nov 11 2020 Huawei Technologies Co., Ltd -- hw: usb: hcd-ohci: check for processed TD before retire -- hw: ehci: check return value of 'usb_packet_map' -- hw: usb: hcd-ohci: check len and frame_number variables -- hw/net/e1000e: advance desc_offset in case of null descriptor - -* Fri Oct 30 2020 Huawei Technologies Co., Ltd -- migration/dirtyrate: setup up query-dirtyrate framwork -- migration/dirtyrate: add DirtyRateStatus to denote calculation status -- migration/dirtyrate: Add RamblockDirtyInfo to store sampled page info -- migration/dirtyrate: Add dirtyrate statistics series functions -- migration/dirtyrate: move RAMBLOCK_FOREACH_MIGRATABLE into ram.h -- migration/dirtyrate: Record hash results for each sampled page -- migration/dirtyrate: Compare page hash results for recorded sampled page -- migration/dirtyrate: skip sampling ramblock with size below MIN_RAMBLOCK_SIZE -- migration/dirtyrate: Implement set_sample_page_period() and is_sample_period_valid() -- migration/dirtyrate: Implement calculate_dirtyrate() function -- migration/dirtyrate: Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function -- migration/dirtyrate: Add trace_calls to make it easier to debug -- migration/dirtyrate: record start_time and calc_time while at the measuring state -- migration/dirtyrate: present dirty rate only when querying the rate has completed -- migration/dirtyrate: simplify includes 
in dirtyrate.c - -* Fri Oct 30 2020 Huawei Technologies Co., Ltd -- elf2dmp: Fix memory leak on main() error paths -- io: Don't use '#' flag of printf format -- hw/display/omap_lcdc: Fix potential NULL pointer dereference -- hw/display/exynos4210_fimd: Fix potential NULL pointer dereference -- block/vvfat: Fix bad printf format specifiers -- block: Remove unused include -- ssi: Fix bad printf format specifiers -- net/l2tpv3: Remove redundant check in net_init_l2tpv3() - -* Thu Oct 29 2020 Huawei Technologies Co., Ltd -- Bugfix: hw/acpi: Use max_cpus instead of cpus when build PPTT table - -* Wed Oct 21 2020 Huawei Technologies Co., Ltd -- net: remove an assert call in eth_get_gso_type - -* Wed Oct 14 2020 Prasad J Pandit -- pci: check bus pointer before dereference -- hw/ide: check null block before _cancel_dma_sync - -* Mon Sep 28 2020 Huawei Technologies Co., Ltd -- sm501: Replace hand written implementation with pixman where possible -- sm501: Clean up local variables in sm501_2d_operation -- sm501: Use BIT(x) macro to shorten constant -- sm501: Shorten long variable names in sm501_2d_operation -- sm501: Convert printf + abort to qemu_log_mask -- hw/net/net_tx_pkt: fix assertion failure in net_tx_pkt_add_raw_fragment -- hw/net/xgmac: Fix buffer overflow in xgmac_enet_send() - -* Fri Sep 18 2020 Huawei Technologies Co., Ltd -- hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch -- hw-xhci-check-return-value-of-usb_packet_map.patch - -* Fri Sep 11 2020 Huawei Technologies Co., Ltd -- slirp/src/ip6_input.c: fix out-of-bounds read information vulnerability - -* Tue Sep 08 2020 Huawei Technologies Co., Ltd -- target/arm: ignore evtstrm and cpuid CPU features - -* Fri Aug 21 2020 Huawei Technologies Co., Ltd -- hw/usb/core.c: fix buffer overflow in do_token_setup function - -* Wed Aug 19 2020 Huawei Technologies Co., Ltd -- target-arm-convert-isar-regs-to-array.patch -- target-arm-parse-cpu-feature-related-options.patch -- 
target-arm-register-CPU-features-for-property.patch -- target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch -- target-arm-introduce-CPU-feature-dependency-mechanis.patch -- target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch -- target-arm-Add-CPU-features-to-query-cpu-model-expan.patch -- target-arm-Update-ID-fields.patch -- target-arm-Add-more-CPU-features.patch - -* Wed Aug 19 2020 Huawei Technologies Co., Ltd -- target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch -- target-arm-Add-ID_AA64MMFR2_EL1.patch -- target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch -- target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch -- target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch -- target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch -- target-arm-Stop-assuming-DBGDIDR-always-exists.patch -- target-arm-Move-DBGDIDR-into-ARMISARegisters.patch -- target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch -- target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch -- target-arm-Read-debug-related-ID-registers-from-KVM.patch -- target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch -- target-arm-monitor-query-cpu-model-expansion-crashed.patch - -* Tue Aug 18 2020 Huawei Technologies Co., Ltd -- hw/acpi/aml-build.c: build smt processor structure to support smt topology - -* Thu Aug 13 2020 Huawei Technologies Co., Ltd --target/arm: Aarch64 support vtpm - -* Wed Aug 12 2020 Huawei Technologies Co., Ltd -- backport upstream patch to support SHPCHotplug in arm - -* Thu Aug 6 2020 Huawei Technologies Co., Ltd -- es1370: check total frame count against current frame -- exec: set map length to zero when returning NULL -- ati-vga: check mm_index before recursive call (CVE-2020-13800) -- megasas: use unsigned type for reply_queue_head and check index -- megasas: avoid NULL pointer dereference -- megasas: use unsigned type for positive numeric fields -- hw/scsi/megasas: Fix possible out-of-bounds array access in tracepoints - -* Thu Aug 6 2020 Huawei 
Technologies Co., Ltd -- tests: Disalbe filemonitor testcase - -* Sat Jun 20 2020 Huawei Technologies Co., Ltd -- target/arm: Fix PAuth sbox functions -- fix two patches' format which can cause git am failed - -* Fri May 29 2020 Huawei Technologies Co., Ltd -- target/arm: Add the kvm_adjvtime vcpu property for Cortex-A72 - -* Wed May 27 2020 Huawei Technologies Co., Ltd. -- Revert: "vtimer: compat cross version migration from v4.0.1" -- ARM64: record vtimer tick when cpu is stopped -- hw/arm/virt: add missing compat for kvm-no-adjvtime -- migration: Compat virtual timer adjust for v4.0.1 and v4.1.0 -- vtimer: Drop vtimer virtual timer adjust - -* Fri May 22 2020 Huawei Technologies Co., Ltd. -- ip_reass: Fix use after free -- bt: use size_t type for length parameters instead of int -- log: Add some logs on VM runtime path - -* Fri May 15 2020 Huawei Technologies Co., Ltd. -- ide: Fix incorrect handling of some PRDTs in ide_dma_cb() -- ati-vga: Fix checks in ati_2d_blt() to avoid crash -- slirp: tftp: restrict relative path access - -* Tue May 12 2020 Huawei Technologies Co., Ltd. -- arm/virt: Support CPU cold plug - -* Sat May 9 2020 Huawei Technologies Co., Ltd. -- migration/ram: do error_free after migrate_set_error to avoid memleaks. -- migration/ram: fix memleaks in multifd_new_send_channel_async. -- migration/rdma: fix a memleak on error path in rdma_start_incoming_migration. - -* Fri May 8 2020 Huawei Technologies Co., Ltd. -- vtimer: compat cross version migration from v4.0.1 - -* Fri Apr 24 2020 Huawei Technologies Co., Ltd. -- migration: backport migration patches from upstream - -* Fri Apr 24 2020 Huawei Technologies Co., Ltd. -- arm/virt: Add CPU hotplug support - -* Wed Apr 22 2020 Huawei Technologies Co., Ltd. -- backport patch to enable arm/virt memory hotplug - -* Wed Apr 22 2020 Huawei Technologies Co., Ltd. -- backport patch to enable target/arm/kvm Adjust virtual time - -* Fri Apr 17 2020 Huawei Technologies Co., Ltd. 
-- backport patch bundles from qemu stable v4.1.1 - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- aio-wait: delegate polling of main AioContext if BQL not held -- async: use explicit memory barriers - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- pcie: Add pcie-root-port fast plug/unplug feature -- pcie: Compat with devices which do not support Link Width, such as ioh3420 - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- qcow2-bitmap: Fix uint64_t left-shift overflow - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- COLO-compare: Fix incorrect `if` logic - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- block/backup: fix max_transfer handling for copy_range -- block/backup: fix backup_cow_with_offload for last cluster -- qcow2: Limit total allocation range to INT_MAX -- mirror: Do not dereference invalid pointers - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- file-posix: Handle undetectable alignment - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- vhost: Fix memory region section comparison -- memory: Provide an equality function for MemoryRegionSections -- memory: Align MemoryRegionSections fields - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- block/iscsi: use MIN() between mx_sb_len and sb_len_wr -- moniter: fix memleak in monitor_fdset_dup_fd_find_remove - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- tcp_emu: fix unsafe snprintf() usages -- util: add slirp_fmt() helpers -- slirp: use correct size while emulating commands -- slirp: use correct size while emulating IRC commands -- tcp_emu: Fix oob access -- iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. 
-- 9pfs: local: Fix possible memory leak in local_link() +- block: Add error retry param setting +- block-backend: Add timeout support for retry +- block-backend: Enable retry action on errors +- block-backend: Add device specific retry callback +- block-backend: Introduce retry timer +- qapi/block-core: Add retry option for error action +- scsi: bugfix: fix division by zero +- scsi: cdrom: Fix crash after remote cdrom detached +- qemu-pr: fixed ioctl failed for multipath disk - scsi-disk: define props in scsi_block_disk to avoid memleaks -- arm/translate-a64: fix uninitialized variable warning -- block: fix memleaks in bdrv_refresh_filename -- vnc: fix memory leak when vnc disconnect -- block: fix memleaks in bdrv_refresh_filename - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- linux headers: update against "KVM/ARM: Fix >256 vcpus" -- intc/arm_gic: Support IRQ injection for more than 256 vcpus -- ARM: KVM: Check KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 for smp_cpus > - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- vnc: fix memory leak when vnc disconnect - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- pcie: disable the PCI_EXP_LINKSTA_DLLA cap for pcie-root-port by default - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- cpu: add Kunpeng-920 cpu support -- cpu: parse +/- feature to avoid failure -- cpu: add Cortex-A72 processor kvm target support - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- vhost-user-scsi: prevent using uninitialized vqs - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- util/async: hold AioContext ref to prevent use-after-free - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. -- xhci: Fix memory leak in xhci_address_slot -- xhci: Fix memory leak in xhci_kick_epctx -- ehci: fix queue->dev null ptr dereference - -* Thu Apr 16 2020 Huawei Technologies Co., Ltd. 
-- tests/bios-tables-test: disable this testcase -- hw/arm/virt: Introduce cpu topology support -- hw/arm64: add vcpu cache info support - -* Wed Apr 15 2020 Huawei Technologies Co., Ltd. -- smbios: Add missing member of type 4 for smbios 3.0 - -* Wed Apr 15 2020 Huawei Technologies Co., Ltd. -- bios-tables-test: prepare to change ARM virt ACPI DSDT -- arm64: Add the cpufreq device to show cpufreq info to guest - -* Wed Apr 15 2020 Huawei Technologies Co., Ltd. -- qcow2: fix memory leak in qcow2_read_extensions - -* Wed Apr 15 2020 Huawei Technologies Co., Ltd. -- pl011: reset read FIFIO when UARTTIMSC=0 & UARTICR=0xff -- pl031: support rtc-timer property for pl031 -- vhost: cancel migration when vhost-user restarted - -* Mon Apr 13 2020 openEuler Buildteam - version-release +- bugfix: fix possible memory leak +- bugfix: fix some illegal memory access and memory leak +- util/log: add CONFIG_DISABLE_QEMU_LOG macro +- log: Add some logs on VM runtime path +- bugfix: fix eventfds may double free when vm_id reused in ivshmem +- hw/display/macfb: Fix missing ERRP_GUARD() in macfb_nubus_realize() +- hw/cxl/cxl-host: Fix missing ERRP_GUARD() in cxl_fixed_memory_window_config() +- qemu-img create: 'cache' paramter only use for reg file image +- qemu-img: add qemu-img direct create +- qemu-img block: set zero flags only when discard_zeros of the block supported +- Revert "file-posix: Remove unused s->discard_zeroes" +- pcie_sriov: Validate NumVFs (CVE-2024-26327) +- hw/nvme: Use pcie_sriov_num_vfs() (CVE-2024-26328) +- hw/acpi/cpu: Use CPUState typedef +- target/i386/sev: Fix missing ERRP_GUARD() for error_prepend() +- virtio-gpu: remove needless condition +- hw/i2c/smbus_slave: Add object path on error prints +- vfio/pci: Ascend710 change to bar2 quirk +- vfio/pci: Ascend910 need 4Bytes quirk in bar0 +- vfio/pci: Ascend710 need 4Bytes quirk in bar0 +- vfio/pci: Ascend310 need 4Bytes quirk in bar4 +- chardev/char-socket: Fix TLS io channels sending too much data to the 
backend +- i386/cpuid: Move leaf 7 to correct group +- i386/cpuid: Decrease cpuid_i when skipping CPUID leaf 1F +- i386/cpu: Mask with XCR0/XSS mask for FEAT_XSAVE_XCR0_HI and FEAT_XSAVE_XSS_HI leafs +- i386/cpu: Clear FEAT_XSAVE_XSS_LO/HI leafs when CPUID_EXT_XSAVE is not available +- blkio: Respect memory-alignment for bounce buffer allocations +- virtio-gpu: Correct virgl_renderer_resource_get_info() error check +- hw/usb: Style cleanup +- tests/qemu-iotests: resolved the problem that the 108 test cases in the container fail + +* Thu Feb 29 2024 Tao Yang - 11:8.2.0-1 - Package init diff --git a/qga-Add-log-to-guest-fsfreeze-thaw-command.patch b/qga-Add-log-to-guest-fsfreeze-thaw-command.patch new file mode 100644 index 0000000000000000000000000000000000000000..693f089ff513589ddffe4bf2e0f267f51e658558 --- /dev/null +++ b/qga-Add-log-to-guest-fsfreeze-thaw-command.patch @@ -0,0 +1,48 @@ +From 3a14516128cf936906e5f519bf7808b9a977a757 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Fri, 7 Mar 2025 21:57:29 -0500 +Subject: [PATCH] qga: Add log to guest-fsfreeze-thaw command +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from ad1e6843632555c771dda6a9425930fa25b71fb3 + +Reviewed-by: Daniel P.
Berrangé +Message-ID: <20241216154552.213961-2-kkostiuk@redhat.com> +Signed-off-by: Konstantin Kostiuk +Signed-off-by: qihao_yewu +--- + qga/commands-posix.c | 1 + + qga/commands-win32.c | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 6169bbf7a0..f0d8e9e9c5 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -759,6 +759,7 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp) + ret = qmp_guest_fsfreeze_do_thaw(errp); + if (ret >= 0) { + ga_unset_frozen(ga_state); ++ slog("guest-fsthaw called"); + execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp); + } else { + ret = 0; +diff --git a/qga/commands-win32.c b/qga/commands-win32.c +index 697c65507c..656d1459f1 100644 +--- a/qga/commands-win32.c ++++ b/qga/commands-win32.c +@@ -1275,6 +1275,9 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp) + qga_vss_fsfreeze(&i, false, NULL, errp); + + ga_unset_frozen(ga_state); ++ ++ slog("guest-fsthaw called"); ++ + return i; + } + +-- +2.41.0.windows.1 + diff --git a/qga-Don-t-daemonize-before-channel-is-initialized.patch b/qga-Don-t-daemonize-before-channel-is-initialized.patch new file mode 100644 index 0000000000000000000000000000000000000000..0f76c48486338ccfea82d0c65f8392d7d5e8b9af --- /dev/null +++ b/qga-Don-t-daemonize-before-channel-is-initialized.patch @@ -0,0 +1,106 @@ +From 752d98d93459c87817be5e02c39257e0fa5934f8 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Fri, 7 Mar 2025 21:07:11 -0500 +Subject: [PATCH] qga: Don't daemonize before channel is initialized +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from c6f5dd7ac8ef62dcdec4cdeda1467c658161afff + +If the agent is set to daemonize but for whatever reason fails to +init the channel, the error message is lost. Worse, the agent +daemonizes needlessly and returns success. 
For instance: + + # qemu-ga -m virtio-serial \ + -p /dev/nonexistent_device \ + -f /run/qemu-ga.pid \ + -t /run \ + -d + # echo $? + 0 + +This makes it needlessly hard for init scripts to detect a +failure in qemu-ga startup. Though, they shouldn't pass '-d' in +the first place. + +Let's open the channel first and only after that become a daemon. + +Related bug: https://bugs.gentoo.org/810628 + +Signed-off-by: Michal Privoznik +Reviewed-by: Ján Tomko +Reviewed-by: Konstantin Kostiuk +Message-ID: <7a42b0cbda5c7e01cf76bc1b29a1210cd018fa78.1736261360.git.mprivozn@redhat.com> +Signed-off-by: Konstantin Kostiuk +Signed-off-by: qihao_yewu +--- + qga/main.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +diff --git a/qga/main.c b/qga/main.c +index c4dcbb86be..8d341ffdf1 100644 +--- a/qga/main.c ++++ b/qga/main.c +@@ -1407,7 +1407,6 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + if (config->daemonize) { + /* delay opening/locking of pidfile till filesystems are unfrozen */ + s->deferred_options.pid_filepath = config->pid_filepath; +- become_daemon(NULL); + } + if (config->log_filepath) { + /* delay opening the log file till filesystems are unfrozen */ +@@ -1416,9 +1415,6 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + ga_disable_logging(s); + qmp_for_each_command(&ga_commands, ga_disable_not_allowed_freeze, NULL); + } else { +- if (config->daemonize) { +- become_daemon(config->pid_filepath); +- } + if (config->log_filepath) { + FILE *log_file = ga_open_logfile(config->log_filepath); + if (!log_file) { +@@ -1482,6 +1478,20 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + } + #endif + ++ if (!channel_init(s, s->config->method, s->config->channel_path, ++ s->socket_activation ? 
FIRST_SOCKET_ACTIVATION_FD : -1)) { ++ g_critical("failed to initialize guest agent channel"); ++ return NULL; ++ } ++ ++ if (config->daemonize) { ++ if (ga_is_frozen(s)) { ++ become_daemon(NULL); ++ } else { ++ become_daemon(config->pid_filepath); ++ } ++ } ++ + ga_state = s; + return s; + failed: +@@ -1516,8 +1526,9 @@ static void cleanup_agent(GAState *s) + + static int run_agent_once(GAState *s) + { +- if (!channel_init(s, s->config->method, s->config->channel_path, +- s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) { ++ if (!s->channel && ++ channel_init(s, s->config->method, s->config->channel_path, ++ s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) { + g_critical("failed to initialize guest agent channel"); + return EXIT_FAILURE; + } +@@ -1526,6 +1537,7 @@ static int run_agent_once(GAState *s) + + if (s->channel) { + ga_channel_free(s->channel); ++ s->channel = NULL; + } + + return EXIT_SUCCESS; +-- +2.41.0.windows.1 + diff --git a/qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch b/qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch deleted file mode 100644 index a901a500181bb9a36f9bd307d8bdee5929b6144f..0000000000000000000000000000000000000000 --- a/qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 1580682eafb489eaf417456778267662629cf696 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Tue, 30 Jun 2020 11:03:33 +0200 -Subject: [PATCH 05/11] qga: Plug unlikely memory leak in - guest-set-memory-blocks - -transfer_memory_block() leaks an Error object when reading file -/sys/devices/system/memory/memory/state fails with errno other -than ENOENT, and @sys2memblk is false, i.e. when the state file exists -but cannot be read (seems quite unlikely), and this is -guest-set-memory-blocks, not guest-get-memory-blocks. - -Plug the leak. 
- -Fixes: bd240fca42d5f072fb758a71720d9de9990ac553 -Cc: Michael Roth -Cc: Hailiang Zhang -Signed-off-by: Markus Armbruster -Reviewed-by: zhanghailiang -Message-Id: <20200630090351.1247703-9-armbru@redhat.com> -Signed-off-by: BiaoXiang Ye ---- - qga/commands-posix.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index dfc05f5b..c318cee7 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -2420,6 +2420,7 @@ static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk, - if (sys2memblk) { - error_propagate(errp, local_err); - } else { -+ error_free(local_err); - result->response = - GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED; - } --- -2.27.0.dirty - diff --git a/qga-commands-posix-fix-use-after-free-of-local_err.patch b/qga-commands-posix-fix-use-after-free-of-local_err.patch deleted file mode 100644 index 9628d0c59445c9d29ddaa39e6fb271fe73a5c274..0000000000000000000000000000000000000000 --- a/qga-commands-posix-fix-use-after-free-of-local_err.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 15847279f29b0bd67b95daefff395cab8fad80d3 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 24 Mar 2020 18:36:30 +0300 -Subject: [PATCH 4/5] qga/commands-posix: fix use after free of local_err - -local_err is used several times in guest_suspend(). Setting non-NULL -local_err will crash, so let's zero it after freeing. Also fix possible -leak of local_err in final if(). 
- -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200324153630.11882-7-vsementsov@virtuozzo.com> -Reviewed-by: Richard Henderson -Signed-off-by: Markus Armbruster -Signed-off-by: Zhenyu Ye ---- - qga/commands-posix.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index dfc05f5b..66164e6c 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -1760,6 +1760,7 @@ static void guest_suspend(SuspendMode mode, Error **errp) - } - - error_free(local_err); -+ local_err = NULL; - - if (pmutils_supports_mode(mode, &local_err)) { - mode_supported = true; -@@ -1771,6 +1772,7 @@ static void guest_suspend(SuspendMode mode, Error **errp) - } - - error_free(local_err); -+ local_err = NULL; - - if (linux_sys_state_supports_mode(mode, &local_err)) { - mode_supported = true; -@@ -1778,6 +1780,7 @@ static void guest_suspend(SuspendMode mode, Error **errp) - } - - if (!mode_supported) { -+ error_free(local_err); - error_setg(errp, - "the requested suspend mode is not supported by the guest"); - } else { --- -2.22.0.windows.1 - diff --git a/qga-fix-assert-regression-on-guest-shutdown.patch b/qga-fix-assert-regression-on-guest-shutdown.patch deleted file mode 100644 index c5f1e1069b5097ff1adf2328bea6a25e9483cda1..0000000000000000000000000000000000000000 --- a/qga-fix-assert-regression-on-guest-shutdown.patch +++ /dev/null @@ -1,47 +0,0 @@ -From aeccff89333c565c7a894f99c17c0044d7d43be2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 4 Jun 2020 11:44:25 +0200 -Subject: [PATCH 02/11] qga: fix assert regression on guest-shutdown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since commit 781f2b3d1e ("qga: process_event() simplification"), -send_response() is called unconditionally, but will assert when "rsp" is -NULL. This may happen with QCO_NO_SUCCESS_RESP commands, such as -"guest-shutdown". 
- -Fixes: 781f2b3d1e5ef389b44016a897fd55e7a780bf35 -Cc: Michael Roth -Reported-by: Christian Ehrhardt -Signed-off-by: Marc-André Lureau -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Christian Ehrhardt -Tested-by: Christian Ehrhardt -Cc: qemu-stable@nongnu.org -Signed-off-by: Michael Roth -Signed-off-by: BiaoXiang Ye ---- - qga/main.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/qga/main.c b/qga/main.c -index c35c2a21..12fa463f 100644 ---- a/qga/main.c -+++ b/qga/main.c -@@ -529,7 +529,11 @@ static int send_response(GAState *s, const QDict *rsp) - QString *payload_qstr, *response_qstr; - GIOStatus status; - -- g_assert(rsp && s->channel); -+ g_assert(s->channel); -+ -+ if (!rsp) { -+ return 0; -+ } - - payload_qstr = qobject_to_json(QOBJECT(rsp)); - if (!payload_qstr) { --- -2.27.0.dirty - diff --git a/qio-Inherit-follow_coroutine_ctx-across-TLS.patch b/qio-Inherit-follow_coroutine_ctx-across-TLS.patch new file mode 100644 index 0000000000000000000000000000000000000000..81909d57f9eb98f127c98128a2776da8b7a9933c --- /dev/null +++ b/qio-Inherit-follow_coroutine_ctx-across-TLS.patch @@ -0,0 +1,121 @@ +From 4dccc6603af2cd3deefb6ac94c3e7aec4b60485d Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 17 May 2024 21:50:14 -0500 +Subject: [PATCH] qio: Inherit follow_coroutine_ctx across TLS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Since qemu 8.2, the combination of NBD + TLS + iothread crashes on an +assertion failure: + +qemu-kvm: ../io/channel.c:534: void qio_channel_restart_read(void *): Assertion `qemu_get_current_aio_context() == qemu_coroutine_get_aio_context(co)' failed. + +It turns out that when we removed AioContext locking, we did so by +having NBD tell its qio channels that it wanted to opt in to +qio_channel_set_follow_coroutine_ctx(); but while we opted in on the +main channel, we did not opt in on the TLS wrapper channel. 
+qemu-iotests has coverage of NBD+iothread and NBD+TLS, but apparently +no coverage of NBD+TLS+iothread, or we would have noticed this +regression sooner. (I'll add that in the next patch) + +But while we could manually opt in to the TLS channel in nbd/server.c +(a one-line change), it is more generic if all qio channels that wrap +other channels inherit the follow status, in the same way that they +inherit feature bits. + +CC: Stefan Hajnoczi +CC: Daniel P. Berrangé +CC: qemu-stable@nongnu.org +Fixes: https://issues.redhat.com/browse/RHEL-34786 +Fixes: 06e0f098 ("io: follow coroutine AioContext in qio_channel_yield()", v8.2.0) +Signed-off-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Message-ID: <20240518025246.791593-5-eblake@redhat.com> +(cherry picked from commit 199e84de1c903ba5aa1f7256310bbc4a20dd930b) +Signed-off-by: zhujun2 +--- + io/channel-tls.c | 26 +++++++++++++++----------- + io/channel-websock.c | 1 + + 2 files changed, 16 insertions(+), 11 deletions(-) + +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 58fe1aceee..a8ad89c3d1 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -69,37 +69,40 @@ qio_channel_tls_new_server(QIOChannel *master, + const char *aclname, + Error **errp) + { +- QIOChannelTLS *ioc; ++ QIOChannelTLS *tioc; ++ QIOChannel *ioc; + +- ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ tioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ ioc = QIO_CHANNEL(tioc); + +- ioc->master = master; ++ tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { +- qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SHUTDOWN); ++ qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } + object_ref(OBJECT(master)); + +- ioc->session = qcrypto_tls_session_new( ++ tioc->session = qcrypto_tls_session_new( + creds, + NULL, + aclname, + QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + 
errp); +- if (!ioc->session) { ++ if (!tioc->session) { + goto error; + } + + qcrypto_tls_session_set_callbacks( +- ioc->session, ++ tioc->session, + qio_channel_tls_write_handler, + qio_channel_tls_read_handler, +- ioc); ++ tioc); + +- trace_qio_channel_tls_new_server(ioc, master, creds, aclname); +- return ioc; ++ trace_qio_channel_tls_new_server(tioc, master, creds, aclname); ++ return tioc; + + error: +- object_unref(OBJECT(ioc)); ++ object_unref(OBJECT(tioc)); + return NULL; + } + +@@ -116,6 +119,7 @@ qio_channel_tls_new_client(QIOChannel *master, + ioc = QIO_CHANNEL(tioc); + + tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +diff --git a/io/channel-websock.c b/io/channel-websock.c +index a12acc27cf..de39f0d182 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -883,6 +883,7 @@ qio_channel_websock_new_server(QIOChannel *master) + ioc = QIO_CHANNEL(wioc); + + wioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +-- +2.41.0.windows.1 + diff --git a/qmp-fix-leak-on-callbacks-that-return-both-value-and.patch b/qmp-fix-leak-on-callbacks-that-return-both-value-and.patch deleted file mode 100644 index 1ceb1e70b84f1e1a9a3f785ff2d4d55b697a7cb4..0000000000000000000000000000000000000000 --- a/qmp-fix-leak-on-callbacks-that-return-both-value-and.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 1f1949368d4ac7a18973aa83a074daf01daf97ad Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 25 Mar 2020 19:47:22 +0100 -Subject: [PATCH 3/5] qmp: fix leak on callbacks that return both value and - error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Direct leak 
of 4120 byte(s) in 1 object(s) allocated from: - #0 0x7fa114931887 in __interceptor_calloc (/lib64/libasan.so.6+0xb0887) - #1 0x7fa1144ad8f0 in g_malloc0 (/lib64/libglib-2.0.so.0+0x588f0) - #2 0x561e3c9c8897 in qmp_object_add /home/elmarco/src/qemu/qom/qom-qmp-cmds.c:291 - #3 0x561e3cf48736 in qmp_dispatch /home/elmarco/src/qemu/qapi/qmp-dispatch.c:155 - #4 0x561e3c8efb36 in monitor_qmp_dispatch /home/elmarco/src/qemu/monitor/qmp.c:145 - #5 0x561e3c8f09ed in monitor_qmp_bh_dispatcher /home/elmarco/src/qemu/monitor/qmp.c:234 - #6 0x561e3d08c993 in aio_bh_call /home/elmarco/src/qemu/util/async.c:136 - #7 0x561e3d08d0a5 in aio_bh_poll /home/elmarco/src/qemu/util/async.c:164 - #8 0x561e3d0a535a in aio_dispatch /home/elmarco/src/qemu/util/aio-posix.c:380 - #9 0x561e3d08e3ca in aio_ctx_dispatch /home/elmarco/src/qemu/util/async.c:298 - #10 0x7fa1144a776e in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x5276e) - -Signed-off-by: Marc-André Lureau -Message-Id: <20200325184723.2029630-3-marcandre.lureau@redhat.com> -Reviewed-by: Markus Armbruster -Signed-off-by: Paolo Bonzini -Signed-off-by: Zhenyu Ye ---- - qapi/qmp-dispatch.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c -index 6dfdad57..a635abb9 100644 ---- a/qapi/qmp-dispatch.c -+++ b/qapi/qmp-dispatch.c -@@ -189,6 +189,8 @@ QDict *qmp_dispatch(QmpCommandList *cmds, QObject *request, - - ret = do_qmp_dispatch(cmds, request, allow_oob, &err); - if (err) { -+ /* or assert(!ret) after reviewing all handlers: */ -+ qobject_unref(ret); - rsp = qmp_error_response(err); - } else if (ret) { - rsp = qdict_new(); --- -2.22.0.windows.1 - diff --git a/ram-block-attribute-Add-priority-listener-support-fo.patch b/ram-block-attribute-Add-priority-listener-support-fo.patch new file mode 100644 index 0000000000000000000000000000000000000000..908c904b1d87900798706f01858c4f7d3f932172 --- /dev/null +++ b/ram-block-attribute-Add-priority-listener-support-fo.patch @@ -0,0 +1,99 @@ 
+From 71e7d77e5724b77fdba7bab48ef44e92b8e0c1ee Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 15:49:32 +0800 +Subject: [PATCH] ram-block-attribute: Add priority listener support for + PrivateSharedListener + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/ed4157b155b571b62c4d88ca297909dbcb3922ed + +In-place page conversion requires operations to follow a specific +sequence: unmap-before-conversion-to-private and +map-after-conversion-to-shared. Currently, both attribute changes and +VFIO DMA map/unmap operations are handled by PrivateSharedListeners, +they need to be invoked in a specific order. + +For private to shared conversion: +- Change attribute to shared. +- VFIO populates the shared mappings into the IOMMU. +- Restore attribute if the operation fails. + +For shared to private conversion: +- VFIO discards shared mapping from the IOMMU. +- Change attribute to private. + +To facilitate this sequence, priority support is added to +PrivateSharedListener so that listeners are stored in a determined +order based on priority. A tail queue is used to store listeners, +allowing traversal in either direction.
+ +Signed-off-by: Chenyi Qiang +Conflicts: + include/exec/ramblock.h + system/ram-block-attribute.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/vfio/common.c | 3 ++- + include/exec/memory.h | 19 +++++++++++++++++-- + 2 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 182874eccb..c0bc61fdee 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -530,7 +530,8 @@ static void vfio_register_private_shared_listener(VFIOContainerBase *bcontainer, + + psl = &vpsl->listener; + private_shared_listener_init(psl, vfio_private_shared_notify_to_shared, +- vfio_private_shared_notify_to_private); ++ vfio_private_shared_notify_to_private, ++ PRIVATE_SHARED_LISTENER_PRIORITY_COMMON); + generic_state_manager_register_listener(gsm, &psl->scl, section); + QLIST_INSERT_HEAD(&bcontainer->vpsl_list, vpsl, next); + } +diff --git a/include/exec/memory.h b/include/exec/memory.h +index b93ffb533e..51fe10d4a0 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -827,11 +827,24 @@ struct RamDiscardManagerClass { + GenericStateManagerClass parent_class; + }; + ++#define PRIVATE_SHARED_LISTENER_PRIORITY_MIN 0 ++#define PRIVATE_SHARED_LISTENER_PRIORITY_COMMON 10 ++ + typedef struct PrivateSharedListener PrivateSharedListener; + struct PrivateSharedListener { + struct StateChangeListener scl; + +- QLIST_ENTRY(PrivateSharedListener) next; ++ /* ++ * @priority: ++ * ++ * Govern the order in which ram discard listeners are invoked. Lower priorities ++ * are invoked earlier. ++ * The listener priority can help to undo the effects of previous listeners in ++ * a reverse order in case of a failure callback. 
++ */ ++ int priority; ++ ++ QTAILQ_ENTRY(PrivateSharedListener) next; + }; + + struct PrivateSharedManagerClass { +@@ -841,9 +854,11 @@ struct PrivateSharedManagerClass { + + static inline void private_shared_listener_init(PrivateSharedListener *psl, + NotifyStateSet populate_fn, +- NotifyStateClear discard_fn) ++ NotifyStateClear discard_fn, ++ int priority) + { + state_change_listener_init(&psl->scl, populate_fn, discard_fn); ++ psl->priority = priority; + } + + bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, +-- +2.33.0 + diff --git a/range-Introduce-range_get_last_bit.patch b/range-Introduce-range_get_last_bit.patch new file mode 100644 index 0000000000000000000000000000000000000000..427a14d8d1c8660c84a075f3436d5166a512c4fb --- /dev/null +++ b/range-Introduce-range_get_last_bit.patch @@ -0,0 +1,52 @@ +From 30150b8727e9ec41f83c4dfcd93f04b766357469 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:31 +0800 +Subject: [PATCH] range: Introduce range_get_last_bit() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This helper get the highest 1 bit position of the upper bound. + +If the range is empty or upper bound is zero, -1 is returned. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +--- + include/qemu/range.h | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/include/qemu/range.h b/include/qemu/range.h +index 205e1da76d..4ce694a398 100644 +--- a/include/qemu/range.h ++++ b/include/qemu/range.h +@@ -20,6 +20,8 @@ + #ifndef QEMU_RANGE_H + #define QEMU_RANGE_H + ++#include "qemu/bitops.h" ++ + /* + * Operations on 64 bit address ranges. 
+ * Notes: +@@ -217,6 +219,15 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1, + return !(last2 < first1 || last1 < first2); + } + ++/* Get highest non-zero bit position of a range */ ++static inline int range_get_last_bit(Range *range) ++{ ++ if (range_is_empty(range)) { ++ return -1; ++ } ++ return 63 - clz64(range->upb); ++} ++ + /* + * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap. + * Both @a and @b must not be empty. +-- +2.41.0.windows.1 + diff --git a/raw-format-Fix-error-message-for-invalid-offset-size.patch b/raw-format-Fix-error-message-for-invalid-offset-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..723aee8a8adccaefd905911026e0a850d29b6b6d --- /dev/null +++ b/raw-format-Fix-error-message-for-invalid-offset-size.patch @@ -0,0 +1,48 @@ +From 16fb3ec642af7ec7980b7ceff1b25abee3fecee2 Mon Sep 17 00:00:00 2001 +From: Susanooo +Date: Fri, 25 Oct 2024 09:20:38 +0800 +Subject: [PATCH] raw-format: Fix error message for invalid offset/size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +s->offset and s->size are only set at the end of the function and still +contain the old values when formatting the error message. Print the +parameters with the new values that we actually checked instead. + +Fixes: 500e243 ('raw-format: Split raw_read_options()') +Signed-off-by: Kevin Wolf +Message-ID: <20240829185527.47152-1-kwolf@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +Signed-off-by: zhangchujun +--- + block/raw-format.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/raw-format.c b/block/raw-format.c +index 1111dffd54..8195ed87cc 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -111,7 +111,7 @@ raw_apply_options(BlockDriverState *bs, BDRVRawState *s, uint64_t offset, + if (offset > real_size) { + error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than " + "size of the containing file (%" PRId64 ")", +- s->offset, real_size); ++ offset, real_size); + return -EINVAL; + } + +@@ -119,7 +119,7 @@ raw_apply_options(BlockDriverState *bs, BDRVRawState *s, uint64_t offset, + error_setg(errp, "The sum of offset (%" PRIu64 ") and size " + "(%" PRIu64 ") has to be smaller or equal to the " + " actual size of the containing file (%" PRId64 ")", +- s->offset, s->size, real_size); ++ offset, size, real_size); + return -EINVAL; + } + +-- +2.41.0.windows.1 + diff --git a/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch b/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch deleted file mode 100644 index 00e672662ddd5d848fc031967a0efdcf9dc4432b..0000000000000000000000000000000000000000 --- a/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch +++ /dev/null @@ -1,54 +0,0 @@ -From fc5afb1a9230fe21d76bcef527b0d3cee90a2cd3 Mon Sep 17 00:00:00 2001 -From: Michael Roth -Date: Thu, 12 Sep 2019 18:12:02 -0500 -Subject: [PATCH] roms/Makefile.edk2: don't pull in submodules when building - from tarball -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently the `make efi` target pulls submodules nested under the -roms/edk2 submodule as dependencies. However, when we attempt to build -from a tarball this fails since we are no longer in a git tree. 
- -A preceding patch will pre-populate these submodules in the tarball, -so assume this build dependency is only needed when building from a -git tree. - -Cc: Laszlo Ersek -Cc: Bruce Rogers -Cc: qemu-stable@nongnu.org # v4.1.0 -Reported-by: Bruce Rogers -Reviewed-by: Laszlo Ersek -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Michael Roth -Message-Id: <20190912231202.12327-3-mdroth@linux.vnet.ibm.com> -Signed-off-by: Philippe Mathieu-Daudé -(cherry picked from commit f3e330e3c319160ac04954399b5a10afc965098c) -Signed-off-by: Michael Roth ---- - roms/Makefile.edk2 | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/roms/Makefile.edk2 b/roms/Makefile.edk2 -index c2f2ff59d5..33a074d3a4 100644 ---- a/roms/Makefile.edk2 -+++ b/roms/Makefile.edk2 -@@ -46,8 +46,13 @@ all: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd.bz2) \ - # files. - .INTERMEDIATE: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd) - -+# Fetch edk2 submodule's submodules. If it is not in a git tree, assume -+# we're building from a tarball and that they've already been fetched by -+# make-release/tarball scripts. - submodules: -- cd edk2 && git submodule update --init --force -+ if test -d edk2/.git; then \ -+ cd edk2 && git submodule update --init --force; \ -+ fi - - # See notes on the ".NOTPARALLEL" target and the "+" indicator in - # "tests/uefi-test-tools/Makefile". 
--- -2.23.0 diff --git a/s390x-sclp-Simplify-get_sclp_device.patch b/s390x-sclp-Simplify-get_sclp_device.patch new file mode 100644 index 0000000000000000000000000000000000000000..297c6b3ea113915f8c14eea33484234042c360d9 --- /dev/null +++ b/s390x-sclp-Simplify-get_sclp_device.patch @@ -0,0 +1,48 @@ +From 358b772c1289c1bf42dfe8c62b04b8a28d60ebf1 Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Mon, 21 Oct 2024 14:28:13 +0800 +Subject: [PATCH] s390x/sclp: Simplify get_sclp_device() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 3d9836e46dbe1e46c39fe76a62d3085a71ddbf7a + +get_sclp_device() scans the whole machine to find a TYPE_SCLP object. +Now that the SCLPDevice instance is available under the machine state, +use it to simplify the lookup. While at it, remove the inline to let +the compiler decide on how to optimize. + +Signed-off-by: Cédric Le Goater +Message-ID: <20240502131533.377719-4-clg@redhat.com> +Reviewed-by: Thomas Huth +Signed-off-by: Thomas Huth +Signed-off-by: Zhang Jiao +--- + hw/s390x/sclp.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index 893e71a41b..69bf04e23a 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -21,13 +21,14 @@ + #include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/ipl.h" + #include "hw/s390x/cpu-topology.h" ++#include "hw/s390x/s390-virtio-ccw.h" + +-static inline SCLPDevice *get_sclp_device(void) ++static SCLPDevice *get_sclp_device(void) + { + static SCLPDevice *sclp; + + if (!sclp) { +- sclp = SCLP(object_resolve_path_type("", TYPE_SCLP, NULL)); ++ sclp = S390_CCW_MACHINE(qdev_get_machine())->sclp; + } + return sclp; + } +-- +2.41.0.windows.1 + diff --git a/scsi-bugfix-fix-division-by-zero.patch b/scsi-bugfix-fix-division-by-zero.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d6d18bb9d106aed5ccd724667f19a187bf41f1a --- /dev/null +++
b/scsi-bugfix-fix-division-by-zero.patch @@ -0,0 +1,58 @@ +From f2837d186532fb82ed01dbe32bdcf9dda6b06258 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 16:34:05 +0800 +Subject: [PATCH] scsi: bugfix: fix division by zero + +Error of PRDM disk may cause divide by zero in +scsi_read_complete(), so add LOG and assert(). + +Signed-off-by: wangjian161 +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-generic.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 2417f0ad84..22efcd09a6 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -192,6 +192,10 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + (r->req.cmd.buf[1] & 0x01)) { + page = r->req.cmd.buf[2]; + if (page == 0xb0 && r->buflen >= 8) { ++ if (s->blocksize == 0) { ++ qemu_log("device blocksize is 0!\n"); ++ abort(); ++ } + uint8_t buf[16] = {}; + uint8_t buf_used = MIN(r->buflen, 16); + uint64_t max_transfer = calculate_max_transfer(s); +@@ -326,11 +330,23 @@ static void scsi_read_complete(void * opaque, int ret) + /* Snoop READ CAPACITY output to set the blocksize. */ + if (r->req.cmd.buf[0] == READ_CAPACITY_10 && + (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) { +- s->blocksize = ldl_be_p(&r->buf[4]); ++ int new_blocksize = ldl_be_p(&r->buf[4]); ++ if (s->blocksize != new_blocksize) { ++ qemu_log("device id=%s type=%d: blocksize %d change to %d\n", ++ s->qdev.id ? s->qdev.id : "null", s->type, ++ s->blocksize, new_blocksize); ++ } ++ s->blocksize = new_blocksize; + s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL; + } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 && + (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) { +- s->blocksize = ldl_be_p(&r->buf[8]); ++ int new_blocksize = ldl_be_p(&r->buf[8]); ++ if (s->blocksize != new_blocksize) { ++ qemu_log("device id=%s type=%d: blocksize %d change to %d\n", ++ s->qdev.id ? 
s->qdev.id : "null", s->type, ++ s->blocksize, new_blocksize); ++ } ++ s->blocksize = new_blocksize; + s->max_lba = ldq_be_p(&r->buf[0]); + } + +-- +2.27.0 + diff --git a/scsi-bus-Refactor-the-code-that-retries-requests.patch b/scsi-bus-Refactor-the-code-that-retries-requests.patch index eae42b854e2ba7818ff3c5e9812c7e3ed7f94ac9..0226238a63c083bbab32c94ccc1033fec91bb3f2 100644 --- a/scsi-bus-Refactor-the-code-that-retries-requests.patch +++ b/scsi-bus-Refactor-the-code-that-retries-requests.patch @@ -1,4 +1,4 @@ -From eb55d7c4f6e0adae2aab8bd750dccf9cd7a8c784 Mon Sep 17 00:00:00 2001 +From d69428c793ca7311c55d0efdaa82100247e35dcc Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:54 +0800 Subject: [PATCH] scsi-bus: Refactor the code that retries requests @@ -9,17 +9,18 @@ retry_request_cb() of scsi-disk in a future patch. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- hw/scsi/scsi-bus.c | 16 +++++++++++----- include/hw/scsi/scsi.h | 1 + 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index fdc3a0e4e0..9dc09b5f3e 100644 +index fc4b77fdb0..cecb02ae7e 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c -@@ -99,14 +99,10 @@ void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, - qbus_set_bus_hotplug_handler(BUS(bus), &error_abort); +@@ -144,14 +144,10 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + qbus_set_bus_hotplug_handler(BUS(bus)); } -static void scsi_dma_restart_bh(void *opaque) @@ -34,8 +35,8 @@ index fdc3a0e4e0..9dc09b5f3e 100644 aio_context_acquire(blk_get_aio_context(s->conf.blk)); QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { scsi_req_ref(req); -@@ -128,6 +124,16 @@ static void scsi_dma_restart_bh(void *opaque) - aio_context_release(blk_get_aio_context(s->conf.blk)); +@@ -175,6 +171,16 @@ static void scsi_dma_restart_bh(void *opaque) + object_unref(OBJECT(s)); } +static void 
scsi_dma_restart_bh(void *opaque) @@ -52,17 +53,17 @@ index fdc3a0e4e0..9dc09b5f3e 100644 { /* No need to save a reference, because scsi_dma_restart_bh just diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h -index 426566a5c6..1231d30b35 100644 +index 3692ca82f3..6ec18bf12b 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h -@@ -184,6 +184,7 @@ void scsi_req_cancel_complete(SCSIRequest *req); +@@ -226,6 +226,7 @@ void scsi_req_cancel_complete(SCSIRequest *req); void scsi_req_cancel(SCSIRequest *req); void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier); void scsi_req_retry(SCSIRequest *req); +void scsi_retry_requests(SCSIDevice *s); + void scsi_device_drained_begin(SCSIDevice *sdev); + void scsi_device_drained_end(SCSIDevice *sdev); void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense); - void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense); - void scsi_device_report_change(SCSIDevice *dev, SCSISense sense); -- 2.27.0 diff --git a/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch b/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch new file mode 100644 index 0000000000000000000000000000000000000000..e6a960a3e62698688b1227110eccb7bef0b582f8 --- /dev/null +++ b/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch @@ -0,0 +1,81 @@ +From 60181b02c77f533105f904ab9e023bc22f65ad48 Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 29 Mar 2022 12:05:56 +0800 +Subject: [PATCH] scsi-bus: fix incorrect call for + blk_error_retry_reset_timeout() + +Fix commit 52115ca0("scsi-disk: Add support for retry on errors"). +Call Stack: + ... + scsi_read_data() + scsi_do_read(r, 0) + scsi_disk_req_check_error() + blk_error_retry_reset_timeout() + blk->retry_start_time = 0; + +It will cause IO hang when storage network disconnected. Before the +storage network recovered, the upper call stack will reset the +retry_start_time, and cause the next IO operation not returned immediately. 
+ +Signed-off-by: Yan Wang +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-disk.c | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 97d8c5bb30..845a2a7d5d 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -258,10 +258,8 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } + } + +-static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) ++static bool scsi_disk_req_handle_error(SCSIDiskReq *r, int ret, bool acct_failed) + { +- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); +- + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + return true; +@@ -271,6 +269,17 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return scsi_handle_rw_error(r, ret, acct_failed); + } + ++ return false; ++} ++ ++static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) ++{ ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); ++ ++ if (r->req.io_canceled || ret < 0) { ++ return scsi_disk_req_handle_error(r, ret, acct_failed); ++ } ++ + blk_error_retry_reset_timeout(s->qdev.conf.blk); + return false; + } +@@ -423,7 +432,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + + assert (r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_handle_error(r, ret, false)) { + goto done; + } + +@@ -464,6 +473,9 @@ static void scsi_do_read_cb(void *opaque, int ret) + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); ++ if (!r->req.io_canceled) { ++ blk_error_retry_reset_timeout(s->qdev.conf.blk); ++ } + } + scsi_do_read(opaque, ret); + aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); +-- +2.27.0 + diff --git 
a/scsi-bus-fix-unmatched-object_unref.patch b/scsi-bus-fix-unmatched-object_unref.patch new file mode 100644 index 0000000000000000000000000000000000000000..006400c59b478b8ba5290cda83355f557053bce2 --- /dev/null +++ b/scsi-bus-fix-unmatched-object_unref.patch @@ -0,0 +1,43 @@ +From c2f55f210d4e021121865ea31037d2751188befd Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 1 Mar 2022 20:12:12 +0800 +Subject: [PATCH] scsi-bus: fix unmatched object_unref() + +Fix commit 391dd8f1("scsi-bus: Refactor the code that retries requests"), +which split scsi_dma_restart_bh(), but the object_unref() belongs to +scsi_dma_restart_bh(). +So, we should mv object_unref() from scsi_retry_requests() to +scsi_dma_restart_bh(). + +Signed-off-by: Yan Wang +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-bus.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index cecb02ae7e..7b60ac11f5 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -167,8 +167,6 @@ void scsi_retry_requests(SCSIDevice *s) + scsi_req_unref(req); + } + aio_context_release(blk_get_aio_context(s->conf.blk)); +- /* Drop the reference that was acquired in scsi_dma_restart_cb */ +- object_unref(OBJECT(s)); + } + + static void scsi_dma_restart_bh(void *opaque) +@@ -179,6 +177,9 @@ static void scsi_dma_restart_bh(void *opaque) + s->bh = NULL; + + scsi_retry_requests(s); ++ ++ /* Drop the reference that was acquired in scsi_dma_restart_cb */ ++ object_unref(OBJECT(s)); + } + + void scsi_req_retry(SCSIRequest *req) +-- +2.27.0 + diff --git a/scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch b/scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch new file mode 100644 index 0000000000000000000000000000000000000000..70ec66267d03822265d6cabde1a71b1c845e34f1 --- /dev/null +++ b/scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch @@ -0,0 +1,36 @@ +From aac11bd40369aa31c9b3efb701242cc307ce5645 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 
Feb 2022 11:42:47 +0800 +Subject: [PATCH] scsi: cdrom: Fix crash after remote cdrom detached + +There is a small window between the twice blk_is_available in +scsi_disk_emulate_command which would cause crash due to the later +assertion if the remote cdrom is detached in this window. + +So this patch replaces assertions with return to avoid qemu crash. + +Signed-off-by: wangjian161 +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-disk.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index f638854ebf..7f581efce8 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2021,7 +2021,10 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) + memset(outbuf, 0, r->buflen); + switch (req->cmd.buf[0]) { + case TEST_UNIT_READY: +- assert(blk_is_available(s->qdev.conf.blk)); ++ if (!blk_is_available(s->qdev.conf.blk)) { ++ scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); ++ return 0; ++ } + break; + case INQUIRY: + buflen = scsi_disk_emulate_inquiry(req, outbuf); +-- +2.27.0 + diff --git a/scsi-disk-Add-support-for-retry-on-errors.patch b/scsi-disk-Add-support-for-retry-on-errors.patch index e0bd91ec86fd13c70499567df0c18f9761e06c35..29fbc7e5e56ecf1f121f4488f63990afa1f8aba9 100644 --- a/scsi-disk-Add-support-for-retry-on-errors.patch +++ b/scsi-disk-Add-support-for-retry-on-errors.patch @@ -1,4 +1,4 @@ -From 34f1552a6d7e05f2f2146ebc6d50deb2de7e5fd4 Mon Sep 17 00:00:00 2001 +From 6100f909506025563ecec29b25f64cce75fc2353 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:55 +0800 Subject: [PATCH] scsi-disk: Add support for retry on errors @@ -8,15 +8,27 @@ handle these requests. 
Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- hw/scsi/scsi-disk.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index cd90cd780e..93fdd913fe 100644 +index 6691f5edb8..97d8c5bb30 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c -@@ -184,6 +184,8 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) +@@ -249,6 +249,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + scsi_req_retry(&r->req); + return true; + ++ case BLOCK_ERROR_ACTION_RETRY: ++ scsi_req_retry(&r->req); ++ return true; ++ + default: + g_assert_not_reached(); + } +@@ -256,6 +260,8 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) { @@ -25,26 +37,15 @@ index cd90cd780e..93fdd913fe 100644 if (r->req.io_canceled) { scsi_req_cancel_complete(&r->req); return true; -@@ -193,6 +195,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) - return scsi_handle_rw_error(r, -ret, acct_failed); +@@ -265,6 +271,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return scsi_handle_rw_error(r, ret, acct_failed); } + blk_error_retry_reset_timeout(s->qdev.conf.blk); return false; } -@@ -480,6 +483,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) - } - } - -+ if (action == BLOCK_ERROR_ACTION_RETRY) { -+ scsi_req_retry(&r->req); -+ } -+ - blk_error_action(s->qdev.conf.blk, action, is_read, error); - if (action == BLOCK_ERROR_ACTION_IGNORE) { - scsi_req_complete(&r->req, 0); -@@ -2252,6 +2259,13 @@ static void scsi_disk_resize_cb(void *opaque) +@@ -2391,6 +2398,13 @@ static void scsi_disk_resize_cb(void *opaque) } } @@ -58,15 +59,17 @@ index cd90cd780e..93fdd913fe 100644 static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) { SCSIDiskState 
*s = opaque; -@@ -2300,10 +2314,12 @@ static const BlockDevOps scsi_disk_removable_block_ops = { +@@ -2440,12 +2454,14 @@ static const BlockDevOps scsi_disk_removable_block_ops = { .is_medium_locked = scsi_cd_is_medium_locked, - - .resize_cb = scsi_disk_resize_cb, + .is_tray_open = scsi_cd_is_tray_open, + .resize_cb = scsi_disk_resize_cb, + .retry_request_cb = scsi_disk_retry_request, }; static const BlockDevOps scsi_disk_block_ops = { - .resize_cb = scsi_disk_resize_cb, + .drained_begin = scsi_disk_drained_begin, + .drained_end = scsi_disk_drained_end, + .resize_cb = scsi_disk_resize_cb, + .retry_request_cb = scsi_disk_retry_request, }; diff --git a/scsi-disk-define-props-in-scsi_block_disk-to-avoid-memleaks.patch b/scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch similarity index 70% rename from scsi-disk-define-props-in-scsi_block_disk-to-avoid-memleaks.patch rename to scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch index a180aa1a08062f11cd313f5019d35933cd55747c..36e16d60cd4f5c37103032ccc77144d7b6d90d38 100644 --- a/scsi-disk-define-props-in-scsi_block_disk-to-avoid-memleaks.patch +++ b/scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch @@ -1,29 +1,30 @@ -From 79da8e2e18610ae22a3bd640c117ba56b911038d Mon Sep 17 00:00:00 2001 +From 85307e997e4ee7a50a87ac2ac218911c0058d8e3 Mon Sep 17 00:00:00 2001 From: Pan Nengyuan Date: Mon, 13 Jan 2020 15:53:32 +0800 -Subject: [PATCH] scsi-disk: define props in scsi_block_disk to avoid - memleaks +Subject: [PATCH] scsi-disk: define props in scsi_block_disk to avoid memleaks scsi_block_realize() use scsi_realize() to init some props, but -these props is not defined in scsi_block_disk_properties, so they will +these props is not defined in scsi_block_properties, so they will not be freed. This patch defines these prop in scsi_block_disk_properties to avoid memleaks. 
Signed-off-by: Pan Nengyuan +Signed-off-by: Yan Wang +Signed-off-by: shaodenghui --- hw/scsi/scsi-disk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index e7e865ab..233afb4a 100644 +index 6691f5edb8..f638854ebf 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c -@@ -3026,9 +3026,7 @@ static const TypeInfo scsi_cd_info = { +@@ -3241,9 +3241,7 @@ static const TypeInfo scsi_cd_info = { #ifdef __linux__ static Property scsi_block_properties[] = { -- DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), \ +- DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), - DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk), - DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false), + DEFINE_SCSI_DISK_PROPERTIES(), @@ -31,6 +32,5 @@ index e7e865ab..233afb4a 100644 DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size, DEFAULT_MAX_UNMAP_SIZE), -- -2.18.1 - +2.27.0 diff --git a/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch b/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch deleted file mode 100644 index 5d20a9f009c9bd52f9eef578344c5b0012ee8942..0000000000000000000000000000000000000000 --- a/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 051c9b3cbcb4beb42a6ed017c2146ec3e7a754fb Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Wed, 14 Aug 2019 17:35:21 +0530 -Subject: [PATCH] scsi: lsi: exit infinite loop while executing script - (CVE-2019-12068) - -When executing script in lsi_execute_script(), the LSI scsi adapter -emulator advances 's->dsp' index to read next opcode. This can lead -to an infinite loop if the next opcode is empty. Move the existing -loop exit after 10k iterations so that it covers no-op opcodes as -well. 
- -Reported-by: Bugs SysSec -Signed-off-by: Paolo Bonzini -Signed-off-by: Prasad J Pandit -Signed-off-by: Paolo Bonzini -(cherry picked from commit de594e47659029316bbf9391efb79da0a1a08e08) -Signed-off-by: Michael Roth ---- - hw/scsi/lsi53c895a.c | 41 +++++++++++++++++++++++++++-------------- - 1 file changed, 27 insertions(+), 14 deletions(-) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index 10468c1ec1..72f7b59ab5 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -185,6 +185,9 @@ static const char *names[] = { - /* Flag set if this is a tagged command. */ - #define LSI_TAG_VALID (1 << 16) - -+/* Maximum instructions to process. */ -+#define LSI_MAX_INSN 10000 -+ - typedef struct lsi_request { - SCSIRequest *req; - uint32_t tag; -@@ -1132,7 +1135,21 @@ static void lsi_execute_script(LSIState *s) - - s->istat1 |= LSI_ISTAT1_SRUN; - again: -- insn_processed++; -+ if (++insn_processed > LSI_MAX_INSN) { -+ /* Some windows drivers make the device spin waiting for a memory -+ location to change. If we have been executed a lot of code then -+ assume this is the case and force an unexpected device disconnect. -+ This is apparently sufficient to beat the drivers into submission. -+ */ -+ if (!(s->sien0 & LSI_SIST0_UDC)) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "lsi_scsi: inf. loop with UDC masked"); -+ } -+ lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); -+ lsi_disconnect(s); -+ trace_lsi_execute_script_stop(); -+ return; -+ } - insn = read_dword(s, s->dsp); - if (!insn) { - /* If we receive an empty opcode increment the DSP by 4 bytes -@@ -1569,19 +1586,7 @@ again: - } - } - } -- if (insn_processed > 10000 && s->waiting == LSI_NOWAIT) { -- /* Some windows drivers make the device spin waiting for a memory -- location to change. If we have been executed a lot of code then -- assume this is the case and force an unexpected device disconnect. -- This is apparently sufficient to beat the drivers into submission. 
-- */ -- if (!(s->sien0 & LSI_SIST0_UDC)) { -- qemu_log_mask(LOG_GUEST_ERROR, -- "lsi_scsi: inf. loop with UDC masked"); -- } -- lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); -- lsi_disconnect(s); -- } else if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { -+ if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { - if (s->dcntl & LSI_DCNTL_SSM) { - lsi_script_dma_interrupt(s, LSI_DSTAT_SSI); - } else { -@@ -1969,6 +1974,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) - case 0x2f: /* DSP[24:31] */ - s->dsp &= 0x00ffffff; - s->dsp |= val << 24; -+ /* -+ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one -+ * instruction. Is this correct? -+ */ - if ((s->dmode & LSI_DMODE_MAN) == 0 - && (s->istat1 & LSI_ISTAT1_SRUN) == 0) - lsi_execute_script(s); -@@ -1987,6 +1996,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) - break; - case 0x3b: /* DCNTL */ - s->dcntl = val & ~(LSI_DCNTL_PFF | LSI_DCNTL_STD); -+ /* -+ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one -+ * instruction. Is this correct? -+ */ - if ((val & LSI_DCNTL_STD) && (s->istat1 & LSI_ISTAT1_SRUN) == 0) - lsi_execute_script(s); - break; --- -2.23.0 diff --git a/sd-sdhci-assert-data_count-is-within-fifo_buffer.patch b/sd-sdhci-assert-data_count-is-within-fifo_buffer.patch deleted file mode 100644 index e38bfaa471d280a7de334e81906747938ab57b7c..0000000000000000000000000000000000000000 --- a/sd-sdhci-assert-data_count-is-within-fifo_buffer.patch +++ /dev/null @@ -1,65 +0,0 @@ -From e8d2655821caa2b8efce429c0036a93342b8383d Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Mon, 8 Feb 2021 17:14:21 +0800 -Subject: [PATCH] sd: sdhci: assert data_count is within fifo_buffer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2020-17380 - -While doing multi block SDMA, transfer block size may exceed -the 's->fifo_buffer[s->buf_maxsz]' size. 
It may leave the -current element pointer 's->data_count' pointing out of bounds. -Leading the subsequent DMA r/w operation to OOB access issue. -Assert that 's->data_count' is within fifo_buffer. - - -> https://ruhr-uni-bochum.sciebo.de/s/NNWP2GfwzYKeKwE?path=%2Fsdhci_oob_write1 - ==1459837==ERROR: AddressSanitizer: heap-buffer-overflow - WRITE of size 54722048 at 0x61500001e280 thread T3 - #0 __interceptor_memcpy (/lib64/libasan.so.6+0x3a71d) - #1 flatview_read_continue ../exec.c:3245 - #2 flatview_read ../exec.c:3278 - #3 address_space_read_full ../exec.c:3291 - #4 address_space_rw ../exec.c:3319 - #5 dma_memory_rw_relaxed ../include/sysemu/dma.h:87 - #6 dma_memory_rw ../include/sysemu/dma.h:110 - #7 dma_memory_read ../include/sysemu/dma.h:116 - #8 sdhci_sdma_transfer_multi_blocks ../hw/sd/sdhci.c:629 - #9 sdhci_write ../hw/sd/sdhci.c:1097 - #10 memory_region_write_accessor ../softmmu/memory.c:483 - ... - -Reported-by: Ruhr-University -Suggested-by: Philippe Mathieu-Daudé -Signed-off-by: Prasad J Pandit - -patch link: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg01175.html -Signed-off-by: Jiajie Li ---- - hw/sd/sdhci.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c -index 7b80b1d93f..e51573fe3c 100644 ---- a/hw/sd/sdhci.c -+++ b/hw/sd/sdhci.c -@@ -613,6 +613,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) - s->blkcnt--; - } - } -+ assert(s->data_count <= s->buf_maxsz && s->data_count > begin); - dma_memory_write(s->dma_as, s->sdmasysad, - &s->fifo_buffer[begin], s->data_count - begin); - s->sdmasysad += s->data_count - begin; -@@ -635,6 +636,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) - s->data_count = block_size; - boundary_count -= block_size - begin; - } -+ assert(s->data_count <= s->buf_maxsz && s->data_count > begin); - dma_memory_read(s->dma_as, s->sdmasysad, - &s->fifo_buffer[begin], s->data_count - begin); - s->sdmasysad += s->data_count - begin; --- -2.27.0 - 
diff --git a/seqlock-fix-seqlock_write_unlock_impl-function.patch b/seqlock-fix-seqlock_write_unlock_impl-function.patch deleted file mode 100644 index f7f8c7cf6e044a663886db1b89f6c8bda36e2d25..0000000000000000000000000000000000000000 --- a/seqlock-fix-seqlock_write_unlock_impl-function.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 96e00e040cd8ae23cebf183cf3a8dc9cf1f6149d Mon Sep 17 00:00:00 2001 -From: Luc Michel -Date: Wed, 29 Jan 2020 15:49:48 +0100 -Subject: [PATCH] seqlock: fix seqlock_write_unlock_impl function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The seqlock write unlock function was incorrectly calling -seqlock_write_begin() instead of seqlock_write_end(), and was releasing -the lock before incrementing the sequence. This could lead to a race -condition and a corrupted sequence number becoming odd even though the -lock is not held. - -Signed-off-by: Luc Michel -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20200129144948.2161551-1-luc.michel@greensocs.com> -Fixes: 988fcafc73 ("seqlock: add QemuLockable support", 2018-08-23) -Signed-off-by: Paolo Bonzini ---- - include/qemu/seqlock.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h -index fd408b7ec5..8b6b4ee4bb 100644 ---- a/include/qemu/seqlock.h -+++ b/include/qemu/seqlock.h -@@ -55,11 +55,11 @@ static inline void seqlock_write_lock_impl(QemuSeqLock *sl, QemuLockable *lock) - #define seqlock_write_lock(sl, lock) \ - seqlock_write_lock_impl(sl, QEMU_MAKE_LOCKABLE(lock)) - --/* Lock out other writers and update the count. */ -+/* Update the count and release the lock. 
*/ - static inline void seqlock_write_unlock_impl(QemuSeqLock *sl, QemuLockable *lock) - { -+ seqlock_write_end(sl); - qemu_lockable_unlock(lock); -- seqlock_write_begin(sl); - } - #define seqlock_write_unlock(sl, lock) \ - seqlock_write_unlock_impl(sl, QEMU_MAKE_LOCKABLE(lock)) --- -2.27.0 - diff --git a/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch b/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ba1e54d8bafed805bdc0212dcb6add9bf99fa87 --- /dev/null +++ b/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch @@ -0,0 +1,196 @@ +From c4829aa6fce007c995b21cfbd86de0473263c19a Mon Sep 17 00:00:00 2001 +From: Dongxu Sun +Date: Sat, 30 Mar 2024 12:49:05 +0800 +Subject: [PATCH] shadow_dev: introduce shadow dev for virtio-net device + +for virtio net devices, create the shadow device for vlpi +bypass inject supported. + +Signed-off-by: Wang Haibin +Signed-off-by: Yu Zenghui +Signed-off-by: Chen Qun +Signed-off-by: KunKun Jiang +Signed-off-by: Dongxu Sun +Signed-off-by: Yuan Zhang +--- + hw/virtio/virtio-pci.c | 32 ++++++++++++++++++++++++++ + include/sysemu/kvm.h | 5 +++++ + linux-headers/linux/kvm.h | 13 +++++++++++ + target/arm/kvm.c | 47 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 97 insertions(+) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 134a8eaef6..f8adb0520a 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -922,18 +922,44 @@ undo: + } + return ret; + } ++ ++#ifdef __aarch64__ ++int __attribute__((weak)) kvm_create_shadow_device(PCIDevice *dev) ++{ ++ return 0; ++} ++ ++int __attribute__((weak)) kvm_delete_shadow_device(PCIDevice *dev) ++{ ++ return 0; ++} ++#endif ++ + static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + int ret = 0; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ 
kvm_create_shadow_device(&proxy->pci_dev); ++ } ++#endif ++ + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; + } + ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } ++ ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net") && ret != 0) { ++ kvm_delete_shadow_device(&proxy->pci_dev); ++ } ++#endif ++ + return ret; + } + +@@ -976,6 +1002,12 @@ static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) + } + kvm_virtio_pci_vector_release_one(proxy, queue_no); + } ++ ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ kvm_delete_shadow_device(&proxy->pci_dev); ++ } ++#endif + } + + static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index d614878164..b46d6203b4 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -538,4 +538,9 @@ bool kvm_arch_cpu_check_are_resettable(void); + bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); ++ ++#ifdef __aarch64__ ++int kvm_create_shadow_device(PCIDevice *dev); ++int kvm_delete_shadow_device(PCIDevice *dev); ++#endif + #endif +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 549fea3a97..56f6b2583f 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1198,6 +1198,8 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 + #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + ++#define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 ++ + #ifdef KVM_CAP_IRQ_ROUTING + + struct kvm_irq_routing_irqchip { +@@ -1524,6 +1526,17 @@ struct kvm_s390_ucas_mapping { + #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) + #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) + #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) ++ ++#ifdef __aarch64__ ++struct kvm_master_dev_info ++{ ++ __u32 nvectors; /* number of msi vectors 
*/ ++ struct kvm_msi msi[0]; ++}; ++#define KVM_CREATE_SHADOW_DEV _IOW(KVMIO, 0xf0, struct kvm_master_dev_info) ++#define KVM_DEL_SHADOW_DEV _IOW(KVMIO, 0xf1, __u32) ++#endif ++ + /* Available with KVM_CAP_PIT_STATE2 */ + #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) + #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 7903e2ddde..f59f4f81b2 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -26,6 +26,8 @@ + #include "trace.h" + #include "internals.h" + #include "hw/pci/pci.h" ++#include "hw/pci/msi.h" ++#include "hw/pci/msix.h" + #include "exec/memattrs.h" + #include "exec/address-spaces.h" + #include "hw/boards.h" +@@ -1053,6 +1055,51 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + return 0; + } + ++int kvm_create_shadow_device(PCIDevice *dev) ++{ ++ KVMState *s = kvm_state; ++ struct kvm_master_dev_info *mdi; ++ MSIMessage msg; ++ uint32_t vector, nvectors = msix_nr_vectors_allocated(dev); ++ uint32_t request_id; ++ int ret; ++ ++ if (!kvm_vm_check_extension(s, KVM_CAP_ARM_VIRT_MSI_BYPASS) || !nvectors) { ++ return 0; ++ } ++ ++ mdi = g_malloc0(sizeof(uint32_t) + sizeof(struct kvm_msi) * nvectors); ++ mdi->nvectors = nvectors; ++ request_id = pci_requester_id(dev); ++ ++ for (vector = 0; vector < nvectors; vector++) { ++ msg = msix_get_message(dev, vector); ++ mdi->msi[vector].address_lo = extract64(msg.address, 0, 32); ++ mdi->msi[vector].address_hi = extract64(msg.address, 32, 32); ++ mdi->msi[vector].data = le32_to_cpu(msg.data); ++ mdi->msi[vector].flags = KVM_MSI_VALID_DEVID; ++ mdi->msi[vector].devid = request_id; ++ memset(mdi->msi[vector].pad, 0, sizeof(mdi->msi[vector].pad)); ++ } ++ ++ ret = kvm_vm_ioctl(s, KVM_CREATE_SHADOW_DEV, mdi); ++ g_free(mdi); ++ return ret; ++} ++ ++int kvm_delete_shadow_device(PCIDevice *dev) ++{ ++ KVMState *s = kvm_state; ++ uint32_t request_id, nvectors = msix_nr_vectors_allocated(dev); ++ ++ if 
(!kvm_vm_check_extension(s, KVM_CAP_ARM_VIRT_MSI_BYPASS) || !nvectors) { ++ return 0; ++ } ++ ++ request_id = pci_requester_id(dev); ++ return kvm_vm_ioctl(s, KVM_DEL_SHADOW_DEV, &request_id); ++} ++ + int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) + { +-- +2.27.0 + diff --git a/slirp-check-pkt_len-before-reading-protocol-header.patch b/slirp-check-pkt_len-before-reading-protocol-header.patch deleted file mode 100644 index 506e31e1cb809bba769e857177c4e2ad70f4293f..0000000000000000000000000000000000000000 --- a/slirp-check-pkt_len-before-reading-protocol-header.patch +++ /dev/null @@ -1,61 +0,0 @@ -From c2df0d478b2605da10363ab57825cdbc34caa680 Mon Sep 17 00:00:00 2001 -From: Alex Chen -Date: Mon, 14 Dec 2020 15:39:46 +0800 -Subject: [PATCH] slirp: check pkt_len before reading protocol header -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -While processing ARP/NCSI packets in 'arp_input' or 'ncsi_input' -routines, ensure that pkt_len is large enough to accommodate the -respective protocol headers, lest it should do an OOB access. -Add check to avoid it. 
- -CVE-2020-29129 CVE-2020-29130 - QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets - -> https://www.openwall.com/lists/oss-security/2020/11/27/1 - -Reported-by: Qiuhao Li -Signed-off-by: Prasad J Pandit -Message-Id: <20201126135706.273950-1-ppandit@redhat.com> -Reviewed-by: Marc-André Lureau -(cherry-picked from 2e1dcbc0) -Signed-off-by: Alex Chen ---- - slirp/src/ncsi.c | 4 ++++ - slirp/src/slirp.c | 4 ++++ - 2 files changed, 8 insertions(+) - -diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c -index 6864b735..251c0d2b 100644 ---- a/slirp/src/ncsi.c -+++ b/slirp/src/ncsi.c -@@ -147,6 +147,10 @@ void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) - uint32_t checksum; - uint32_t *pchecksum; - -+ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { -+ return; /* packet too short */ -+ } -+ - memset(ncsi_reply, 0, sizeof(ncsi_reply)); - - memset(reh->h_dest, 0xff, ETH_ALEN); -diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c -index b0194cb3..86b0f52d 100644 ---- a/slirp/src/slirp.c -+++ b/slirp/src/slirp.c -@@ -700,6 +700,10 @@ static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) - return; - } - -+ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { -+ return; /* packet too short */ -+ } -+ - ar_op = ntohs(ah->ar_op); - switch (ar_op) { - case ARPOP_REQUEST: --- -2.23.0 - diff --git a/slirp-tftp-restrict-relative-path-access.patch b/slirp-tftp-restrict-relative-path-access.patch deleted file mode 100644 index b7f09462525437c9048b4ab249b6e4208adda4ef..0000000000000000000000000000000000000000 --- a/slirp-tftp-restrict-relative-path-access.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 2fc07f4ce31a2cc9973cfb1c20897c6a4babd8b8 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Fri, 15 May 2020 16:45:28 +0800 -Subject: [PATCH] slirp: tftp: restrict relative path access - -tftp restricts relative or directory path access on Linux systems. -Apply same restrictions on Windows systems too. 
It helps to avoid -directory traversal issue. - -Fixes: https://bugs.launchpad.net/qemu/+bug/1812451Reported-by: default avatarPeter Maydell -Signed-off-by: default avatarPrasad J Pandit -Reviewed-by: Samuel Thibault's avatarSamuel Thibault -Message-Id: <20200113121431.156708-1-ppandit@redhat.com> - -diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c -index 093c2e06..2b4176cc 100644 ---- a/slirp/src/tftp.c -+++ b/slirp/src/tftp.c -@@ -344,8 +344,13 @@ static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, - k += 6; /* skipping octet */ - - /* do sanity checks on the filename */ -- if (!strncmp(req_fname, "../", 3) || -- req_fname[strlen(req_fname) - 1] == '/' || strstr(req_fname, "/../")) { -+ if ( -+#ifdef G_OS_WIN32 -+ strstr(req_fname, "..\\") || -+ req_fname[strlen(req_fname) - 1] == '\\' || -+#endif -+ strstr(req_fname, "../") || -+ req_fname[strlen(req_fname) -1] == '/') { - tftp_send_error(spt, 2, "Access violation", tp); - return; - } --- -2.23.0 - diff --git a/slirp-use-correct-size-while-emulating-IRC-commands.patch b/slirp-use-correct-size-while-emulating-IRC-commands.patch deleted file mode 100644 index 1b4039e1da3c0bcd08b97f4c61983ef3adac3823..0000000000000000000000000000000000000000 --- a/slirp-use-correct-size-while-emulating-IRC-commands.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 011880f527ff317a40769ea8673a6353e5db53ac Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 14 Apr 2020 18:23:23 +0800 -Subject: [PATCH] slirp: use correct size while emulating IRC commands - -While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size -'m->m_size' to write DCC commands via snprintf(3). This may -lead to OOB write access, because 'bptr' points somewhere in -the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) -size to avoid OOB access. 
-Reported-by: default avatarVishnu Dev TJ -Signed-off-by: default avatarPrasad J Pandit -Reviewed-by: Samuel Thibault's avatarSamuel Thibault -Message-Id: <20200109094228.79764-2-ppandit@redhat.com> ---- - slirp/src/tcp_subr.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 9c94c03a..2a15b16a 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), 1); - } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, -@@ -789,7 +790,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - } - m->m_len = bptr - m->m_data; /* Adjust length */ - m->m_len += -- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, -+ snprintf(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, -@@ -800,7 +802,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - } - m->m_len = bptr - m->m_data; /* Adjust length */ - m->m_len += -- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, -+ snprintf(bptr, M_FREEROOM(m), -+ "DCC MOVE %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } --- -2.23.0 diff --git a/slirp-use-correct-size-while-emulating-commands.patch b/slirp-use-correct-size-while-emulating-commands.patch deleted file mode 100644 index 25f64e2738e2ae3cbff541719b726dff963007d2..0000000000000000000000000000000000000000 --- a/slirp-use-correct-size-while-emulating-commands.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 
662aa4f1d168b32335a4dc40782e816329afcac0 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 14 Apr 2020 18:36:12 +0800 -Subject: [PATCH] slirp: use correct size while emulating commands - -While emulating services in tcp_emu(), it uses 'mbuf' size -'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) -size to avoid possible OOB access. -Signed-off-by: default avatarPrasad J Pandit -Signed-off-by: Samuel Thibault's avatarSamuel Thibault -Message-Id: <20200109094228.79764-3-ppandit@redhat.com> ---- - slirp/src/tcp_subr.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 2a15b16a..019b637a 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, - n5, n6, x == 7 ? buff : ""); - return 1; -@@ -732,7 +732,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - - m->m_len = bptr - m->m_data; /* Adjust length */ - m->m_len += -- snprintf(bptr, m->m_size - m->m_len, -+ snprintf(bptr, M_FREEROOM(m), - "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", - n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); - -@@ -759,7 +759,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) - m->m_len = -- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; -+ snprintf(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)) + 1; - return 1; - - case EMU_IRC: --- -2.23.0 diff --git a/sm501-Clean-up-local-variables-in-sm501_2d_operation.patch b/sm501-Clean-up-local-variables-in-sm501_2d_operation.patch deleted file mode 100644 index 66e54cdd42053d31cfa05ffa9ee15fca183254ea..0000000000000000000000000000000000000000 --- a/sm501-Clean-up-local-variables-in-sm501_2d_operation.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 6186d3de416825e3a737dd3da31da475f50d66d0 Mon Sep 17 00:00:00 2001 -From: BALATON Zoltan -Date: Thu, 21 May 2020 21:39:44 +0200 -Subject: [PATCH] sm501: Clean up local variables in sm501_2d_operation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Make variables local to the block they are used in to make it clearer -which operation they are needed for. 
- -Signed-off-by: BALATON Zoltan -Reviewed-by: Philippe Mathieu-Daudé -Message-id: ae59f8138afe7f6a5a4a82539d0f61496a906b06.1590089984.git.balaton@eik.bme.hu -Signed-off-by: Gerd Hoffmann ---- - hw/display/sm501.c | 31 ++++++++++++++++--------------- - 1 file changed, 16 insertions(+), 15 deletions(-) - -diff --git a/hw/display/sm501.c b/hw/display/sm501.c -index f3d11d0b23..98b3b97f7b 100644 ---- a/hw/display/sm501.c -+++ b/hw/display/sm501.c -@@ -699,28 +699,19 @@ static inline void hwc_invalidate(SM501State *s, int crt) - - static void sm501_2d_operation(SM501State *s) - { -- /* obtain operation parameters */ - int cmd = (s->twoD_control >> 16) & 0x1F; - int rtl = s->twoD_control & BIT(27); -- int src_x = (s->twoD_source >> 16) & 0x01FFF; -- int src_y = s->twoD_source & 0xFFFF; -- int dst_x = (s->twoD_destination >> 16) & 0x01FFF; -- int dst_y = s->twoD_destination & 0xFFFF; -- int width = (s->twoD_dimension >> 16) & 0x1FFF; -- int height = s->twoD_dimension & 0xFFFF; -- uint32_t color = s->twoD_foreground; - int format = (s->twoD_stretch >> 20) & 0x3; - int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */ - /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ - int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; - int rop = s->twoD_control & 0xFF; -- uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; -+ int dst_x = (s->twoD_destination >> 16) & 0x01FFF; -+ int dst_y = s->twoD_destination & 0xFFFF; -+ int width = (s->twoD_dimension >> 16) & 0x1FFF; -+ int height = s->twoD_dimension & 0xFFFF; - uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF; -- -- /* get frame buffer info */ -- uint8_t *src = s->local_mem + src_base; - uint8_t *dst = s->local_mem + dst_base; -- int src_pitch = s->twoD_pitch & 0x1FFF; - int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; - int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 
1 : 0; - int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); -@@ -758,6 +749,13 @@ static void sm501_2d_operation(SM501State *s) - - switch (cmd) { - case 0x00: /* copy area */ -+ { -+ int src_x = (s->twoD_source >> 16) & 0x01FFF; -+ int src_y = s->twoD_source & 0xFFFF; -+ uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; -+ uint8_t *src = s->local_mem + src_base; -+ int src_pitch = s->twoD_pitch & 0x1FFF; -+ - #define COPY_AREA(_bpp, _pixel_type, rtl) { \ - int y, x, index_d, index_s; \ - for (y = 0; y < height; y++) { \ -@@ -793,8 +791,11 @@ static void sm501_2d_operation(SM501State *s) - break; - } - break; -- -+ } - case 0x01: /* fill rectangle */ -+ { -+ uint32_t color = s->twoD_foreground; -+ - #define FILL_RECT(_bpp, _pixel_type) { \ - int y, x; \ - for (y = 0; y < height; y++) { \ -@@ -819,7 +820,7 @@ static void sm501_2d_operation(SM501State *s) - break; - } - break; -- -+ } - default: - qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", - cmd); --- -2.23.0 - diff --git a/sm501-Convert-printf-abort-to-qemu_log_mask.patch b/sm501-Convert-printf-abort-to-qemu_log_mask.patch deleted file mode 100644 index 14a530bd78510f91daf5f78e9b9103f1b729f9cf..0000000000000000000000000000000000000000 --- a/sm501-Convert-printf-abort-to-qemu_log_mask.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 428e3a78ddf1de3dfb914043d6a8668f73ef8bb3 Mon Sep 17 00:00:00 2001 -From: BALATON Zoltan -Date: Thu, 21 May 2020 21:39:44 +0200 -Subject: [PATCH] sm501: Convert printf + abort to qemu_log_mask -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Some places already use qemu_log_mask() to log unimplemented features -or errors but some others have printf() then abort(). Convert these to -qemu_log_mask() and avoid aborting to prevent guests to easily cause -denial of service. 
- -Signed-off-by: BALATON Zoltan -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 305af87f59d81e92f2aaff09eb8a3603b8baa322.1590089984.git.balaton@eik.bme.hu -Signed-off-by: Gerd Hoffmann ---- - hw/display/sm501.c | 57 ++++++++++++++++++++++------------------------ - 1 file changed, 27 insertions(+), 30 deletions(-) - -diff --git a/hw/display/sm501.c b/hw/display/sm501.c -index 5918f59b2b..aa4b202a48 100644 ---- a/hw/display/sm501.c -+++ b/hw/display/sm501.c -@@ -727,8 +727,8 @@ static void sm501_2d_operation(SM501State *s) - int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); - - if (addressing != 0x0) { -- printf("%s: only XY addressing is supported.\n", __func__); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); -+ return; - } - - if (rop_mode == 0) { -@@ -754,8 +754,8 @@ static void sm501_2d_operation(SM501State *s) - - if ((s->twoD_source_base & 0x08000000) || - (s->twoD_destination_base & 0x08000000)) { -- printf("%s: only local memory is supported.\n", __func__); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); -+ return; - } - - switch (operation) { -@@ -823,9 +823,9 @@ static void sm501_2d_operation(SM501State *s) - break; - - default: -- printf("non-implemented SM501 2D operation. %d\n", operation); -- abort(); -- break; -+ qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", -+ operation); -+ return; - } - - if (dst_base >= get_fb_addr(s, crt) && -@@ -892,9 +892,8 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr, - break; - - default: -- printf("sm501 system config : not implemented register read." -- " addr=%x\n", (int)addr); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config" -+ "register read. 
addr=%" HWADDR_PRIx "\n", addr); - } - - return ret; -@@ -948,15 +947,15 @@ static void sm501_system_config_write(void *opaque, hwaddr addr, - break; - case SM501_ENDIAN_CONTROL: - if (value & 0x00000001) { -- printf("sm501 system config : big endian mode not implemented.\n"); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: system config big endian mode not" -+ " implemented.\n"); - } - break; - - default: -- printf("sm501 system config : not implemented register write." -- " addr=%x, val=%x\n", (int)addr, (uint32_t)value); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config" -+ "register write. addr=%" HWADDR_PRIx -+ ", val=%" PRIx64 "\n", addr, value); - } - } - -@@ -1207,9 +1206,8 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr, - break; - - default: -- printf("sm501 disp ctrl : not implemented register read." -- " addr=%x\n", (int)addr); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " -+ "read. addr=%" HWADDR_PRIx "\n", addr); - } - - return ret; -@@ -1345,9 +1343,9 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr, - break; - - default: -- printf("sm501 disp ctrl : not implemented register write." -- " addr=%x, val=%x\n", (int)addr, (unsigned)value); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " -+ "write. addr=%" HWADDR_PRIx -+ ", val=%" PRIx64 "\n", addr, value); - } - } - -@@ -1433,9 +1431,8 @@ static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr, - ret = 0; /* Should return interrupt status */ - break; - default: -- printf("sm501 disp ctrl : not implemented register read." -- " addr=%x\n", (int)addr); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " -+ "read. 
addr=%" HWADDR_PRIx "\n", addr); - } - - return ret; -@@ -1520,9 +1517,9 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr, - /* ignored, writing 0 should clear interrupt status */ - break; - default: -- printf("sm501 2d engine : not implemented register write." -- " addr=%x, val=%x\n", (int)addr, (unsigned)value); -- abort(); -+ qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2d engine register " -+ "write. addr=%" HWADDR_PRIx -+ ", val=%" PRIx64 "\n", addr, value); - } - } - -@@ -1670,9 +1667,9 @@ static void sm501_update_display(void *opaque) - draw_line = draw_line32_funcs[dst_depth_index]; - break; - default: -- printf("sm501 update display : invalid control register value.\n"); -- abort(); -- break; -+ qemu_log_mask(LOG_GUEST_ERROR, "sm501: update display" -+ "invalid control register value.\n"); -+ return; - } - - /* set up to draw hardware cursor */ --- -2.23.0 - diff --git a/sm501-Replace-hand-written-implementation-with-pixma.patch b/sm501-Replace-hand-written-implementation-with-pixma.patch deleted file mode 100644 index 42fa23aa934b29c19921f869530b93f1f24e1def..0000000000000000000000000000000000000000 --- a/sm501-Replace-hand-written-implementation-with-pixma.patch +++ /dev/null @@ -1,261 +0,0 @@ -From bbbf2c2f4201eb84a5bcd07a92399fe166d682e9 Mon Sep 17 00:00:00 2001 -From: BALATON Zoltan -Date: Thu, 21 May 2020 21:39:44 +0200 -Subject: [PATCH] sm501: Replace hand written implementation with pixman where - possible - -Besides being faster this should also prevent malicious guests to -abuse 2D engine to overwrite data or cause a crash. 
- -Signed-off-by: BALATON Zoltan -Message-id: 58666389b6cae256e4e972a32c05cf8aa51bffc0.1590089984.git.balaton@eik.bme.hu -Signed-off-by: Gerd Hoffmann ---- - hw/display/sm501.c | 207 ++++++++++++++++++++++++++------------------- - 1 file changed, 119 insertions(+), 88 deletions(-) - -diff --git a/hw/display/sm501.c b/hw/display/sm501.c -index 98b3b97f7b..7dc4bb18b7 100644 ---- a/hw/display/sm501.c -+++ b/hw/display/sm501.c -@@ -706,13 +706,12 @@ static void sm501_2d_operation(SM501State *s) - /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ - int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; - int rop = s->twoD_control & 0xFF; -- int dst_x = (s->twoD_destination >> 16) & 0x01FFF; -- int dst_y = s->twoD_destination & 0xFFFF; -- int width = (s->twoD_dimension >> 16) & 0x1FFF; -- int height = s->twoD_dimension & 0xFFFF; -+ unsigned int dst_x = (s->twoD_destination >> 16) & 0x01FFF; -+ unsigned int dst_y = s->twoD_destination & 0xFFFF; -+ unsigned int width = (s->twoD_dimension >> 16) & 0x1FFF; -+ unsigned int height = s->twoD_dimension & 0xFFFF; - uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF; -- uint8_t *dst = s->local_mem + dst_base; -- int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; -+ unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; - int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 
1 : 0; - int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); - -@@ -721,104 +720,136 @@ static void sm501_2d_operation(SM501State *s) - return; - } - -- if (rop_mode == 0) { -- if (rop != 0xcc) { -- /* Anything other than plain copies are not supported */ -- qemu_log_mask(LOG_UNIMP, "sm501: rop3 mode with rop %x is not " -- "supported.\n", rop); -- } -- } else { -- if (rop2_source_is_pattern && rop != 0x5) { -- /* For pattern source, we support only inverse dest */ -- qemu_log_mask(LOG_UNIMP, "sm501: rop2 source being the pattern and " -- "rop %x is not supported.\n", rop); -- } else { -- if (rop != 0x5 && rop != 0xc) { -- /* Anything other than plain copies or inverse dest is not -- * supported */ -- qemu_log_mask(LOG_UNIMP, "sm501: rop mode %x is not " -- "supported.\n", rop); -- } -- } -- } -- - if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) { - qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); - return; - } - -+ if (!dst_pitch) { -+ qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero dest pitch.\n"); -+ return; -+ } -+ -+ if (!width || !height) { -+ qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero size 2D op.\n"); -+ return; -+ } -+ -+ if (rtl) { -+ dst_x -= width - 1; -+ dst_y -= height - 1; -+ } -+ -+ if (dst_base >= get_local_mem_size(s) || dst_base + -+ (dst_x + width + (dst_y + height) * (dst_pitch + width)) * -+ (1 << format) >= get_local_mem_size(s)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "sm501: 2D op dest is outside vram.\n"); -+ return; -+ } -+ - switch (cmd) { -- case 0x00: /* copy area */ -+ case 0: /* BitBlt */ - { -- int src_x = (s->twoD_source >> 16) & 0x01FFF; -- int src_y = s->twoD_source & 0xFFFF; -+ unsigned int src_x = (s->twoD_source >> 16) & 0x01FFF; -+ unsigned int src_y = s->twoD_source & 0xFFFF; - uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; -- uint8_t *src = s->local_mem + src_base; -- int src_pitch = s->twoD_pitch & 0x1FFF; -- --#define COPY_AREA(_bpp, _pixel_type, rtl) { \ -- 
int y, x, index_d, index_s; \ -- for (y = 0; y < height; y++) { \ -- for (x = 0; x < width; x++) { \ -- _pixel_type val; \ -- \ -- if (rtl) { \ -- index_s = ((src_y - y) * src_pitch + src_x - x) * _bpp; \ -- index_d = ((dst_y - y) * dst_pitch + dst_x - x) * _bpp; \ -- } else { \ -- index_s = ((src_y + y) * src_pitch + src_x + x) * _bpp; \ -- index_d = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ -- } \ -- if (rop_mode == 1 && rop == 5) { \ -- /* Invert dest */ \ -- val = ~*(_pixel_type *)&dst[index_d]; \ -- } else { \ -- val = *(_pixel_type *)&src[index_s]; \ -- } \ -- *(_pixel_type *)&dst[index_d] = val; \ -- } \ -- } \ -- } -- switch (format) { -- case 0: -- COPY_AREA(1, uint8_t, rtl); -- break; -- case 1: -- COPY_AREA(2, uint16_t, rtl); -- break; -- case 2: -- COPY_AREA(4, uint32_t, rtl); -- break; -+ unsigned int src_pitch = s->twoD_pitch & 0x1FFF; -+ -+ if (!src_pitch) { -+ qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero src pitch.\n"); -+ return; -+ } -+ -+ if (rtl) { -+ src_x -= width - 1; -+ src_y -= height - 1; -+ } -+ -+ if (src_base >= get_local_mem_size(s) || src_base + -+ (src_x + width + (src_y + height) * (src_pitch + width)) * -+ (1 << format) >= get_local_mem_size(s)) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "sm501: 2D op src is outside vram.\n"); -+ return; -+ } -+ -+ if ((rop_mode && rop == 0x5) || (!rop_mode && rop == 0x55)) { -+ /* Invert dest, is there a way to do this with pixman? 
*/ -+ unsigned int x, y, i; -+ uint8_t *d = s->local_mem + dst_base; -+ -+ for (y = 0; y < height; y++) { -+ i = (dst_x + (dst_y + y) * dst_pitch) * (1 << format); -+ for (x = 0; x < width; x++, i += (1 << format)) { -+ switch (format) { -+ case 0: -+ d[i] = ~d[i]; -+ break; -+ case 1: -+ *(uint16_t *)&d[i] = ~*(uint16_t *)&d[i]; -+ break; -+ case 2: -+ *(uint32_t *)&d[i] = ~*(uint32_t *)&d[i]; -+ break; -+ } -+ } -+ } -+ } else { -+ /* Do copy src for unimplemented ops, better than unpainted area */ -+ if ((rop_mode && (rop != 0xc || rop2_source_is_pattern)) || -+ (!rop_mode && rop != 0xcc)) { -+ qemu_log_mask(LOG_UNIMP, -+ "sm501: rop%d op %x%s not implemented\n", -+ (rop_mode ? 2 : 3), rop, -+ (rop2_source_is_pattern ? -+ " with pattern source" : "")); -+ } -+ /* Check for overlaps, this could be made more exact */ -+ uint32_t sb, se, db, de; -+ sb = src_base + src_x + src_y * (width + src_pitch); -+ se = sb + width + height * (width + src_pitch); -+ db = dst_base + dst_x + dst_y * (width + dst_pitch); -+ de = db + width + height * (width + dst_pitch); -+ if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { -+ /* regions may overlap: copy via temporary */ -+ int llb = width * (1 << format); -+ int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); -+ uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) * -+ height); -+ pixman_blt((uint32_t *)&s->local_mem[src_base], tmp, -+ src_pitch * (1 << format) / sizeof(uint32_t), -+ tmp_stride, 8 * (1 << format), 8 * (1 << format), -+ src_x, src_y, 0, 0, width, height); -+ pixman_blt(tmp, (uint32_t *)&s->local_mem[dst_base], -+ tmp_stride, -+ dst_pitch * (1 << format) / sizeof(uint32_t), -+ 8 * (1 << format), 8 * (1 << format), -+ 0, 0, dst_x, dst_y, width, height); -+ g_free(tmp); -+ } else { -+ pixman_blt((uint32_t *)&s->local_mem[src_base], -+ (uint32_t *)&s->local_mem[dst_base], -+ src_pitch * (1 << format) / sizeof(uint32_t), -+ dst_pitch * (1 << format) / sizeof(uint32_t), -+ 8 * (1 << format), 8 * (1 << 
format), -+ src_x, src_y, dst_x, dst_y, width, height); -+ } - } - break; - } -- case 0x01: /* fill rectangle */ -+ case 1: /* Rectangle Fill */ - { - uint32_t color = s->twoD_foreground; - --#define FILL_RECT(_bpp, _pixel_type) { \ -- int y, x; \ -- for (y = 0; y < height; y++) { \ -- for (x = 0; x < width; x++) { \ -- int index = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ -- *(_pixel_type *)&dst[index] = (_pixel_type)color; \ -- } \ -- } \ -- } -- -- switch (format) { -- case 0: -- FILL_RECT(1, uint8_t); -- break; -- case 1: -- color = cpu_to_le16(color); -- FILL_RECT(2, uint16_t); -- break; -- case 2: -+ if (format == 2) { - color = cpu_to_le32(color); -- FILL_RECT(4, uint32_t); -- break; -+ } else if (format == 1) { -+ color = cpu_to_le16(color); - } -+ -+ pixman_fill((uint32_t *)&s->local_mem[dst_base], -+ dst_pitch * (1 << format) / sizeof(uint32_t), -+ 8 * (1 << format), dst_x, dst_y, width, height, color); - break; - } - default: --- -2.23.0 - diff --git a/sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch b/sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch deleted file mode 100644 index a2eefcba1e31e338ad6d934bf9e112a98d013b04..0000000000000000000000000000000000000000 --- a/sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch +++ /dev/null @@ -1,134 +0,0 @@ -From bc472e56b985db1de73a7ddab5ea8568d6e7f327 Mon Sep 17 00:00:00 2001 -From: BALATON Zoltan -Date: Thu, 21 May 2020 21:39:44 +0200 -Subject: [PATCH] sm501: Shorten long variable names in sm501_2d_operation - -This increases readability and cleans up some confusing naming. 
- -Signed-off-by: BALATON Zoltan -Message-id: b9b67b94c46e945252a73c77dfd117132c63c4fb.1590089984.git.balaton@eik.bme.hu -Signed-off-by: Gerd Hoffmann ---- - hw/display/sm501.c | 45 ++++++++++++++++++++++----------------------- - 1 file changed, 22 insertions(+), 23 deletions(-) - -diff --git a/hw/display/sm501.c b/hw/display/sm501.c -index aa4b202a48..51e7ccc39d 100644 ---- a/hw/display/sm501.c -+++ b/hw/display/sm501.c -@@ -700,17 +700,16 @@ static inline void hwc_invalidate(SM501State *s, int crt) - static void sm501_2d_operation(SM501State *s) - { - /* obtain operation parameters */ -- int operation = (s->twoD_control >> 16) & 0x1f; -+ int cmd = (s->twoD_control >> 16) & 0x1F; - int rtl = s->twoD_control & 0x8000000; - int src_x = (s->twoD_source >> 16) & 0x01FFF; - int src_y = s->twoD_source & 0xFFFF; - int dst_x = (s->twoD_destination >> 16) & 0x01FFF; - int dst_y = s->twoD_destination & 0xFFFF; -- int operation_width = (s->twoD_dimension >> 16) & 0x1FFF; -- int operation_height = s->twoD_dimension & 0xFFFF; -+ int width = (s->twoD_dimension >> 16) & 0x1FFF; -+ int height = s->twoD_dimension & 0xFFFF; - uint32_t color = s->twoD_foreground; -- int format_flags = (s->twoD_stretch >> 20) & 0x3; -- int addressing = (s->twoD_stretch >> 16) & 0xF; -+ int format = (s->twoD_stretch >> 20) & 0x3; - int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */ - /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ - int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; -@@ -721,12 +720,12 @@ static void sm501_2d_operation(SM501State *s) - /* get frame buffer info */ - uint8_t *src = s->local_mem + src_base; - uint8_t *dst = s->local_mem + dst_base; -- int src_width = s->twoD_pitch & 0x1FFF; -- int dst_width = (s->twoD_pitch >> 16) & 0x1FFF; -+ int src_pitch = s->twoD_pitch & 0x1FFF; -+ int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; - int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 
1 : 0; - int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); - -- if (addressing != 0x0) { -+ if ((s->twoD_stretch >> 16) & 0xF) { - qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); - return; - } -@@ -758,20 +757,20 @@ static void sm501_2d_operation(SM501State *s) - return; - } - -- switch (operation) { -+ switch (cmd) { - case 0x00: /* copy area */ - #define COPY_AREA(_bpp, _pixel_type, rtl) { \ - int y, x, index_d, index_s; \ -- for (y = 0; y < operation_height; y++) { \ -- for (x = 0; x < operation_width; x++) { \ -+ for (y = 0; y < height; y++) { \ -+ for (x = 0; x < width; x++) { \ - _pixel_type val; \ - \ - if (rtl) { \ -- index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \ -- index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \ -+ index_s = ((src_y - y) * src_pitch + src_x - x) * _bpp; \ -+ index_d = ((dst_y - y) * dst_pitch + dst_x - x) * _bpp; \ - } else { \ -- index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \ -- index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ -+ index_s = ((src_y + y) * src_pitch + src_x + x) * _bpp; \ -+ index_d = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ - } \ - if (rop_mode == 1 && rop == 5) { \ - /* Invert dest */ \ -@@ -783,7 +782,7 @@ static void sm501_2d_operation(SM501State *s) - } \ - } \ - } -- switch (format_flags) { -+ switch (format) { - case 0: - COPY_AREA(1, uint8_t, rtl); - break; -@@ -799,15 +798,15 @@ static void sm501_2d_operation(SM501State *s) - case 0x01: /* fill rectangle */ - #define FILL_RECT(_bpp, _pixel_type) { \ - int y, x; \ -- for (y = 0; y < operation_height; y++) { \ -- for (x = 0; x < operation_width; x++) { \ -- int index = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ -+ for (y = 0; y < height; y++) { \ -+ for (x = 0; x < width; x++) { \ -+ int index = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ - *(_pixel_type *)&dst[index] = (_pixel_type)color; \ - } \ - } \ - } - -- switch (format_flags) { -+ switch (format) { - case 0: 
- FILL_RECT(1, uint8_t); - break; -@@ -824,14 +823,14 @@ static void sm501_2d_operation(SM501State *s) - - default: - qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", -- operation); -+ cmd); - return; - } - - if (dst_base >= get_fb_addr(s, crt) && - dst_base <= get_fb_addr(s, crt) + fb_len) { -- int dst_len = MIN(fb_len, ((dst_y + operation_height - 1) * dst_width + -- dst_x + operation_width) * (1 << format_flags)); -+ int dst_len = MIN(fb_len, ((dst_y + height - 1) * dst_pitch + -+ dst_x + width) * (1 << format)); - if (dst_len) { - memory_region_set_dirty(&s->local_mem_region, dst_base, dst_len); - } --- -2.23.0 - diff --git a/sm501-Use-BIT-x-macro-to-shorten-constant.patch b/sm501-Use-BIT-x-macro-to-shorten-constant.patch deleted file mode 100644 index 697d0ee61d5cba82611b05f037a25397de5073b6..0000000000000000000000000000000000000000 --- a/sm501-Use-BIT-x-macro-to-shorten-constant.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 9f1e9012047639121eb275a4f8f5693d340e91f6 Mon Sep 17 00:00:00 2001 -From: BALATON Zoltan -Date: Thu, 21 May 2020 21:39:44 +0200 -Subject: [PATCH] sm501: Use BIT(x) macro to shorten constant -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: BALATON Zoltan -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 124bf5de8d7cf503b32b377d0445029a76bfbd49.1590089984.git.balaton@eik.bme.hu -Signed-off-by: Gerd Hoffmann ---- - hw/display/sm501.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/hw/display/sm501.c b/hw/display/sm501.c -index 51e7ccc39d..f3d11d0b23 100644 ---- a/hw/display/sm501.c -+++ b/hw/display/sm501.c -@@ -701,7 +701,7 @@ static void sm501_2d_operation(SM501State *s) - { - /* obtain operation parameters */ - int cmd = (s->twoD_control >> 16) & 0x1F; -- int rtl = s->twoD_control & 0x8000000; -+ int rtl = s->twoD_control & BIT(27); - int src_x = (s->twoD_source >> 16) & 0x01FFF; - int src_y = s->twoD_source & 0xFFFF; - int dst_x = 
(s->twoD_destination >> 16) & 0x01FFF; -@@ -751,8 +751,7 @@ static void sm501_2d_operation(SM501State *s) - } - } - -- if ((s->twoD_source_base & 0x08000000) || -- (s->twoD_destination_base & 0x08000000)) { -+ if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) { - qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); - return; - } --- -2.23.0 - diff --git a/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch b/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch deleted file mode 100644 index cfd1842d3f199ec1db6e324e448df37b4553ea2a..0000000000000000000000000000000000000000 --- a/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 2b8ad77678da175cb92c902955cb85827e661de3 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Tue, 14 Apr 2020 14:53:44 +0800 -Subject: [PATCH] smbios: Add missing member of type 4 for smbios 3.0 - -According to smbios 3.0 spec, for processor information (type 4), -it adds three new members (Core Count 2, Core enabled 2, thread count 2) for 3.0, Without this three members, we can not get correct cpu frequency from dmi, -Because it will failed to check the length of Processor Infomation in DMI. - -The corresponding codes in kernel is like: - if (dm->type == DMI_ENTRY_PROCESSOR && - dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { - u16 val = (u16)get_unaligned((const u16 *) - (dmi_data + DMI_PROCESSOR_MAX_SPEED)); - *mhz = val > *mhz ? 
val : *mhz; - } - -Signed-off-by: zhanghailiang ---- - hw/smbios/smbios.c | 4 +++- - include/hw/firmware/smbios.h | 3 +++ - 2 files changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 7bcd67b0..51b00d44 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -603,7 +603,9 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) - t->thread_count = ms->smp.threads; - t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ - t->processor_family2 = cpu_to_le16(0x01); /* Other */ -- -+ t->corecount2 = 0; -+ t->enabledcorecount2 = 0; -+ t->threadcount2 = 0; - SMBIOS_BUILD_TABLE_POST; - smbios_type4_count++; - } -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced0..6887bca4 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -193,6 +193,9 @@ struct smbios_type_4 { - uint8_t thread_count; - uint16_t processor_characteristics; - uint16_t processor_family2; -+ uint16_t corecount2; -+ uint16_t enabledcorecount2; -+ uint16_t threadcount2; - } QEMU_PACKED; - - /* SMBIOS type 11 - OEM strings */ --- -2.23.0 diff --git a/smbios-Fix-buffer-overrun-when-using-path-option.patch b/smbios-Fix-buffer-overrun-when-using-path-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..b96a186b83148124dc7e74e5cc7afa3168ee8506 --- /dev/null +++ b/smbios-Fix-buffer-overrun-when-using-path-option.patch @@ -0,0 +1,42 @@ +From 7c76516fee790add2ba308b38999e5cebbd24523 Mon Sep 17 00:00:00 2001 +From: jiesong +Date: Wed, 13 Aug 2025 23:11:18 +0800 +Subject: [PATCH] smbios: Fix buffer overrun when using path= option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We have to make sure the array of bytes read from the path= file +is null-terminated, otherwise we run into a buffer overrun later on. 
+ +Fixes: bb99f477 ("hw/smbios: support loading OEM strings values from a file") +Resolves: #2879 + +Signed-off-by: Daan De Meyer +Reviewed-by: Daniel P. Berrangé +Tested-by: Valentin David +Message-ID: <20250323213622.2581013-1-daan.j.demeyer@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit a7a05f5f) +Signed-off-by: Michael Tokarev +--- + hw/smbios/smbios.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index c0c5a81e66..be726ce4ac 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1223,6 +1223,9 @@ static int save_opt_one(void *opaque, + g_byte_array_append(data, (guint8 *)buf, ret); + } + ++ buf[0] = '\0'; ++ g_byte_array_append(data, (guint8 *)buf, 1); ++ + qemu_close(fd); + + *opt->dest = g_renew(char *, *opt->dest, (*opt->ndest) + 1); +-- +2.33.0 + diff --git a/smbios-add-processor-family-option.patch b/smbios-add-processor-family-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..c367176ff8ee7f67bfcae273b55b8223a043c78d --- /dev/null +++ b/smbios-add-processor-family-option.patch @@ -0,0 +1,110 @@ +From 8bbff5547eb88886ee13fa8eb95658318c457298 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Fri, 30 May 2025 09:07:29 +0800 +Subject: [PATCH] smbios: add processor-family option + +commit b5831d79671cea3f7bd42cffab93fe6eab8c3db0 upstream + +For RISC-V the SMBIOS standard requires specific values of the processor +family value depending on the bitness of the CPU. + +Add a processor-family option for SMBIOS table 4. + +The value of processor-family may exceed 255 and therefore must be provided +in the Processor Family 2 field. Set the Processor Family field to 0xFE +which signals that the Processor Family 2 is used. 
+ +Signed-off-by: Heinrich Schuchardt +Reviewed-by: Alistair Francis +Reviewed-by: Andrew Jones +Message-ID: <20240123184229.10415-2-heinrich.schuchardt@canonical.com> +Signed-off-by: Alistair Francis +--- + hw/smbios/smbios.c | 13 +++++++++++-- + qemu-options.hx | 4 ++-- + 2 files changed, 13 insertions(+), 4 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 2a90601ac5..647bc6d603 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -102,6 +102,7 @@ static struct { + #define DEFAULT_CPU_SPEED 2000 + + static struct { ++ uint16_t processor_family; + const char *sock_pfx, *manufacturer, *version, *serial, *asset, *part; + uint64_t max_speed; + uint64_t current_speed; +@@ -110,6 +111,7 @@ static struct { + .max_speed = DEFAULT_CPU_SPEED, + .current_speed = DEFAULT_CPU_SPEED, + .processor_id = 0, ++ .processor_family = 0x01, /* Other */ + }; + + struct type8_instance { +@@ -337,6 +339,10 @@ static const QemuOptDesc qemu_smbios_type4_opts[] = { + .name = "part", + .type = QEMU_OPT_STRING, + .help = "part number", ++ }, { ++ .name = "processor-family", ++ .type = QEMU_OPT_NUMBER, ++ .help = "processor family", + }, { + .name = "processor-id", + .type = QEMU_OPT_NUMBER, +@@ -726,7 +732,7 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) + snprintf(sock_str, sizeof(sock_str), "%s%2x", type4.sock_pfx, instance); + SMBIOS_TABLE_SET_STR(4, socket_designation_str, sock_str); + t->processor_type = 0x03; /* CPU */ +- t->processor_family = 0x01; /* Other */ ++ t->processor_family = 0xfe; /* use Processor Family 2 field */ + SMBIOS_TABLE_SET_STR(4, processor_manufacturer_str, type4.manufacturer); + if (type4.processor_id == 0) { + t->processor_id[0] = cpu_to_le32(smbios_cpuid_version); +@@ -758,7 +764,7 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) + t->thread_count = (threads_per_socket > 255) ? 
0xFF : threads_per_socket; + + t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ +- t->processor_family2 = cpu_to_le16(0x01); /* Other */ ++ t->processor_family2 = cpu_to_le16(type4.processor_family); + + if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { + t->core_count2 = t->core_enabled2 = cpu_to_le16(cores_per_socket); +@@ -1402,6 +1408,9 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + return; + } + save_opt(&type4.sock_pfx, opts, "sock_pfx"); ++ type4.processor_family = qemu_opt_get_number(opts, ++ "processor-family", ++ 0x01 /* Other */); + save_opt(&type4.manufacturer, opts, "manufacturer"); + save_opt(&type4.version, opts, "version"); + save_opt(&type4.serial, opts, "serial"); +diff --git a/qemu-options.hx b/qemu-options.hx +index 7fe76c4b1d..cbaa2e5367 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2679,7 +2679,7 @@ DEF("smbios", HAS_ARG, QEMU_OPTION_smbios, + " specify SMBIOS type 3 fields\n" + "-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str]\n" + " [,asset=str][,part=str][,max-speed=%d][,current-speed=%d]\n" +- " [,processor-id=%d]\n" ++ " [,processor-family=%d,processor-id=%d]\n" + " specify SMBIOS type 4 fields\n" + "-smbios type=8[,external_reference=str][,internal_reference=str][,connector_type=%d][,port_type=%d]\n" + " specify SMBIOS type 8 fields\n" +@@ -2707,7 +2707,7 @@ SRST + ``-smbios type=3[,manufacturer=str][,version=str][,serial=str][,asset=str][,sku=str]`` + Specify SMBIOS type 3 fields + +-``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]`` ++``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-family=%d][,processor-id=%d]`` + Specify SMBIOS type 4 fields + + ``-smbios type=11[,value=str][,path=filename]`` +-- +2.33.0 + diff --git a/smbios-function-to-set-default-processor-family.patch b/smbios-function-to-set-default-processor-family.patch new file mode 100644 
index 0000000000000000000000000000000000000000..9b312dc230c5be8474ba333383c12e899af5eecc --- /dev/null +++ b/smbios-function-to-set-default-processor-family.patch @@ -0,0 +1,51 @@ +From 4e1255411ea509a014d860f3cab1b5425b6556c8 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Fri, 30 May 2025 09:08:40 +0800 +Subject: [PATCH] smbios: function to set default processor family + +commit 6f3b727bcc867688034ef1489a58e958142973b1 upstream + +Provide a function to set the default processor family. + +Signed-off-by: Heinrich Schuchardt +Reviewed-by: Andrew Jones +Message-ID: <20240123184229.10415-3-heinrich.schuchardt@canonical.com> +Signed-off-by: Alistair Francis +--- + hw/smbios/smbios.c | 7 +++++++ + include/hw/firmware/smbios.h | 1 + + 2 files changed, 8 insertions(+) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 647bc6d603..c0c5a81e66 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -989,6 +989,13 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + field = value; \ + } + ++void smbios_set_default_processor_family(uint16_t processor_family) ++{ ++ if (type4.processor_family <= 0x01) { ++ type4.processor_family = processor_family; ++ } ++} ++ + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, + bool uuid_encoded, SmbiosEntryPointType ep_type) +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 7f3259a630..6e514982d4 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -295,6 +295,7 @@ void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, + bool uuid_encoded, SmbiosEntryPointType ep_type); ++void smbios_set_default_processor_family(uint16_t processor_family); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct 
smbios_phys_mem_area *mem_array, +-- +2.33.0 + diff --git a/smmu-common-Return-sysmem-address-space-only-for-vfi.patch b/smmu-common-Return-sysmem-address-space-only-for-vfi.patch new file mode 100644 index 0000000000000000000000000000000000000000..d68ab699d84cb1b67896ba70d1a7c1c7888cb464 --- /dev/null +++ b/smmu-common-Return-sysmem-address-space-only-for-vfi.patch @@ -0,0 +1,39 @@ +From 58f66c2581b3c4a45a02717330f1b2188424889b Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 15 Jan 2025 16:11:21 +0000 +Subject: [PATCH] smmu-common: Return sysmem address space only for vfio-pci + +This will enable pcie-root-port hotplug event irq to work. + +Discussion Link: https://lore.kernel.org/qemu-devel/74114c0db34b420a90e9fe5bd991767e@huawei.com/ + +Signed-off-by: Shameer Kolothum +--- + hw/arm/smmu-common.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 3a257a5b0e..6c4b82757f 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -639,9 +639,16 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) + SMMUState *s = opaque; + SMMUPciBus *sbus = smmu_get_sbus(s, bus); + SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn); ++ bool is_vfio = false; ++ PCIDevice *pdev; ++ ++ pdev = pci_find_device(bus, pci_bus_num(bus), devfn); ++ if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { ++ is_vfio = true; ++ } + + /* Return the system as if the device uses stage-2 only */ +- if (s->nested && !sdev->s1_hwpt) { ++ if (s->nested && !sdev->s1_hwpt && is_vfio) { + return &sdev->as_sysmem; + } else { + return &sdev->as; +-- +2.41.0.windows.1 + diff --git a/smmuv3-Add-support-for-page-fault-handling.patch b/smmuv3-Add-support-for-page-fault-handling.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e4ce9dc21df5d7ec6925c995bf6fe7e26d630e3 --- /dev/null +++ b/smmuv3-Add-support-for-page-fault-handling.patch @@ -0,0 +1,462 @@ +From 
ebfa7213e32faafd5532d6f5b3cb873018b671ae Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Thu, 10 Oct 2024 06:19:31 +0000 +Subject: [PATCH] smmuv3: Add support for page fault handling + +Handle page fault from host and send response back. + +Signed-off-by: Shameer Kolothum +--- + backends/iommufd.c | 20 +++- + hw/arm/smmu-common.c | 39 ++++++-- + hw/arm/smmuv3.c | 188 ++++++++++++++++++++++++++++++++++- + hw/vfio/iommufd.c | 2 +- + include/hw/arm/smmu-common.h | 24 ++++- + include/sysemu/iommufd.h | 2 +- + 6 files changed, 263 insertions(+), 12 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index ee6f5bcf65..e9ce82297b 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -228,7 +228,7 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + uint32_t pt_id, uint32_t flags, + uint32_t data_type, uint32_t data_len, + void *data_ptr, uint32_t *out_hwpt, +- Error **errp) ++ uint32_t *out_fault_fd, Error **errp) + { + int ret, fd = be->fd; + struct iommu_hwpt_alloc alloc_hwpt = { +@@ -241,6 +241,24 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + .data_uptr = (uintptr_t)data_ptr, + }; + ++ if (flags & IOMMU_HWPT_FAULT_ID_VALID) { ++ ++ struct iommu_fault_alloc cmd = { ++ .size = sizeof(cmd), ++ }; ++ ++ ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd); ++ if (ret) { ++ ret = -errno; ++ error_report("IOMMU_FAULT_ALLOC failed: %m"); ++ } else { ++ alloc_hwpt.fault_id = cmd.out_fault_id; ++ if (out_fault_fd) { ++ *out_fault_fd = cmd.out_fault_fd; ++ } ++ } ++ } ++ + ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt); + trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type, + data_len, (uintptr_t)data_ptr, +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index d0bc620606..c382fa16e5 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -670,7 +670,7 @@ static bool smmu_dev_attach_viommu(SMMUDevice *sdev, + if (!iommufd_backend_alloc_hwpt(idev->iommufd, 
idev->devid, idev->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, + IOMMU_HWPT_DATA_NONE, 0, NULL, +- &s2_hwpt_id, errp)) { ++ &s2_hwpt_id, NULL, errp)) { + error_setg(errp, "failed to allocate an S2 hwpt"); + return false; + } +@@ -695,7 +695,7 @@ static bool smmu_dev_attach_viommu(SMMUDevice *sdev, + viommu->core->viommu_id, 0, + IOMMU_HWPT_DATA_ARM_SMMUV3, + sizeof(abort_data), &abort_data, +- &viommu->abort_hwpt_id, errp)) { ++ &viommu->abort_hwpt_id, NULL, errp)) { + error_setg(errp, "failed to allocate an abort pagetable"); + goto free_viommu_core; + } +@@ -704,7 +704,7 @@ static bool smmu_dev_attach_viommu(SMMUDevice *sdev, + viommu->core->viommu_id, 0, + IOMMU_HWPT_DATA_ARM_SMMUV3, + sizeof(bypass_data), &bypass_data, +- &viommu->bypass_hwpt_id, errp)) { ++ &viommu->bypass_hwpt_id, NULL, errp)) { + error_setg(errp, "failed to allocate a bypass pagetable"); + goto free_abort_hwpt; + } +@@ -882,6 +882,25 @@ void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort) + hwpt_id = sdev->viommu->bypass_hwpt_id; + } + ++ /* ToDo: May be better to move the below to smmuv3. 
*/ ++ if (s1_hwpt->out_fault_fd) { ++ struct io_uring *ring = &s1_hwpt->fault_ring; ++ struct io_uring_sqe *sqe; ++ struct __kernel_timespec ts = {.tv_sec = 0, .tv_nsec = 1}; ++ ++ s1_hwpt->exiting = true; ++ /* Send out a timeout sqe for the read handler to exit */ ++ sqe = io_uring_get_sqe(ring); ++ io_uring_prep_timeout(sqe, &ts, 0, 0); ++ io_uring_submit(ring); ++ ++ qemu_cond_signal(&s1_hwpt->fault_cond); ++ qemu_thread_join(&s1_hwpt->read_fault_thread); ++ qemu_thread_join(&s1_hwpt->write_fault_thread); ++ qemu_mutex_destroy(&s1_hwpt->fault_mutex); ++ io_uring_queue_exit(&s1_hwpt->fault_ring); ++ } ++ + if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, NULL)) { + return; + } +@@ -892,11 +911,13 @@ void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort) + } + + int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type, +- uint32_t data_len, void *data) ++ uint32_t data_len, void *data, ++ bool req_fault_fd) + { + SMMUViommu *viommu = sdev->viommu; + SMMUS1Hwpt *s1_hwpt = sdev->s1_hwpt; + HostIOMMUDeviceIOMMUFD *idev = sdev->idev; ++ uint32_t flags = 0; + + if (!idev || !viommu) { + return -ENOENT; +@@ -912,12 +933,18 @@ int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type, + } + + s1_hwpt->smmu = sdev->smmu; ++ s1_hwpt->sdev = sdev; + s1_hwpt->viommu = viommu; + s1_hwpt->iommufd = idev->iommufd; + ++ if (req_fault_fd) { ++ flags |= IOMMU_HWPT_FAULT_ID_VALID; ++ } ++ + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, +- viommu->core->viommu_id, 0, data_type, +- data_len, data, &s1_hwpt->hwpt_id, NULL)) { ++ viommu->core->viommu_id, flags, data_type, ++ data_len, data, &s1_hwpt->hwpt_id, ++ &s1_hwpt->out_fault_fd, NULL)) { + goto free; + } + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 8d8dcccd48..30c0ae4c3b 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -34,6 +34,9 @@ + #include "hw/arm/smmuv3.h" + #include "smmuv3-internal.h" + #include "smmu-internal.h" ++#ifdef CONFIG_LINUX_IO_URING 
++#include ++#endif + #define PTW_RECORD_FAULT(cfg) (((cfg)->stage == 1) ? (cfg)->record_faults : \ + (cfg)->s2cfg.record_faults) +@@ -1258,6 +1261,165 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) + } + } + ++static void smmuv3_report_iommu_fault(SMMUS1Hwpt *hwpt, ++ struct iommu_hwpt_pgfault *fault) ++{ ++ PendFaultEntry *pend; ++ SMMUDevice *sdev = hwpt->sdev; ++ SMMUv3State *s3 = sdev->smmu; ++ uint32_t sid = smmu_get_sid(sdev); ++ SMMUEventInfo info = {0}; ++ ++ info.sid = sid; ++ info.type = SMMU_EVT_F_TRANSLATION; ++ info.u.f_translation.addr = fault->addr; ++ info.u.f_translation.stall = true; ++ info.u.f_translation.ssid = fault->pasid; ++ info.u.f_translation.stag = fault->grpid; ++ ++ if (fault->flags & IOMMU_PGFAULT_FLAGS_PASID_VALID) { ++ info.u.f_translation.ssv = true; ++ } ++ if (fault->perm & IOMMU_PGFAULT_PERM_READ) { ++ info.u.f_translation.rnw = true; ++ } ++ if (fault->perm & IOMMU_PGFAULT_PERM_PRIV) { ++ info.u.f_translation.pnu = true; ++ } ++ if (fault->perm & IOMMU_PGFAULT_PERM_EXEC) { ++ info.u.f_translation.ind = true; ++ } ++ ++ pend = g_new0(PendFaultEntry, 1); ++ memcpy(&pend->fault, fault, sizeof(*fault)); ++ qemu_mutex_lock(&hwpt->fault_mutex); ++ QTAILQ_INSERT_TAIL(&hwpt->pendfault, pend, entry); ++ qemu_mutex_unlock(&hwpt->fault_mutex); ++ smmuv3_record_event(s3, &info); ++ return; ++} ++ ++static void smmuv3_notify_stall_resume(SMMUState *bs, uint32_t sid, ++ uint32_t stag, uint32_t code) ++{ ++ SMMUDevice *sdev = smmu_find_sdev(bs, sid); ++ PageRespEntry *msg; ++ PendFaultEntry *pend, *tmp; ++ SMMUS1Hwpt *hwpt; ++ bool found = false; ++ ++ if (!sdev) { ++ return; ++ } ++ ++ hwpt = sdev->s1_hwpt; ++ msg = g_new0(PageRespEntry, 1); ++ ++ /* Kernel expects addr and pasid info for page response */ ++ qemu_mutex_lock(&hwpt->fault_mutex); ++ QTAILQ_FOREACH_SAFE(pend, &hwpt->pendfault, entry, tmp) { ++ if (pend->fault.grpid == stag) { ++ QTAILQ_REMOVE(&hwpt->pendfault, pend, entry); ++ msg->resp.cookie = 
pend->fault.cookie; ++ msg->resp.code = code; ++ QTAILQ_INSERT_TAIL(&hwpt->pageresp, msg, entry); ++ qemu_cond_signal(&hwpt->fault_cond); ++ ++ g_free(pend); ++ found = true; ++ break; ++ } ++ } ++ ++ qemu_mutex_unlock(&hwpt->fault_mutex); ++ if (!found) { ++ warn_report("No matching fault for resume(stag 0x%x), drop!", stag); ++ return; ++ } ++} ++ ++static void *write_fault_handler(void *opaque) ++{ ++ SMMUS1Hwpt *hwpt = opaque; ++ PageRespEntry *msg, *tmp; ++ struct iommu_hwpt_page_response *resp; ++ int ret; ++ ++ resp = g_new0(struct iommu_hwpt_page_response, 1); ++ while (!hwpt->exiting) { ++ /* Check we have any pending responses */ ++ qemu_mutex_lock(&hwpt->fault_mutex); ++ qemu_cond_wait(&hwpt->fault_cond, &hwpt->fault_mutex); ++ QTAILQ_FOREACH_SAFE(msg, &hwpt->pageresp, entry, tmp) { ++ QTAILQ_REMOVE(&hwpt->pageresp, msg, entry); ++ memcpy(resp, &msg->resp, sizeof(*resp)); ++ g_free(msg); ++ ++ ret = write(hwpt->out_fault_fd, resp, sizeof(*resp)); ++ if (ret != sizeof(*resp)) { ++ warn_report("Write resp[cookie 0x%x] fail %d", ++ resp->cookie, ret); ++ } ++ } ++ qemu_mutex_unlock(&hwpt->fault_mutex); ++ } ++ g_free(resp); ++ return NULL; ++} ++ ++static void *read_fault_handler(void *opaque) ++{ ++ SMMUS1Hwpt *hwpt = opaque; ++ struct io_uring_sqe *sqe; ++ struct io_uring_cqe *cqe; ++ struct iommu_hwpt_pgfault *fault; ++ struct io_uring *ring = &hwpt->fault_ring; ++ void *data; ++ int ret; ++ ++ fault = g_new0(struct iommu_hwpt_pgfault, 1); ++ while (!hwpt->exiting) { ++ sqe = io_uring_get_sqe(ring); ++ io_uring_prep_read(sqe, hwpt->out_fault_fd, fault, ++ sizeof(*fault), 0); ++ io_uring_sqe_set_data(sqe, fault); ++ io_uring_submit(ring); ++ ++ ret = io_uring_wait_cqe(ring, &cqe); ++ if (ret == 0) { ++ if (cqe->res == sizeof(*fault)) { ++ data = io_uring_cqe_get_data(cqe); ++ smmuv3_report_iommu_fault(hwpt, data); ++ } ++ } else { ++ warn_report("Read fault[hwpt_id 0x%x] failed %d", ++ hwpt->hwpt_id, ret); ++ } ++ io_uring_cqe_seen(ring, cqe); ++ } ++ 
g_free(fault); ++ return NULL; ++} ++ ++static void create_fault_handlers(SMMUS1Hwpt *hwpt) ++{ ++ if (!hwpt->out_fault_fd) { ++ warn_report("No fault fd for hwpt id: %d", hwpt->hwpt_id); ++ return; ++ } ++ ++ io_uring_queue_init(1024, &hwpt->fault_ring, 0); ++ qemu_mutex_init(&hwpt->fault_mutex); ++ qemu_cond_init(&hwpt->fault_cond); ++ QTAILQ_INIT(&hwpt->pageresp); ++ QTAILQ_INIT(&hwpt->pendfault); ++ qemu_thread_create(&hwpt->read_fault_thread, "io fault read", ++ read_fault_handler, ++ hwpt, QEMU_THREAD_JOINABLE); ++ qemu_thread_create(&hwpt->write_fault_thread, "io fault write", ++ write_fault_handler, ++ hwpt, QEMU_THREAD_JOINABLE); ++} + static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid) + { + #ifdef __linux__ +@@ -1266,6 +1428,7 @@ static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid) + struct iommu_hwpt_arm_smmuv3 nested_data = {}; + SMMUv3State *s = sdev->smmu; + SMMUState *bs = &s->smmu_state; ++ bool req_fault_fd = false; + uint32_t config; + STE ste; + int ret; +@@ -1309,13 +1472,22 @@ static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid) + /* S1DSS | S1CIR | S1COR | S1CSH | S1STALLD | EATS */ + nested_data.ste[1] &= 0x380000ffULL; + ++ if (STE_S1CDMAX(&ste)) { ++ req_fault_fd = true; ++ } ++ + ret = smmu_dev_install_nested_ste(sdev, IOMMU_HWPT_DATA_ARM_SMMUV3, +- sizeof(nested_data), &nested_data); ++ sizeof(nested_data), &nested_data, ++ req_fault_fd); + if (ret) { + error_report("Unable to install nested STE=%16LX:%16LX, ret=%d", + nested_data.ste[1], nested_data.ste[0], ret); + } + ++ if (req_fault_fd) { ++ create_fault_handlers(sdev->s1_hwpt); ++ } ++ + trace_smmuv3_install_nested_ste(sid, nested_data.ste[1], nested_data.ste[0]); + #endif + } +@@ -1631,10 +1803,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + case SMMU_CMD_TLBI_EL2_VA: + case SMMU_CMD_TLBI_EL2_VAA: + case SMMU_CMD_PRI_RESP: +- case SMMU_CMD_RESUME: + case SMMU_CMD_STALL_TERM: + trace_smmuv3_unhandled_cmd(type); + break; ++ case 
SMMU_CMD_RESUME: ++ { ++ uint32_t sid = CMD_SID(&cmd); ++ uint16_t stag = CMD_RESUME_STAG(&cmd); ++ uint8_t action = CMD_RESUME_AC(&cmd); ++ uint32_t code = IOMMUFD_PAGE_RESP_INVALID; ++ ++ if (action) { ++ code = IOMMUFD_PAGE_RESP_SUCCESS; ++ } ++ smmuv3_notify_stall_resume(bs, sid, stag, code); ++ break; ++ } + default: + cmd_error = SMMU_CERROR_ILL; + break; +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 528023b95b..c0eb87c78c 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -344,7 +344,7 @@ static int iommufd_cdev_autodomains_get(VFIODevice *vbasedev, + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, + container->ioas_id, flags, + IOMMU_HWPT_DATA_NONE, 0, NULL, +- &hwpt_id, errp)) { ++ &hwpt_id, NULL, errp)) { + return -EINVAL; + } + +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index e30539a8d4..087a11efc7 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -138,13 +138,34 @@ typedef struct SMMUVdev { + uint32_t sid; + }SMMUVdev; + ++typedef struct PendFaultEntry { ++ struct iommu_hwpt_pgfault fault; ++ QTAILQ_ENTRY(PendFaultEntry) entry; ++} PendFaultEntry; ++ ++typedef struct PageRespEntry { ++ struct iommu_hwpt_page_response resp; ++ QTAILQ_ENTRY(PageRespEntry) entry; ++} PageRespEntry; ++ + typedef struct SMMUS1Hwpt { ++ void *sdev; + void *smmu; + IOMMUFDBackend *iommufd; + SMMUViommu *viommu; + uint32_t hwpt_id; ++ uint32_t out_fault_fd; + QLIST_HEAD(, SMMUDevice) device_list; + QLIST_ENTRY(SMMUViommu) next; ++ /* fault handling */ ++ struct io_uring fault_ring; ++ QemuThread read_fault_thread; ++ QemuThread write_fault_thread; ++ QemuMutex fault_mutex; ++ QemuCond fault_cond; ++ QTAILQ_HEAD(, PageRespEntry) pageresp; ++ QTAILQ_HEAD(, PendFaultEntry) pendfault; ++ bool exiting; + } SMMUS1Hwpt; + + typedef struct SMMUDevice { +@@ -258,7 +279,8 @@ int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type, + uint32_t data_len, void *data); + void 
smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort); + int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type, +- uint32_t data_len, void *data); ++ uint32_t data_len, void *data, ++ bool req_fault_fd); + int smmu_hwpt_invalidate_cache(SMMUS1Hwpt *s1_hwpt, uint32_t type, uint32_t len, + uint32_t *num, void *reqs); + int smmu_viommu_invalidate_cache(IOMMUFDViommu *viommu, uint32_t type, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 0f2c826036..b279184974 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -62,7 +62,7 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + uint32_t pt_id, uint32_t flags, + uint32_t data_type, uint32_t data_len, + void *data_ptr, uint32_t *out_hwpt, +- Error **errp); ++ uint32_t *out_fault_fd, Error **errp); + bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id, + bool start, Error **errp); + bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, +-- +2.41.0.windows.1 + diff --git a/smmuv3-Change-arm-smmuv3-nested-name-to-arm-smmuv3-a.patch b/smmuv3-Change-arm-smmuv3-nested-name-to-arm-smmuv3-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..6f09425545265544e561601f7a0d16f50c737348 --- /dev/null +++ b/smmuv3-Change-arm-smmuv3-nested-name-to-arm-smmuv3-a.patch @@ -0,0 +1,325 @@ +From 2697e7418c1e0d87c82feca33800e3a093546a90 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Thu, 16 Jan 2025 15:20:18 +0000 +Subject: [PATCH] smmuv3: Change arm-smmuv3-nested name to arm-smmuv3-accel + +This is based on feedback received for RFC v1. 
+ +Signed-off-by: Shameer Kolothum +--- + hw/arm/smmuv3.c | 38 +++++++++++++++++++------------------- + hw/arm/virt-acpi-build.c | 16 ++++++++-------- + hw/arm/virt.c | 24 ++++++++++++------------ + hw/core/sysbus-fdt.c | 2 +- + include/hw/arm/smmuv3.h | 8 ++++---- + include/hw/arm/virt.h | 10 +++++----- + 6 files changed, 49 insertions(+), 49 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 6964ab000d..ecdad6bda4 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -2253,14 +2253,14 @@ static void smmu_realize(DeviceState *d, Error **errp) + smmu_init_irq(s, dev); + } + +-static int smmuv3_nested_pci_host_bridge(Object *obj, void *opaque) ++static int smmuv3_accel_pci_host_bridge(Object *obj, void *opaque) + { + DeviceState *d = opaque; +- SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d); ++ SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(d); + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; +- if (s_nested->pci_bus && !strcmp(bus->qbus.name, s_nested->pci_bus)) { ++ if (s_accel->pci_bus && !strcmp(bus->qbus.name, s_accel->pci_bus)) { + object_property_set_link(OBJECT(d), "primary-bus", OBJECT(bus), + &error_abort); + } +@@ -2268,15 +2268,15 @@ static int smmuv3_nested_pci_host_bridge(Object *obj, void *opaque) + return 0; + } + +-static void smmu_nested_realize(DeviceState *d, Error **errp) ++static void smmu_accel_realize(DeviceState *d, Error **errp) + { +- SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d); +- SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested); ++ SMMUv3AccelState *s_nested = ARM_SMMUV3_ACCEL(d); ++ SMMUv3AccelClass *c = ARM_SMMUV3_ACCEL_GET_CLASS(s_nested); + SysBusDevice *dev = SYS_BUS_DEVICE(d); + Error *local_err = NULL; + + object_child_foreach_recursive(object_get_root(), +- smmuv3_nested_pci_host_bridge, d); ++ smmuv3_accel_pci_host_bridge, d); + object_property_set_bool(OBJECT(dev), "nested", true, &error_abort); + + c->parent_realize(d, &local_err); 
+@@ -2365,8 +2365,8 @@ static Property smmuv3_properties[] = { + DEFINE_PROP_END_OF_LIST() + }; + +-static Property smmuv3_nested_properties[] = { +- DEFINE_PROP_STRING("pci-bus", SMMUv3NestedState, pci_bus), ++static Property smmuv3_accel_properties[] = { ++ DEFINE_PROP_STRING("pci-bus", SMMUv3AccelState, pci_bus), + DEFINE_PROP_END_OF_LIST() + }; + +@@ -2389,15 +2389,15 @@ static void smmuv3_class_init(ObjectClass *klass, void *data) + device_class_set_props(dc, smmuv3_properties); + } + +-static void smmuv3_nested_class_init(ObjectClass *klass, void *data) ++static void smmuv3_accel_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +- SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass); ++ SMMUv3AccelClass *c = ARM_SMMUV3_ACCEL_CLASS(klass); + + dc->vmsd = &vmstate_smmuv3; +- device_class_set_parent_realize(dc, smmu_nested_realize, ++ device_class_set_parent_realize(dc, smmu_accel_realize, + &c->parent_realize); +- device_class_set_props(dc, smmuv3_nested_properties); ++ device_class_set_props(dc, smmuv3_accel_properties); + dc->user_creatable = true; + dc->hotpluggable = false; + } +@@ -2440,12 +2440,12 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, + imrc->notify_flag_changed = smmuv3_notify_flag_changed; + } + +-static const TypeInfo smmuv3_nested_type_info = { +- .name = TYPE_ARM_SMMUV3_NESTED, ++static const TypeInfo smmuv3_accel_type_info = { ++ .name = TYPE_ARM_SMMUV3_ACCEL, + .parent = TYPE_ARM_SMMUV3, +- .instance_size = sizeof(SMMUv3NestedState), +- .class_size = sizeof(SMMUv3NestedClass), +- .class_init = smmuv3_nested_class_init, ++ .instance_size = sizeof(SMMUv3AccelState), ++ .class_size = sizeof(SMMUv3AccelClass), ++ .class_init = smmuv3_accel_class_init, + }; + + static const TypeInfo smmuv3_type_info = { +@@ -2466,7 +2466,7 @@ static const TypeInfo smmuv3_iommu_memory_region_info = { + static void smmuv3_register_types(void) + { + type_register(&smmuv3_type_info); +- 
type_register(&smmuv3_nested_type_info); ++ type_register(&smmuv3_accel_type_info); + type_register(&smmuv3_iommu_memory_region_info); + } + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index ad0f79e03d..db635120f9 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -418,10 +418,10 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, + }; + + /* +- * Nested SMMU requires RMRs for MSI 1-1 mapping, which ++ * Accel SMMU requires RMRs for MSI 1-1 mapping, which + * require _DSM for PreservingPCI Boot Configurations + */ +- if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) { ++ if (vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL) { + cfg.preserve_config = true; + } + +@@ -619,10 +619,10 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + /* Table 2 The IORT */ + acpi_table_begin(&table, table_data); + +- if (vms->smmu_nested_count) { +- irq = vms->irqmap[VIRT_SMMU_NESTED] + ARM_SPI_BASE; +- base = vms->memmap[VIRT_SMMU_NESTED].base; +- num_smmus = vms->smmu_nested_count; ++ if (vms->smmu_accel_count) { ++ irq = vms->irqmap[VIRT_SMMU_ACCEL] + ARM_SPI_BASE; ++ base = vms->memmap[VIRT_SMMU_ACCEL].base; ++ num_smmus = vms->smmu_accel_count; + } else if (virt_has_smmuv3(vms)) { + irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE; + base = vms->memmap[VIRT_SMMU].base; +@@ -655,7 +655,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + } + + next_range.input_base = idmap->input_base + idmap->id_count; +- if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) { ++ if (vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL) { + nb_nodes++; /* RMR node per SMMU */ + } + } +@@ -775,7 +775,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET, 0); + } + +- if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) { ++ if (vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL) { + build_iort_rmr_nodes(table_data, smmu_idmaps, smmu_offset, &id); + } + 
+diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a55f297af2..57d00acd48 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -166,7 +166,7 @@ static const MemMapEntry base_memmap[] = { + /* In the virtCCA scenario, this space is used for MSI interrupt mapping */ + [VIRT_CVM_MSI] = { 0x0a001000, 0x00fff000 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, +- [VIRT_SMMU_NESTED] = { 0x0b010000, 0x00ff0000}, ++ [VIRT_SMMU_ACCEL] = { 0x0b010000, 0x00ff0000}, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ + [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, + [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 }, +@@ -212,7 +212,7 @@ static const int a15irqmap[] = { + [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */ + [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */ + [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */ +- [VIRT_SMMU_NESTED] = 200, ++ [VIRT_SMMU_ACCEL] = 200, + }; + + static const char *valid_cpus[] = { +@@ -3619,27 +3619,27 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + + /* For smmuv3-nested devices we need to set the mem & irq */ + if (device_is_dynamic_sysbus(mc, dev) && +- object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) { +- hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base; +- int irq = vms->irqmap[VIRT_SMMU_NESTED]; ++ object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_ACCEL)) { ++ hwaddr base = vms->memmap[VIRT_SMMU_ACCEL].base; ++ int irq = vms->irqmap[VIRT_SMMU_ACCEL]; + +- if (vms->smmu_nested_count >= MAX_SMMU_NESTED) { ++ if (vms->smmu_accel_count >= MAX_SMMU_ACCEL) { + error_setg(errp, "smmuv3-nested max count reached!"); + return; + } + +- base += (vms->smmu_nested_count * SMMU_IO_LEN); +- irq += (vms->smmu_nested_count * NUM_SMMU_IRQS); ++ base += (vms->smmu_accel_count * SMMU_IO_LEN); ++ irq += (vms->smmu_accel_count * NUM_SMMU_IRQS); + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); + for (int i = 0; i < 4; i++) { + 
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, + qdev_get_gpio_in(vms->gic, irq + i)); + } +- if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) { +- vms->iommu = VIRT_IOMMU_SMMUV3_NESTED; ++ if (vms->iommu != VIRT_IOMMU_SMMUV3_ACCEL) { ++ vms->iommu = VIRT_IOMMU_SMMUV3_ACCEL; + } +- vms->smmu_nested_count++; ++ vms->smmu_accel_count++; + } + + if (vms->platform_bus_dev) { +@@ -3815,7 +3815,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM); +- machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_NESTED); ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_ACCEL); + #ifdef CONFIG_TPM + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); + #endif +diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c +index 0f0d0b3e58..58f4dc614c 100644 +--- a/hw/core/sysbus-fdt.c ++++ b/hw/core/sysbus-fdt.c +@@ -489,7 +489,7 @@ static const BindingEntry bindings[] = { + #ifdef CONFIG_LINUX + TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node), + TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node), +- TYPE_BINDING("arm-smmuv3-nested", no_fdt_node), ++ TYPE_BINDING("arm-smmuv3-accel", no_fdt_node), + VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node), + #endif + #ifdef CONFIG_TPM +diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h +index 96513fce56..79b6fcd8e7 100644 +--- a/include/hw/arm/smmuv3.h ++++ b/include/hw/arm/smmuv3.h +@@ -84,16 +84,16 @@ struct SMMUv3Class { + #define TYPE_ARM_SMMUV3 "arm-smmuv3" + OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3) + +-#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested" +-OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED) ++#define TYPE_ARM_SMMUV3_ACCEL "arm-smmuv3-accel" ++OBJECT_DECLARE_TYPE(SMMUv3AccelState, 
SMMUv3AccelClass, ARM_SMMUV3_ACCEL) + +-struct SMMUv3NestedState { ++struct SMMUv3AccelState { + SMMUv3State smmuv3_state; + + char *pci_bus; + }; + +-struct SMMUv3NestedClass { ++struct SMMUv3AccelClass { + /*< private >*/ + SMMUv3Class smmuv3_class; + /*< public >*/ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index bc3c8b70da..3e2759d225 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -110,7 +110,7 @@ typedef enum { + #define SMMU_IO_LEN 0x20000 + + /* Max supported nested SMMUv3 */ +-#define MAX_SMMU_NESTED 64 ++#define MAX_SMMU_ACCEL 64 + + enum { + VIRT_FLASH, +@@ -124,7 +124,7 @@ enum { + VIRT_GIC_ITS, + VIRT_GIC_REDIST, + VIRT_SMMU, +- VIRT_SMMU_NESTED, ++ VIRT_SMMU_ACCEL, + VIRT_UART, + VIRT_CPUFREQ, + VIRT_MMIO, +@@ -159,7 +159,7 @@ enum { + typedef enum VirtIOMMUType { + VIRT_IOMMU_NONE, + VIRT_IOMMU_SMMUV3, +- VIRT_IOMMU_SMMUV3_NESTED, ++ VIRT_IOMMU_SMMUV3_ACCEL, + VIRT_IOMMU_VIRTIO, + } VirtIOMMUType; + +@@ -227,7 +227,7 @@ struct VirtMachineState { + bool mte; + bool dtb_randomness; + bool pmu; +- int smmu_nested_count; ++ int smmu_accel_count; + OnOffAuto acpi; + VirtGICType gic_version; + VirtIOMMUType iommu; +@@ -298,7 +298,7 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + static inline bool virt_has_smmuv3(const VirtMachineState *vms) + { + return vms->iommu == VIRT_IOMMU_SMMUV3 || +- vms->iommu == VIRT_IOMMU_SMMUV3_NESTED; ++ vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL; + } + + #endif /* QEMU_ARM_VIRT_H */ +-- +2.41.0.windows.1 + diff --git a/smmuv3-Use-default-bus-for-arm-smmuv3-accel.patch b/smmuv3-Use-default-bus-for-arm-smmuv3-accel.patch new file mode 100644 index 0000000000000000000000000000000000000000..46c7c77ee9e00a9c54fbcd840cfac5dac37162a7 --- /dev/null +++ b/smmuv3-Use-default-bus-for-arm-smmuv3-accel.patch @@ -0,0 +1,55 @@ +From 5e83bdd94533c91d69c7154d967f3bdd2fa86054 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Thu, 16 Jan 2025 15:29:49 +0000 +Subject: [PATCH] 
smmuv3: Use default bus for arm-smmuv3-accel + +This is based on feedback on RFC v1. + +Signed-off-by: Shameer Kolothum +--- + hw/arm/smmuv3.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index ecdad6bda4..c0fcdd7574 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -2256,11 +2256,10 @@ static void smmu_realize(DeviceState *d, Error **errp) + static int smmuv3_accel_pci_host_bridge(Object *obj, void *opaque) + { + DeviceState *d = opaque; +- SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(d); + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; +- if (s_accel->pci_bus && !strcmp(bus->qbus.name, s_accel->pci_bus)) { ++ if (d->parent_bus && !strcmp(bus->qbus.name, d->parent_bus->name)) { + object_property_set_link(OBJECT(d), "primary-bus", OBJECT(bus), + &error_abort); + } +@@ -2365,11 +2364,6 @@ static Property smmuv3_properties[] = { + DEFINE_PROP_END_OF_LIST() + }; + +-static Property smmuv3_accel_properties[] = { +- DEFINE_PROP_STRING("pci-bus", SMMUv3AccelState, pci_bus), +- DEFINE_PROP_END_OF_LIST() +-}; +- + static void smmuv3_instance_init(Object *obj) + { + /* Nothing much to do here as of now */ +@@ -2397,9 +2391,9 @@ static void smmuv3_accel_class_init(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_smmuv3; + device_class_set_parent_realize(dc, smmu_accel_realize, + &c->parent_realize); +- device_class_set_props(dc, smmuv3_accel_properties); + dc->user_creatable = true; + dc->hotpluggable = false; ++ dc->bus_type = TYPE_PCIE_BUS; + } + + static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, +-- +2.41.0.windows.1 + diff --git a/smmuv3-realize-get_pasid_cap-and-set-ssidsize-with-p.patch b/smmuv3-realize-get_pasid_cap-and-set-ssidsize-with-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..cbfa611012cfa74daaf8b9e584aeb62c9a97091a --- /dev/null +++ 
b/smmuv3-realize-get_pasid_cap-and-set-ssidsize-with-p.patch @@ -0,0 +1,52 @@ +From d4d0d15716a3f4c89ca9532e6b598b14db76ae0c Mon Sep 17 00:00:00 2001 +From: Zhangfei Gao +Date: Sat, 26 Oct 2024 08:40:11 +0000 +Subject: [PATCH] smmuv3: realize get_pasid_cap and set ssidsize with pasid + +Signed-off-by: Zhangfei Gao +--- + hw/arm/smmu-common.c | 9 +++++++++ + hw/arm/smmuv3.c | 3 +-- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index e7028bd4ec..3a257a5b0e 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -831,10 +831,19 @@ static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) + } + } + ++static bool smmu_dev_get_pasid_cap(PCIBus *bus, ++ void *opaque, int devfn) ++{ ++ assert(0 <= devfn && devfn < PCI_DEVFN_MAX); ++ ++ return true; ++} ++ + static const PCIIOMMUOps smmu_ops = { + .get_address_space = smmu_find_add_as, + .set_iommu_device = smmu_dev_set_iommu_device, + .unset_iommu_device = smmu_dev_unset_iommu_device, ++ .get_pasid_cap = smmu_dev_get_pasid_cap, + }; + + SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid) +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 0ca0e96fcc..6964ab000d 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -312,8 +312,7 @@ out: + + val = FIELD_EX32(sdev->info.idr[1], IDR1, SIDSIZE); + s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SIDSIZE, val); +- val = FIELD_EX32(sdev->info.idr[1], IDR1, SSIDSIZE); +- s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, val); ++ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, pasid); + + val = FIELD_EX32(sdev->info.idr[3], IDR3, HAD); + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, val); +-- +2.41.0.windows.1 + diff --git a/softmmu-Support-concurrent-bounce-buffers-CVE-2024-8.patch b/softmmu-Support-concurrent-bounce-buffers-CVE-2024-8.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6ac32dd09e4db7bc4e63953c9b4f9c546917572 --- /dev/null +++ 
b/softmmu-Support-concurrent-bounce-buffers-CVE-2024-8.patch @@ -0,0 +1,289 @@ +From 17ba0dab19bd20d6388ce26e71b02c211e1d4690 Mon Sep 17 00:00:00 2001 +From: Mattias Nissler +Date: Mon, 19 Aug 2024 06:54:54 -0700 +Subject: [PATCH 3/4] softmmu: Support concurrent bounce buffers(CVE-2024-8612) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 637b0aa139565cb82a7b9269e62214f87082635c + +When DMA memory can't be directly accessed, as is the case when +running the device model in a separate process without shareable DMA +file descriptors, bounce buffering is used. + +It is not uncommon for device models to request mapping of several DMA +regions at the same time. Examples include: + * net devices, e.g. when transmitting a packet that is split across + several TX descriptors (observed with igb) + * USB host controllers, when handling a packet with multiple data TRBs + (observed with xhci) + +Previously, qemu only provided a single bounce buffer per AddressSpace +and would fail DMA map requests while the buffer was already in use. In +turn, this would cause DMA failures that ultimately manifest as hardware +errors from the guest perspective. + +This change allocates DMA bounce buffers dynamically instead of +supporting only a single buffer. Thus, multiple DMA mappings work +correctly also when RAM can't be mmap()-ed. + +The total bounce buffer allocation size is limited individually for each +AddressSpace. The default limit is 4096 bytes, matching the previous +maximum buffer size. A new x-max-bounce-buffer-size parameter is +provided to configure the limit for PCI devices. 
+ +Signed-off-by: Mattias Nissler +Reviewed-by: Philippe Mathieu-Daudé +Acked-by: Peter Xu +Link: https://lore.kernel.org/r/20240819135455.2957406-1-mnissler@rivosinc.com +Signed-off-by: Peter Xu +--- + hw/pci/pci.c | 8 ++++ + include/exec/memory.h | 14 +++---- + include/hw/pci/pci_device.h | 3 ++ + system/memory.c | 5 ++- + system/physmem.c | 82 ++++++++++++++++++++++++++----------- + 5 files changed, 76 insertions(+), 36 deletions(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index 9da41088df..7467a2a9de 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -85,6 +85,8 @@ static Property pci_props[] = { + QEMU_PCIE_ERR_UNC_MASK_BITNR, true), + DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, + QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), ++ DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice, ++ max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE), + DEFINE_PROP_END_OF_LIST() + }; + +@@ -1201,6 +1203,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, + "bus master container", UINT64_MAX); + address_space_init(&pci_dev->bus_master_as, + &pci_dev->bus_master_container_region, pci_dev->name); ++ pci_dev->bus_master_as.max_bounce_buffer_size = ++ pci_dev->max_bounce_buffer_size; + + if (phase_check(PHASE_MACHINE_READY)) { + pci_init_bus_master(pci_dev); +@@ -2658,6 +2662,10 @@ static void pci_device_class_init(ObjectClass *klass, void *data) + k->unrealize = pci_qdev_unrealize; + k->bus_type = TYPE_PCI_BUS; + device_class_set_props(k, pci_props); ++ object_class_property_set_description( ++ klass, "x-max-bounce-buffer-size", ++ "Maximum buffer size allocated for bounce buffers used for mapped " ++ "access to indirect DMA memory"); + } + + static void pci_device_class_base_init(ObjectClass *klass, void *data) +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 40dcf70530..73d274d8f3 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1111,13 +1111,7 @@ typedef struct AddressSpaceMapClient { + 
QLIST_ENTRY(AddressSpaceMapClient) link; + } AddressSpaceMapClient; + +-typedef struct { +- MemoryRegion *mr; +- void *buffer; +- hwaddr addr; +- hwaddr len; +- bool in_use; +-} BounceBuffer; ++#define DEFAULT_MAX_BOUNCE_BUFFER_SIZE (4096) + + /** + * struct AddressSpace: describes a mapping of addresses to #MemoryRegion objects +@@ -1138,8 +1132,10 @@ struct AddressSpace { + QTAILQ_HEAD(, MemoryListener) listeners; + QTAILQ_ENTRY(AddressSpace) address_spaces_link; + +- /* Bounce buffer to use for this address space. */ +- BounceBuffer bounce; ++ /* Maximum DMA bounce buffer size used for indirect memory map requests */ ++ size_t max_bounce_buffer_size; ++ /* Total size of bounce buffers currently allocated, atomically accessed */ ++ size_t bounce_buffer_size; + /* List of callbacks to invoke when buffers free up */ + QemuMutex map_client_list_lock; + QLIST_HEAD(, AddressSpaceMapClient) map_client_list; +diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h +index d3dd0f64b2..253b48a688 100644 +--- a/include/hw/pci/pci_device.h ++++ b/include/hw/pci/pci_device.h +@@ -160,6 +160,9 @@ struct PCIDevice { + /* ID of standby device in net_failover pair */ + char *failover_pair_id; + uint32_t acpi_index; ++ ++ /* Maximum DMA bounce buffer size used for indirect memory map requests */ ++ uint32_t max_bounce_buffer_size; + }; + + static inline int pci_intx(PCIDevice *pci_dev) +diff --git a/system/memory.c b/system/memory.c +index 026e47dcb8..1ae03074f3 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3117,7 +3117,8 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) + as->ioeventfds = NULL; + QTAILQ_INIT(&as->listeners); + QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link); +- as->bounce.in_use = false; ++ as->max_bounce_buffer_size = DEFAULT_MAX_BOUNCE_BUFFER_SIZE; ++ as->bounce_buffer_size = 0; + qemu_mutex_init(&as->map_client_list_lock); + QLIST_INIT(&as->map_client_list); + as->name = g_strdup(name ? 
name : "anonymous"); +@@ -3127,7 +3128,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) + + static void do_address_space_destroy(AddressSpace *as) + { +- assert(!qatomic_read(&as->bounce.in_use)); ++ assert(qatomic_read(&as->bounce_buffer_size) == 0); + assert(QLIST_EMPTY(&as->map_client_list)); + qemu_mutex_destroy(&as->map_client_list_lock); + +diff --git a/system/physmem.c b/system/physmem.c +index 4491a7dbd1..2c8b83f811 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -3021,6 +3021,20 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len) + NULL, len, FLUSH_CACHE); + } + ++/* ++ * A magic value stored in the first 8 bytes of the bounce buffer struct. Used ++ * to detect illegal pointers passed to address_space_unmap. ++ */ ++#define BOUNCE_BUFFER_MAGIC 0xb4017ceb4ffe12ed ++ ++typedef struct { ++ uint64_t magic; ++ MemoryRegion *mr; ++ hwaddr addr; ++ size_t len; ++ uint8_t buffer[]; ++} BounceBuffer; ++ + static void + address_space_unregister_map_client_do(AddressSpaceMapClient *client) + { +@@ -3046,9 +3060,9 @@ void address_space_register_map_client(AddressSpace *as, QEMUBH *bh) + qemu_mutex_lock(&as->map_client_list_lock); + client->bh = bh; + QLIST_INSERT_HEAD(&as->map_client_list, client, link); +- /* Write map_client_list before reading in_use. */ ++ /* Write map_client_list before reading bounce_buffer_size. 
*/ + smp_mb(); +- if (!qatomic_read(&as->bounce.in_use)) { ++ if (qatomic_read(&as->bounce_buffer_size) < as->max_bounce_buffer_size) { + address_space_notify_map_clients_locked(as); + } + qemu_mutex_unlock(&as->map_client_list_lock); +@@ -3178,28 +3192,40 @@ void *address_space_map(AddressSpace *as, + mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); + + if (!memory_access_is_direct(mr, is_write)) { +- if (qatomic_xchg(&as->bounce.in_use, true)) { ++ size_t used = qatomic_read(&as->bounce_buffer_size); ++ for (;;) { ++ hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l); ++ size_t new_size = used + alloc; ++ size_t actual = ++ qatomic_cmpxchg(&as->bounce_buffer_size, used, new_size); ++ if (actual == used) { ++ l = alloc; ++ break; ++ } ++ used = actual; ++ } ++ ++ if (l == 0) { + *plen = 0; + return NULL; + } +- /* Avoid unbounded allocations */ +- l = MIN(l, TARGET_PAGE_SIZE); +- as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); +- as->bounce.addr = addr; +- as->bounce.len = l; + ++ BounceBuffer *bounce = g_malloc0(l + sizeof(BounceBuffer)); ++ bounce->magic = BOUNCE_BUFFER_MAGIC; + memory_region_ref(mr); +- as->bounce.mr = mr; ++ bounce->mr = mr; ++ bounce->addr = addr; ++ bounce->len = l; ++ + if (!is_write) { + flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, +- as->bounce.buffer, l); ++ bounce->buffer, l); + } + + *plen = l; +- return as->bounce.buffer; ++ return bounce->buffer; + } + +- + memory_region_ref(mr); + *plen = flatview_extend_translation(fv, addr, len, mr, xlat, + l, is_write, attrs); +@@ -3214,12 +3240,11 @@ void *address_space_map(AddressSpace *as, + void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + bool is_write, hwaddr access_len) + { +- if (buffer != as->bounce.buffer) { +- MemoryRegion *mr; +- ram_addr_t addr1; ++ MemoryRegion *mr; ++ ram_addr_t addr1; + +- mr = memory_region_from_host(buffer, &addr1); +- assert(mr != NULL); ++ mr = memory_region_from_host(buffer, &addr1); ++ if (mr != NULL) { + 
if (is_write) { + invalidate_and_set_dirty(mr, addr1, access_len); + } +@@ -3229,15 +3254,22 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + memory_region_unref(mr); + return; + } ++ ++ ++ BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer); ++ assert(bounce->magic == BOUNCE_BUFFER_MAGIC); ++ + if (is_write) { +- address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED, +- as->bounce.buffer, access_len); +- } +- qemu_vfree(as->bounce.buffer); +- as->bounce.buffer = NULL; +- memory_region_unref(as->bounce.mr); +- /* Clear in_use before reading map_client_list. */ +- qatomic_set_mb(&as->bounce.in_use, false); ++ address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED, ++ bounce->buffer, access_len); ++ } ++ ++ qatomic_sub(&as->bounce_buffer_size, bounce->len); ++ bounce->magic = ~BOUNCE_BUFFER_MAGIC; ++ memory_region_unref(bounce->mr); ++ g_free(bounce); ++ /* Write bounce_buffer_size before reading map_client_list. */ ++ smp_mb(); + address_space_notify_map_clients(as); + } + +-- +2.45.1.windows.1 + diff --git a/softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch b/softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch new file mode 100644 index 0000000000000000000000000000000000000000..73b85f6865c1c17cbd16773c8afbec08273ab18f --- /dev/null +++ b/softmmu-physmem-fix-memory-leak-in-dirty_memory_exte.patch @@ -0,0 +1,139 @@ +From c6d6cbb2c33c3c7b2574c3baa2d2477d9d4ac91c Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 28 Aug 2024 11:07:43 +0200 +Subject: [PATCH] softmmu/physmem: fix memory leak in dirty_memory_extend() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +As reported by Peter, we might be leaking memory when removing the +highest RAMBlock (in the weird ram_addr_t space), and adding a new one. + +We will fail to realize that we already allocated bitmaps for more +dirty memory blocks, and effectively discard the pointers to them. 
+ +Fix it by getting rid of last_ram_page() and by remembering the number +of dirty memory blocks that have been allocated already. + +While at it, let's use "unsigned int" for the number of blocks, which +should be sufficient until we reach ~32 exabytes. + +Looks like this leak was introduced as we switched from using a single +bitmap_zero_extend() to allocating multiple bitmaps: +bitmap_zero_extend() relies on g_renew() which should have taken care of +this. + +Resolves: https://lkml.kernel.org/r/CAFEAcA-k7a+VObGAfCFNygQNfCKL=AfX6A4kScq=VSSK0peqPg@mail.gmail.com +Reported-by: Peter Maydell +Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM hotplug") +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Peter Xu +Tested-by: Peter Maydell +Cc: qemu-stable@nongnu.org +Cc: Stefan Hajnoczi +Cc: Paolo Bonzini +Cc: Peter Xu +Cc: Philippe Mathieu-Daudé +Signed-off-by: David Hildenbrand +Link: https://lore.kernel.org/r/20240828090743.128647-1-david@redhat.com +Signed-off-by: Peter Xu +(cherry picked from commit b84f06c2bee727b3870b4eeccbe3a45c5aea14c1) +Signed-off-by: Michael Tokarev +Signed-off-by: zhujun2 +--- + include/exec/ramlist.h | 1 + + system/physmem.c | 35 +++++++++-------------------------- + 2 files changed, 10 insertions(+), 26 deletions(-) + +diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h +index 2ad2a81acc..d9cfe530be 100644 +--- a/include/exec/ramlist.h ++++ b/include/exec/ramlist.h +@@ -50,6 +50,7 @@ typedef struct RAMList { + /* RCU-enabled, writes protected by the ramlist lock. 
*/ + QLIST_HEAD(, RAMBlock) blocks; + DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM]; ++ unsigned int num_dirty_blocks; + uint32_t version; + QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; + } RAMList; +diff --git a/system/physmem.c b/system/physmem.c +index 2c8b83f811..87f49e70c1 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1531,18 +1531,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size) + return offset; + } + +-static unsigned long last_ram_page(void) +-{ +- RAMBlock *block; +- ram_addr_t last = 0; +- +- RCU_READ_LOCK_GUARD(); +- RAMBLOCK_FOREACH(block) { +- last = MAX(last, block->offset + block->max_length); +- } +- return last >> TARGET_PAGE_BITS; +-} +- + static void qemu_ram_setup_dump(void *addr, ram_addr_t size) + { + int ret; +@@ -1795,13 +1783,11 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) + } + + /* Called with ram_list.mutex held */ +-static void dirty_memory_extend(ram_addr_t old_ram_size, +- ram_addr_t new_ram_size) ++static void dirty_memory_extend(ram_addr_t new_ram_size) + { +- ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size, +- DIRTY_MEMORY_BLOCK_SIZE); +- ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size, +- DIRTY_MEMORY_BLOCK_SIZE); ++ unsigned int old_num_blocks = ram_list.num_dirty_blocks; ++ unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size, ++ DIRTY_MEMORY_BLOCK_SIZE); + int i; + + /* Only need to extend if block count increased */ +@@ -1833,6 +1819,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size, + g_free_rcu(old_blocks, rcu); + } + } ++ ++ ram_list.num_dirty_blocks = new_num_blocks; + } + + static void ram_block_add(RAMBlock *new_block, Error **errp) +@@ -1841,11 +1829,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + const bool shared = qemu_ram_is_shared(new_block); + RAMBlock *block; + RAMBlock *last_block = NULL; +- ram_addr_t old_ram_size, new_ram_size; ++ ram_addr_t ram_size; + Error *err = NULL; + +- old_ram_size = 
last_ram_page(); +- + qemu_mutex_lock_ramlist(); + new_block->offset = find_ram_offset(new_block->max_length); + +@@ -1873,11 +1859,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + } + } + +- new_ram_size = MAX(old_ram_size, +- (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); +- if (new_ram_size > old_ram_size) { +- dirty_memory_extend(old_ram_size, new_ram_size); +- } ++ ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS; ++ dirty_memory_extend(ram_size); + /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, + * QLIST (which has an RCU-friendly variant) does not have insertion at + * tail, so save the last element in last_block. +-- +2.41.0.windows.1 + diff --git a/spapr-Implement-get_dt_compatible-callback.patch b/spapr-Implement-get_dt_compatible-callback.patch deleted file mode 100644 index e64a8746f498a68085824f6cace1bb2e958ce7c7..0000000000000000000000000000000000000000 --- a/spapr-Implement-get_dt_compatible-callback.patch +++ /dev/null @@ -1,68 +0,0 @@ -From c520d8e823431be94268daa2a911e224cab81521 Mon Sep 17 00:00:00 2001 -From: Stefan Berger -Date: Tue, 21 Jan 2020 10:29:31 -0500 -Subject: [PATCH 04/19] spapr: Implement get_dt_compatible() callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -For devices that cannot be statically initialized, implement a -get_dt_compatible() callback that allows us to ask the device for -the 'compatible' value. 
- -Signed-off-by: Stefan Berger -Reviewed-by: Marc-André Lureau -Reviewed-by: David Gibson -Message-Id: <20200121152935.649898-3-stefanb@linux.ibm.com> -Signed-off-by: David Gibson -Signed-off-by: jiangfangjie ---- - hw/ppc/spapr_vio.c | 11 +++++++++-- - include/hw/ppc/spapr_vio.h | 1 + - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c -index 583c13de..4e50916f 100644 ---- a/hw/ppc/spapr_vio.c -+++ b/hw/ppc/spapr_vio.c -@@ -89,6 +89,7 @@ static int vio_make_devnode(SpaprVioDevice *dev, - SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); - int vdevice_off, node_off, ret; - char *dt_name; -+ const char *dt_compatible; - - vdevice_off = fdt_path_offset(fdt, "/vdevice"); - if (vdevice_off < 0) { -@@ -115,9 +116,15 @@ static int vio_make_devnode(SpaprVioDevice *dev, - } - } - -- if (pc->dt_compatible) { -+ if (pc->get_dt_compatible) { -+ dt_compatible = pc->get_dt_compatible(dev); -+ } else { -+ dt_compatible = pc->dt_compatible; -+ } -+ -+ if (dt_compatible) { - ret = fdt_setprop_string(fdt, node_off, "compatible", -- pc->dt_compatible); -+ dt_compatible); - if (ret < 0) { - return ret; - } -diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h -index 04609f21..97951fc6 100644 ---- a/include/hw/ppc/spapr_vio.h -+++ b/include/hw/ppc/spapr_vio.h -@@ -56,6 +56,7 @@ typedef struct SpaprVioDeviceClass { - void (*realize)(SpaprVioDevice *dev, Error **errp); - void (*reset)(SpaprVioDevice *dev); - int (*devnode)(SpaprVioDevice *dev, void *fdt, int node_off); -+ const char *(*get_dt_compatible)(SpaprVioDevice *dev); - } SpaprVioDeviceClass; - - struct SpaprVioDevice { --- -2.23.0 - diff --git a/spapr_pci-add-spapr-msi-read-method.patch b/spapr_pci-add-spapr-msi-read-method.patch deleted file mode 100644 index 2cc4994f09171252daf5a435832480151c458f4b..0000000000000000000000000000000000000000 --- a/spapr_pci-add-spapr-msi-read-method.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 
cbbcd56e090a59d0eaa4e35ed0efb24d6dd1003e Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:23:24 +0800 -Subject: [PATCH] spapr_pci: add spapr msi read method - -fix CVE-2020-15469 - -Add spapr msi mmio read method to avoid NULL pointer dereference -issue. - -Reported-by: Lei Sun -Acked-by: David Gibson -Reviewed-by: Li Qiang -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/ppc/spapr_pci.c | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c -index 9003fe9010..1571e049ab 100644 ---- a/hw/ppc/spapr_pci.c -+++ b/hw/ppc/spapr_pci.c -@@ -50,6 +50,7 @@ - #include "sysemu/kvm.h" - #include "sysemu/hostmem.h" - #include "sysemu/numa.h" -+#include "qemu/log.h" - - /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ - #define RTAS_QUERY_FN 0 -@@ -743,6 +744,12 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) - return route; - } - -+static uint64_t spapr_msi_read(void *opaque, hwaddr addr, unsigned size) -+{ -+ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); -+ return 0; -+} -+ - /* - * MSI/MSIX memory region implementation. - * The handler handles both MSI and MSIX. 
-@@ -760,8 +767,10 @@ static void spapr_msi_write(void *opaque, hwaddr addr, - } - - static const MemoryRegionOps spapr_msi_ops = { -- /* There is no .read as the read result is undefined by PCI spec */ -- .read = NULL, -+ /* .read result is undefined by PCI spec -+ * define .read method to avoid assert failure in memory_region_init_io -+ */ -+ .read = spapr_msi_read, - .write = spapr_msi_write, - .endianness = DEVICE_LITTLE_ENDIAN - }; --- -2.27.0 - diff --git a/sphinx-qapidoc-Fix-to-generate-doc-for-explicit-unbo.patch b/sphinx-qapidoc-Fix-to-generate-doc-for-explicit-unbo.patch new file mode 100644 index 0000000000000000000000000000000000000000..9388ea10dd71baebbe8a516b9234034a202c6c7f --- /dev/null +++ b/sphinx-qapidoc-Fix-to-generate-doc-for-explicit-unbo.patch @@ -0,0 +1,156 @@ +From c7fe47e4aab35c1817c4c53f0025a741a9e2ad57 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Fri, 28 Jun 2024 13:27:56 +0200 +Subject: [PATCH] sphinx/qapidoc: Fix to generate doc for explicit, unboxed + arguments + +When a command's arguments are specified as an explicit type T, +generated documentation points to the members of T. + +Example: + + ## + # @announce-self: + # + # Trigger generation of broadcast RARP frames to update network + [...] + ## + { 'command': 'announce-self', 'boxed': true, + 'data' : 'AnnounceParameters'} + +generates + + "announce-self" (Command) + ------------------------- + + Trigger generation of broadcast RARP frames to update network + [...] + + Arguments + ~~~~~~~~~ + + The members of "AnnounceParameters" + +Except when the command takes its arguments unboxed , i.e. it doesn't +have 'boxed': true, we generate *nothing*. A few commands have a +reference in their doc comment to compensate, but most don't. + +Example: + + ## + # @blockdev-snapshot-sync: + # + # Takes a synchronous snapshot of a block device. + # + # For the arguments, see the documentation of BlockdevSnapshotSync. + [...] 
+ ## + { 'command': 'blockdev-snapshot-sync', + 'data': 'BlockdevSnapshotSync', + 'allow-preconfig': true } + +generates + + "blockdev-snapshot-sync" (Command) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Takes a synchronous snapshot of a block device. + + For the arguments, see the documentation of BlockdevSnapshotSync. + [...] + +Same for event data. + +Fix qapidoc.py to generate the reference regardless of boxing. Delete +now redundant references in the doc comments. + +Fixes: 4078ee5469e5 (docs/sphinx: Add new qapi-doc Sphinx extension) +Cc: qemu-stable@nongnu.org +Signed-off-by: Markus Armbruster +Message-ID: <20240628112756.794237-1-armbru@redhat.com> +Reviewed-by: John Snow +(cherry picked from commit e389929d19a543ea5b34d02553b355f9f1c03162) +Signed-off-by: zhujun2 +--- + docs/sphinx/qapidoc.py | 12 +++++------- + qapi/block-core.json | 7 ------- + 2 files changed, 5 insertions(+), 14 deletions(-) + +diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py +index 658c288f8f..3d19853444 100644 +--- a/docs/sphinx/qapidoc.py ++++ b/docs/sphinx/qapidoc.py +@@ -229,15 +229,15 @@ def _nodes_for_enum_values(self, doc): + section += dlnode + return [section] + +- def _nodes_for_arguments(self, doc, boxed_arg_type): ++ def _nodes_for_arguments(self, doc, arg_type): + """Return list of doctree nodes for the arguments section""" +- if boxed_arg_type: ++ if arg_type and not arg_type.is_implicit(): + assert not doc.args + section = self._make_section('Arguments') + dlnode = nodes.definition_list() + dlnode += self._make_dlitem( + [nodes.Text('The members of '), +- nodes.literal('', boxed_arg_type.name)], ++ nodes.literal('', arg_type.name)], + None) + section += dlnode + return [section] +@@ -341,8 +341,7 @@ def visit_command(self, name, info, ifcond, features, arg_type, + allow_preconfig, coroutine): + doc = self._cur_doc + self._add_doc('Command', +- self._nodes_for_arguments(doc, +- arg_type if boxed else None) ++ self._nodes_for_arguments(doc, arg_type) + + 
self._nodes_for_features(doc) + + self._nodes_for_sections(doc) + + self._nodes_for_if_section(ifcond)) +@@ -350,8 +349,7 @@ def visit_command(self, name, info, ifcond, features, arg_type, + def visit_event(self, name, info, ifcond, features, arg_type, boxed): + doc = self._cur_doc + self._add_doc('Event', +- self._nodes_for_arguments(doc, +- arg_type if boxed else None) ++ self._nodes_for_arguments(doc, arg_type) + + self._nodes_for_features(doc) + + self._nodes_for_sections(doc) + + self._nodes_for_if_section(ifcond)) +diff --git a/qapi/block-core.json b/qapi/block-core.json +index ded6f0f6d2..0fa184698a 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1662,8 +1662,6 @@ + # + # Takes a synchronous snapshot of a block device. + # +-# For the arguments, see the documentation of BlockdevSnapshotSync. +-# + # Returns: + # - nothing on success + # - If @device is not a valid block device, DeviceNotFound +@@ -1693,8 +1691,6 @@ + # device, the block device changes to using 'overlay' as its new + # active image. + # +-# For the arguments, see the documentation of BlockdevSnapshot. +-# + # Features: + # + # @allow-write-only-overlay: If present, the check whether this +@@ -6037,9 +6033,6 @@ + # string, or a snapshot with name already exists, the operation will + # fail. + # +-# For the arguments, see the documentation of +-# BlockdevSnapshotInternal. 
+-# + # Returns: + # - nothing on success + # - If @device is not a valid block device, GenericError +-- +2.41.0.windows.1 + diff --git a/ssi-Fix-bad-printf-format-specifiers.patch b/ssi-Fix-bad-printf-format-specifiers.patch deleted file mode 100644 index 811a14da46a4e55ca324209309774973b563b70c..0000000000000000000000000000000000000000 --- a/ssi-Fix-bad-printf-format-specifiers.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 073457a45eaccd2beac3c94c53a449b8f683501e Mon Sep 17 00:00:00 2001 -From: AlexChen -Date: Wed, 4 Nov 2020 18:22:45 +0800 -Subject: [PATCH] ssi: Fix bad printf format specifiers - -We should use printf format specifier "%u" instead of "%d" for -argument of type "unsigned int". - -Reported-by: Euler Robot -Signed-off-by: Alex Chen -Reviewed-by: Alistair Francis -Message-id: 5FA280F5.8060902@huawei.com -Signed-off-by: Peter Maydell -(cherry-picked from commit 9df0a97298) ---- - hw/ssi/imx_spi.c | 2 +- - hw/ssi/xilinx_spi.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c -index 5cec9b5d05..0b3052bdf9 100644 ---- a/hw/ssi/imx_spi.c -+++ b/hw/ssi/imx_spi.c -@@ -52,7 +52,7 @@ static const char *imx_spi_reg_name(uint32_t reg) - case ECSPI_MSGDATA: - return "ECSPI_MSGDATA"; - default: -- sprintf(unknown, "%d ?", reg); -+ sprintf(unknown, "%u ?", reg); - return unknown; - } - } -diff --git a/hw/ssi/xilinx_spi.c b/hw/ssi/xilinx_spi.c -index 1379cb164b..d2b69d027a 100644 ---- a/hw/ssi/xilinx_spi.c -+++ b/hw/ssi/xilinx_spi.c -@@ -139,7 +139,7 @@ static void xlx_spi_update_irq(XilinxSPI *s) - irq chain unless things really changed. 
*/ - if (pending != s->irqline) { - s->irqline = pending; -- DB_PRINT("irq_change of state %d ISR:%x IER:%X\n", -+ DB_PRINT("irq_change of state %u ISR:%x IER:%X\n", - pending, s->regs[R_IPISR], s->regs[R_IPIER]); - qemu_set_irq(s->irq, pending); - } --- -2.27.0 - diff --git a/stdvga-fix-screen-blanking.patch b/stdvga-fix-screen-blanking.patch new file mode 100644 index 0000000000000000000000000000000000000000..9b6416ff957000574299e8bf709efdd0fedf6a20 --- /dev/null +++ b/stdvga-fix-screen-blanking.patch @@ -0,0 +1,47 @@ +From 540314912566c91341226d9eb6df5b782f277813 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Wed, 5 Jun 2024 15:14:41 +0200 +Subject: [PATCH] stdvga: fix screen blanking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In case the display surface uses a shared buffer (i.e. uses vga vram +directly instead of a shadow) go unshare the buffer before clearing it. + +This avoids vga memory corruption, which in turn fixes unblanking not +working properly with X11. 
+ +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2067 +Signed-off-by: Gerd Hoffmann +Reviewed-by: Marc-André Lureau +Message-ID: <20240605131444.797896-2-kraxel@redhat.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit b1cf266c82cb1211ee2785f1813a6a3f3e693390) +Signed-off-by: zhujun2 +--- + hw/display/vga.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/display/vga.c b/hw/display/vga.c +index 37557c3442..cb6b6ee2ca 100644 +--- a/hw/display/vga.c ++++ b/hw/display/vga.c +@@ -1748,6 +1748,13 @@ static void vga_draw_blank(VGACommonState *s, int full_update) + if (s->last_scr_width <= 0 || s->last_scr_height <= 0) + return; + ++ if (is_buffer_shared(surface)) { ++ /* unshare buffer, otherwise the blanking corrupts vga vram */ ++ surface = qemu_create_displaysurface(s->last_scr_width, ++ s->last_scr_height); ++ dpy_gfx_replace_surface(s->con, surface); ++ } ++ + w = s->last_scr_width * surface_bytes_per_pixel(surface); + d = surface_data(surface); + for(i = 0; i < s->last_scr_height; i++) { +-- +2.41.0.windows.1 + diff --git a/sync-header-file-from-upstream.patch b/sync-header-file-from-upstream.patch new file mode 100644 index 0000000000000000000000000000000000000000..54c4e81b3a59c0e66d23de8fe3f190612c8f13dc --- /dev/null +++ b/sync-header-file-from-upstream.patch @@ -0,0 +1,137 @@ +From 5b9ece5e96c40f56e7c84bf15d4a5a7d1205bc25 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 26 May 2025 16:58:25 +0800 +Subject: [PATCH] sync header file from upstream + +The local interrupt controller simulation header file is inconsistent +with the upstream header file. To ensure uapi compatibility, +the upstream interrupt controller simulation header file is now +synchronized. 
+ +Signed-off-by: Xianglai Li +--- + hw/intc/loongarch_extioi_kvm.c | 2 +- + hw/intc/loongarch_ipi_kvm.c | 2 +- + hw/intc/loongarch_pch_pic_kvm.c | 2 +- + linux-headers/asm-loongarch/kvm.h | 15 ++++++--------- + linux-headers/linux/kvm.h | 13 +++++++------ + target/loongarch/kvm/kvm.c | 4 ---- + 6 files changed, 16 insertions(+), 22 deletions(-) + +diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c +index 2e7c764b7c..94af4378e4 100644 +--- a/hw/intc/loongarch_extioi_kvm.c ++++ b/hw/intc/loongarch_extioi_kvm.c +@@ -115,7 +115,7 @@ static void kvm_loongarch_extioi_realize(DeviceState *dev, Error **errp) + } + + if (!extioi_class->is_created) { +- cd.type = KVM_DEV_TYPE_LA_EXTIOI; ++ cd.type = KVM_DEV_TYPE_LOONGARCH_EIOINTC; + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); + if (ret < 0) { + error_setg_errno(errp, errno, +diff --git a/hw/intc/loongarch_ipi_kvm.c b/hw/intc/loongarch_ipi_kvm.c +index fd308eb0c0..57fc05db77 100644 +--- a/hw/intc/loongarch_ipi_kvm.c ++++ b/hw/intc/loongarch_ipi_kvm.c +@@ -128,7 +128,7 @@ static void kvm_loongarch_ipi_realize(DeviceState *dev, Error **errp) + } + + if (!ipi_class->is_created) { +- cd.type = KVM_DEV_TYPE_LA_IPI; ++ cd.type = KVM_DEV_TYPE_LOONGARCH_IPI; + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); + if (ret < 0) { + error_setg_errno(errp, errno, "Creating the KVM device failed"); +diff --git a/hw/intc/loongarch_pch_pic_kvm.c b/hw/intc/loongarch_pch_pic_kvm.c +index 8f66d9a01f..e9cef02f9a 100644 +--- a/hw/intc/loongarch_pch_pic_kvm.c ++++ b/hw/intc/loongarch_pch_pic_kvm.c +@@ -113,7 +113,7 @@ static void kvm_loongarch_pch_pic_realize(DeviceState *dev, Error **errp) + } + + if (!pch_pic_class->is_created) { +- cd.type = KVM_DEV_TYPE_LA_PCH_PIC; ++ cd.type = KVM_DEV_TYPE_LOONGARCH_PCHPIC; + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); + if (ret < 0) { + error_setg_errno(errp, errno, +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 
34abd65939..d22b19e134 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -135,25 +135,22 @@ struct kvm_iocsr_entry { + #define KVM_IRQCHIP_NUM_PINS 64 + #define KVM_MAX_CORES 256 + +-#define KVM_LOONGARCH_VM_HAVE_IRQCHIP 0x40000001 ++#define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000001 + +-#define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000002 ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000002 + +-#define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 +- +-#define KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS 0x40000006 ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS 0x40000003 + #define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU 0x0 + #define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE 0x1 + #define KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE 0x2 + +-#define KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL 0x40000007 ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL 0x40000004 + #define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU 0x0 + #define KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE 0x1 + #define KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED 0x3 + +-#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000004 +-#define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 +- + #define KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS 0x40000005 ++#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000006 ++#define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 + + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 0714651440..413663e332 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1487,12 +1487,13 @@ enum kvm_device_type { + #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME + KVM_DEV_TYPE_RISCV_AIA, + #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA +- KVM_DEV_TYPE_LA_PCH_PIC = 0x100, +-#define KVM_DEV_TYPE_LA_PCH_PIC KVM_DEV_TYPE_LA_PCH_PIC +- KVM_DEV_TYPE_LA_IPI, +-#define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI +- KVM_DEV_TYPE_LA_EXTIOI, +-#define KVM_DEV_TYPE_LA_EXTIOI KVM_DEV_TYPE_LA_EXTIOI ++ 
KVM_DEV_TYPE_LOONGARCH_IPI, ++#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI ++ KVM_DEV_TYPE_LOONGARCH_EIOINTC, ++#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC ++ KVM_DEV_TYPE_LOONGARCH_PCHPIC, ++#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC ++ + KVM_DEV_TYPE_MAX, + }; + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index f6e008a517..f724e77a1b 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -973,10 +973,6 @@ int kvm_arch_get_default_type(MachineState *ms) + int kvm_arch_init(MachineState *ms, KVMState *s) + { + cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); +- if(!kvm_vm_check_attr(kvm_state, KVM_LOONGARCH_VM_HAVE_IRQCHIP, KVM_LOONGARCH_VM_HAVE_IRQCHIP)) { +- s->kernel_irqchip_allowed = false; +- } +- + return 0; + } + +-- +2.33.0 + diff --git a/sync-loongarch-linux-headers.patch b/sync-loongarch-linux-headers.patch new file mode 100644 index 0000000000000000000000000000000000000000..30aad853a0a78ef0dabc008b50641c0eaed084d5 --- /dev/null +++ b/sync-loongarch-linux-headers.patch @@ -0,0 +1,97 @@ +From 23cede66eaa62e8ec559cfa538a59e72375c9fa8 Mon Sep 17 00:00:00 2001 +From: gaosong +Date: Sun, 8 Sep 2024 03:28:16 +0800 +Subject: [PATCH 56/78] sync loongarch linux-headers + +Signed-off-by: gaosong +Signed-off-by: Xianglai Li +--- + linux-headers/asm-loongarch/kvm.h | 36 +++++++++++++++++++++++++++- + linux-headers/asm-loongarch/unistd.h | 1 + + 2 files changed, 36 insertions(+), 1 deletion(-) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 81fec85f0a..13c1280662 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -19,6 +19,7 @@ + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 ++#define __KVM_HAVE_IRQ_LINE + + #define KVM_GUESTDBG_USE_SW_BP 0x00010000 + /* +@@ -66,6 +67,7 @@ struct kvm_fpu { + #define 
KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) + #define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL) + #define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL) ++#define KVM_REG_LOONGARCH_LBT (KVM_REG_LOONGARCH | 0x50000ULL) + #define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) + #define KVM_CSR_IDX_MASK 0x7fff + #define KVM_CPUCFG_IDX_MASK 0x7fff +@@ -79,13 +81,34 @@ struct kvm_fpu { + /* Debugging: Special instruction for software breakpoint */ + #define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) + ++/* LBT registers */ ++#define KVM_REG_LOONGARCH_LBT_SCR0 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 1) ++#define KVM_REG_LOONGARCH_LBT_SCR1 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 2) ++#define KVM_REG_LOONGARCH_LBT_SCR2 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 3) ++#define KVM_REG_LOONGARCH_LBT_SCR3 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 4) ++#define KVM_REG_LOONGARCH_LBT_EFLAGS (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 5) ++#define KVM_REG_LOONGARCH_LBT_FTOP (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 6) ++ + #define LOONGARCH_REG_SHIFT 3 + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) + #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) + #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) ++ ++/* Device Control API on vm fd */ ++#define KVM_LOONGARCH_VM_FEAT_CTRL 0 ++#define KVM_LOONGARCH_VM_FEAT_LSX 0 ++#define KVM_LOONGARCH_VM_FEAT_LASX 1 ++#define KVM_LOONGARCH_VM_FEAT_X86BT 2 ++#define KVM_LOONGARCH_VM_FEAT_ARMBT 3 ++#define KVM_LOONGARCH_VM_FEAT_MIPSBT 4 ++#define KVM_LOONGARCH_VM_FEAT_PMU 5 ++#define KVM_LOONGARCH_VM_FEAT_PV_IPI 6 ++#define KVM_LOONGARCH_VM_FEAT_PV_STEALTIME 7 ++ ++/* Device Control API on vcpu fd */ + #define KVM_LOONGARCH_VCPU_CPUCFG 0 + #define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1 +-#define KVM_LOONGARCH_VCPU_PVTIME_GPA 0 ++#define KVM_LOONGARCH_VCPU_PVTIME_GPA 0 
+ +struct kvm_debug_exit_arch { + }; +@@ -112,4 +135,15 @@ struct kvm_iocsr_entry { + #define KVM_IRQCHIP_NUM_PINS 64 + #define KVM_MAX_CORES 256 + ++#define KVM_LOONGARCH_VM_HAVE_IRQCHIP 0x40000001 ++ ++#define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000002 ++ ++#define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 ++ ++#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000004 ++#define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 ++ ++#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS 0x40000005 ++ + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ +diff --git a/linux-headers/asm-loongarch/unistd.h b/linux-headers/asm-loongarch/unistd.h +index fcb668984f..b344b1f917 100644 +--- a/linux-headers/asm-loongarch/unistd.h ++++ b/linux-headers/asm-loongarch/unistd.h +@@ -1,4 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#define __ARCH_WANT_NEW_STAT + #define __ARCH_WANT_SYS_CLONE + #define __ARCH_WANT_SYS_CLONE3 + +-- +2.39.1 + diff --git a/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch b/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c1b707d2a181fcc357d04385781add6b274a8f2 --- /dev/null +++ b/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch @@ -0,0 +1,91 @@ +From 401e145800134d0310d613f48c4962a108b8ddda Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 17 Mar 2024 16:37:03 +0800 +Subject: [PATCH] system/cpus: Fix pause_all_vcpus() under concurrent + environment + +Both the main loop thread and vCPU threads are allowed to call +pause_all_vcpus(), and in general resume_all_vcpus() is called +after it. Two issues exist in pause_all_vcpus(): + +1. It is possible that, while thread T1 waits on +qemu_pause_cond with the bql unlocked, another thread calls +pause_all_vcpus() and resume_all_vcpus(); thread T1 will then get +stuck, because the condition all_vcpus_paused() is always false. + +2.
After all_vcpus_paused() has been checked as true, we will +unlock bql to relock replay_mutex. While the bql is unlocked, +the vcpu's state may have been changed by another thread, so we +must retry. + +Signed-off-by: Keqian Zhu +--- + system/cpus.c | 29 ++++++++++++++++++++++++----- + 1 file changed, 24 insertions(+), 5 deletions(-) + +diff --git a/system/cpus.c b/system/cpus.c +index a444a747f0..7c5369fa9c 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -551,12 +551,14 @@ static bool all_vcpus_paused(void) + return true; + } + +-void pause_all_vcpus(void) ++static void request_pause_all_vcpus(void) + { + CPUState *cpu; + +- qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); + CPU_FOREACH(cpu) { ++ if (cpu->stopped) { ++ continue; ++ } + if (qemu_cpu_is_self(cpu)) { + qemu_cpu_stop(cpu, true); + } else { +@@ -564,6 +566,14 @@ void pause_all_vcpus(void) + qemu_cpu_kick(cpu); + } + } ++} ++ ++void pause_all_vcpus(void) ++{ ++ qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); ++ ++retry: ++ request_pause_all_vcpus(); + + /* We need to drop the replay_lock so any vCPU threads woken up + * can finish their replay tasks +@@ -572,14 +582,23 @@ void pause_all_vcpus(void) + + while (!all_vcpus_paused()) { + qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); +- CPU_FOREACH(cpu) { +- qemu_cpu_kick(cpu); +- } ++ /* During we waited on qemu_pause_cond the bql was unlocked, ++ * the vcpu's state may has been changed by other thread, so ++ * we must request the pause state on all vcpus again. ++ */ ++ request_pause_all_vcpus(); + } + + qemu_mutex_unlock_iothread(); + replay_mutex_lock(); + qemu_mutex_lock_iothread(); ++ ++ /* During the bql was unlocked, the vcpu's state may has been ++ * changed by other thread, so we must retry.
++ */ ++ if (!all_vcpus_paused()) { ++ goto retry; ++ } + } + + void cpu_resume(CPUState *cpu) +-- +2.27.0 + diff --git a/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch b/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4008abba6fee9de7707a9cdbf1ad33373a58270 --- /dev/null +++ b/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch @@ -0,0 +1,43 @@ +From a29922f76c9b5064ddd2e686fa725b96c435e889 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 17 Mar 2024 16:37:04 +0800 +Subject: [PATCH] system/cpus: Fix resume_all_vcpus() under vCPU hotplug + condition + +For a vCPU being hotplugged, qemu_init_vcpu() is called. In this +function, we set the vcpu state to stopped, and then wait for the vcpu thread +to be created. + +As the vcpu state is stopped, it will inform us it has been created +and then wait on halt_cond. After we have realized the vcpu object, we +will resume the vcpu thread. + +However, while we wait for the vcpu thread to be created, the bql is +unlocked, and another thread is allowed to call resume_all_vcpus(), +which will resume the un-realized vcpu. + +This fixes the issue by filtering out un-realized vcpus during +resume_all_vcpus().
+ +Signed-off-by: Keqian Zhu +--- + system/cpus.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/system/cpus.c b/system/cpus.c +index 7c5369fa9c..f2289e9545 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -618,6 +618,9 @@ void resume_all_vcpus(void) + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); + CPU_FOREACH(cpu) { ++ if (!object_property_get_bool(OBJECT(cpu), "realized", &error_abort)) { ++ continue; ++ } + cpu_resume(cpu); + } + } +-- +2.27.0 + diff --git a/system-physmem-Fix-possible-double-free-when-destroy.patch b/system-physmem-Fix-possible-double-free-when-destroy.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2f3853bc110515d6d30dcb404fee28aef1339eb --- /dev/null +++ b/system-physmem-Fix-possible-double-free-when-destroy.patch @@ -0,0 +1,64 @@ +From 5f7464524d0fb2c25c9bacfb550df92bef9bb3bf Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:11:05 +0800 +Subject: [PATCH] system/physmem: Fix possible double free when destroy cpu as + +address_space_destroy() and g_free_rcu() both operate on cpuas->as +asynchronously in rcu thread context; each one is an rcu task +that has a different callback (the first callback is +do_address_space_destroy() and the second callback is g_free()). + +It's possible that, while the first task is still pending, the second +task overwrites the rcu callback (as the second task operates on +the same object). Then g_free() will be called twice on cpuas->as. + +Signed-off-by: Keqian Zhu +--- + include/exec/memory.h | 1 + + system/memory.c | 3 +++ + system/physmem.c | 2 +- + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index e131c2682c..91c42c9a6a 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1114,6 +1114,7 @@ struct AddressSpace { + struct rcu_head rcu; + char *name; + MemoryRegion *root; ++ bool free_in_rcu; + + /* Accessed via RCU.
*/ + struct FlatView *current_map; +diff --git a/system/memory.c b/system/memory.c +index 798b6c0a17..fb817e54bc 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3130,6 +3130,9 @@ static void do_address_space_destroy(AddressSpace *as) + g_free(as->name); + g_free(as->ioeventfds); + memory_region_unref(as->root); ++ if (as->free_in_rcu) { ++ g_free(as); ++ } + } + + void address_space_destroy(AddressSpace *as) +diff --git a/system/physmem.c b/system/physmem.c +index 299174ad91..cbe838f203 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -788,8 +788,8 @@ void cpu_address_space_destroy(CPUState *cpu, int asidx) + memory_listener_unregister(&cpuas->tcg_as_listener); + } + ++ cpuas->as->free_in_rcu = true; + address_space_destroy(cpuas->as); +- g_free_rcu(cpuas->as, rcu); + + if (cpu->cpu_ases_ref_count == 1) { + g_free(cpu->cpu_ases); +-- +2.27.0 + diff --git a/system-physmem-Per-AddressSpace-bounce-buffering.patch b/system-physmem-Per-AddressSpace-bounce-buffering.patch new file mode 100644 index 0000000000000000000000000000000000000000..26a52dd4a92259af9b4c63cbbbb85c5030b8edc0 --- /dev/null +++ b/system-physmem-Per-AddressSpace-bounce-buffering.patch @@ -0,0 +1,265 @@ +From 215731d484366474a90a2e14f3a75bb84fd314a3 Mon Sep 17 00:00:00 2001 +From: Mattias Nissler +Date: Thu, 7 Sep 2023 06:04:23 -0700 +Subject: [PATCH 2/4] system/physmem: Per-AddressSpace bounce buffering +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 69e78f1b3484e429274352a464a94fa1d78be339 + +Instead of using a single global bounce buffer, give each AddressSpace +its own bounce buffer. The MapClient callback mechanism moves to +AddressSpace accordingly. + +This is in preparation for generalizing bounce buffer handling further +to allow multiple bounce buffers, with a total allocation limit +configured per AddressSpace. 
+ +Reviewed-by: Peter Xu +Tested-by: Jonathan Cameron +Signed-off-by: Mattias Nissler +Message-ID: <20240507094210.300566-2-mnissler@rivosinc.com> +Reviewed-by: Philippe Mathieu-Daudé +[PMD: Split patch, part 2/2] +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: liuxiangdong +--- + include/exec/memory.h | 19 +++++++++++ + system/memory.c | 7 ++++ + system/physmem.c | 79 ++++++++++++++++--------------------------- + 3 files changed, 56 insertions(+), 49 deletions(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 4b7dc7f055..40dcf70530 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1106,6 +1106,19 @@ struct MemoryListener { + QTAILQ_ENTRY(MemoryListener) link_as; + }; + ++typedef struct AddressSpaceMapClient { ++ QEMUBH *bh; ++ QLIST_ENTRY(AddressSpaceMapClient) link; ++} AddressSpaceMapClient; ++ ++typedef struct { ++ MemoryRegion *mr; ++ void *buffer; ++ hwaddr addr; ++ hwaddr len; ++ bool in_use; ++} BounceBuffer; ++ + /** + * struct AddressSpace: describes a mapping of addresses to #MemoryRegion objects + */ +@@ -1124,6 +1137,12 @@ struct AddressSpace { + struct MemoryRegionIoeventfd *ioeventfds; + QTAILQ_HEAD(, MemoryListener) listeners; + QTAILQ_ENTRY(AddressSpace) address_spaces_link; ++ ++ /* Bounce buffer to use for this address space. 
*/ ++ BounceBuffer bounce; ++ /* List of callbacks to invoke when buffers free up */ ++ QemuMutex map_client_list_lock; ++ QLIST_HEAD(, AddressSpaceMapClient) map_client_list; + }; + + typedef struct AddressSpaceDispatch AddressSpaceDispatch; +diff --git a/system/memory.c b/system/memory.c +index fb817e54bc..026e47dcb8 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3117,6 +3117,9 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) + as->ioeventfds = NULL; + QTAILQ_INIT(&as->listeners); + QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link); ++ as->bounce.in_use = false; ++ qemu_mutex_init(&as->map_client_list_lock); ++ QLIST_INIT(&as->map_client_list); + as->name = g_strdup(name ? name : "anonymous"); + address_space_update_topology(as); + address_space_update_ioeventfds(as); +@@ -3124,6 +3127,10 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) + + static void do_address_space_destroy(AddressSpace *as) + { ++ assert(!qatomic_read(&as->bounce.in_use)); ++ assert(QLIST_EMPTY(&as->map_client_list)); ++ qemu_mutex_destroy(&as->map_client_list_lock); ++ + assert(QTAILQ_EMPTY(&as->listeners)); + + flatview_unref(as->current_map); +diff --git a/system/physmem.c b/system/physmem.c +index 1d01e7a32b..4491a7dbd1 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -3021,26 +3021,8 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len) + NULL, len, FLUSH_CACHE); + } + +-typedef struct { +- MemoryRegion *mr; +- void *buffer; +- hwaddr addr; +- hwaddr len; +- bool in_use; +-} BounceBuffer; +- +-static BounceBuffer bounce; +- +-typedef struct MapClient { +- QEMUBH *bh; +- QLIST_ENTRY(MapClient) link; +-} MapClient; +- +-QemuMutex map_client_list_lock; +-static QLIST_HEAD(, MapClient) map_client_list +- = QLIST_HEAD_INITIALIZER(map_client_list); +- +-static void address_space_unregister_map_client_do(MapClient *client) ++static void 
++address_space_unregister_map_client_do(AddressSpaceMapClient *client) + { + QLIST_REMOVE(client, link); + g_free(client); +@@ -3048,10 +3030,10 @@ static void address_space_unregister_map_client_do(MapClient *client) + + static void address_space_notify_map_clients_locked(AddressSpace *as) + { +- MapClient *client; ++ AddressSpaceMapClient *client; + +- while (!QLIST_EMPTY(&map_client_list)) { +- client = QLIST_FIRST(&map_client_list); ++ while (!QLIST_EMPTY(&as->map_client_list)) { ++ client = QLIST_FIRST(&as->map_client_list); + qemu_bh_schedule(client->bh); + address_space_unregister_map_client_do(client); + } +@@ -3059,17 +3041,17 @@ static void address_space_notify_map_clients_locked(AddressSpace *as) + + void address_space_register_map_client(AddressSpace *as, QEMUBH *bh) + { +- MapClient *client = g_malloc(sizeof(*client)); ++ AddressSpaceMapClient *client = g_malloc(sizeof(*client)); + +- qemu_mutex_lock(&map_client_list_lock); ++ qemu_mutex_lock(&as->map_client_list_lock); + client->bh = bh; +- QLIST_INSERT_HEAD(&map_client_list, client, link); ++ QLIST_INSERT_HEAD(&as->map_client_list, client, link); + /* Write map_client_list before reading in_use. 
*/ + smp_mb(); +- if (!qatomic_read(&bounce.in_use)) { ++ if (!qatomic_read(&as->bounce.in_use)) { + address_space_notify_map_clients_locked(as); + } +- qemu_mutex_unlock(&map_client_list_lock); ++ qemu_mutex_unlock(&as->map_client_list_lock); + } + + void cpu_exec_init_all(void) +@@ -3085,28 +3067,27 @@ void cpu_exec_init_all(void) + finalize_target_page_bits(); + io_mem_init(); + memory_map_init(); +- qemu_mutex_init(&map_client_list_lock); + } + + void address_space_unregister_map_client(AddressSpace *as, QEMUBH *bh) + { +- MapClient *client; ++ AddressSpaceMapClient *client; + +- qemu_mutex_lock(&map_client_list_lock); +- QLIST_FOREACH(client, &map_client_list, link) { ++ qemu_mutex_lock(&as->map_client_list_lock); ++ QLIST_FOREACH(client, &as->map_client_list, link) { + if (client->bh == bh) { + address_space_unregister_map_client_do(client); + break; + } + } +- qemu_mutex_unlock(&map_client_list_lock); ++ qemu_mutex_unlock(&as->map_client_list_lock); + } + + static void address_space_notify_map_clients(AddressSpace *as) + { +- qemu_mutex_lock(&map_client_list_lock); ++ qemu_mutex_lock(&as->map_client_list_lock); + address_space_notify_map_clients_locked(as); +- qemu_mutex_unlock(&map_client_list_lock); ++ qemu_mutex_unlock(&as->map_client_list_lock); + } + + static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len, +@@ -3197,25 +3178,25 @@ void *address_space_map(AddressSpace *as, + mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); + + if (!memory_access_is_direct(mr, is_write)) { +- if (qatomic_xchg(&bounce.in_use, true)) { ++ if (qatomic_xchg(&as->bounce.in_use, true)) { + *plen = 0; + return NULL; + } + /* Avoid unbounded allocations */ + l = MIN(l, TARGET_PAGE_SIZE); +- bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); +- bounce.addr = addr; +- bounce.len = l; ++ as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); ++ as->bounce.addr = addr; ++ as->bounce.len = l; + + memory_region_ref(mr); +- bounce.mr = mr; ++ 
as->bounce.mr = mr; + if (!is_write) { + flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, +- bounce.buffer, l); ++ as->bounce.buffer, l); + } + + *plen = l; +- return bounce.buffer; ++ return as->bounce.buffer; + } + + +@@ -3233,7 +3214,7 @@ void *address_space_map(AddressSpace *as, + void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + bool is_write, hwaddr access_len) + { +- if (buffer != bounce.buffer) { ++ if (buffer != as->bounce.buffer) { + MemoryRegion *mr; + ram_addr_t addr1; + +@@ -3249,14 +3230,14 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + return; + } + if (is_write) { +- address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED, +- bounce.buffer, access_len); ++ address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED, ++ as->bounce.buffer, access_len); + } +- qemu_vfree(bounce.buffer); +- bounce.buffer = NULL; +- memory_region_unref(bounce.mr); ++ qemu_vfree(as->bounce.buffer); ++ as->bounce.buffer = NULL; ++ memory_region_unref(as->bounce.mr); + /* Clear in_use before reading map_client_list. 
*/ +- qatomic_set_mb(&bounce.in_use, false); ++ qatomic_set_mb(&as->bounce.in_use, false); + address_space_notify_map_clients(as); + } + +-- +2.45.1.windows.1 + diff --git a/system-physmem-Propagate-AddressSpace-to-MapClient-h.patch b/system-physmem-Propagate-AddressSpace-to-MapClient-h.patch new file mode 100644 index 0000000000000000000000000000000000000000..78e9504e3b5526f310299d6dc1c62390a94695e1 --- /dev/null +++ b/system-physmem-Propagate-AddressSpace-to-MapClient-h.patch @@ -0,0 +1,207 @@ +From 9fd6abb40a7223f83244cdad4edf1f8ba21071aa Mon Sep 17 00:00:00 2001 +From: Mattias Nissler +Date: Thu, 7 Sep 2023 06:04:23 -0700 +Subject: [PATCH 1/4] system/physmem: Propagate AddressSpace to MapClient + helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 5c62719710bab66a98f68ebdba333e2240ed6668 + +Propagate AddressSpace handler to following helpers: +- register_map_client() +- unregister_map_client() +- notify_map_clients[_locked]() + +Rename them using 'address_space_' prefix instead of 'cpu_'. + +The AddressSpace argument will be used in the next commit. 
+ +Reviewed-by: Peter Xu +Tested-by: Jonathan Cameron +Signed-off-by: Mattias Nissler +Message-ID: <20240507094210.300566-2-mnissler@rivosinc.com> +[PMD: Split patch, part 1/2] +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: liuxiangdong +--- + include/exec/cpu-common.h | 2 -- + include/exec/memory.h | 26 ++++++++++++++++++++++++-- + system/dma-helpers.c | 4 ++-- + system/physmem.c | 24 ++++++++++++------------ + 4 files changed, 38 insertions(+), 18 deletions(-) + +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 2a3d4aa1c8..c7fd30d5b9 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -165,8 +165,6 @@ void *cpu_physical_memory_map(hwaddr addr, + bool is_write); + void cpu_physical_memory_unmap(void *buffer, hwaddr len, + bool is_write, hwaddr access_len); +-void cpu_register_map_client(QEMUBH *bh); +-void cpu_unregister_map_client(QEMUBH *bh); + + bool cpu_physical_memory_is_io(hwaddr phys_addr); + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 91c42c9a6a..4b7dc7f055 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2916,8 +2916,8 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, hwaddr len, + * May return %NULL and set *@plen to zero(0), if resources needed to perform + * the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. +- * Use cpu_register_map_client() to know when retrying the map operation is +- * likely to succeed. ++ * Use address_space_register_map_client() to know when retrying the map ++ * operation is likely to succeed. 
+ * + * @as: #AddressSpace to be accessed + * @addr: address within that address space +@@ -2942,6 +2942,28 @@ void *address_space_map(AddressSpace *as, hwaddr addr, + void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + bool is_write, hwaddr access_len); + ++/* ++ * address_space_register_map_client: Register a callback to invoke when ++ * resources for address_space_map() are available again. ++ * ++ * address_space_map may fail when there are not enough resources available, ++ * such as when bounce buffer memory would exceed the limit. The callback can ++ * be used to retry the address_space_map operation. Note that the callback ++ * gets automatically removed after firing. ++ * ++ * @as: #AddressSpace to be accessed ++ * @bh: callback to invoke when address_space_map() retry is appropriate ++ */ ++void address_space_register_map_client(AddressSpace *as, QEMUBH *bh); ++ ++/* ++ * address_space_unregister_map_client: Unregister a callback that has ++ * previously been registered and not fired yet. ++ * ++ * @as: #AddressSpace to be accessed ++ * @bh: callback to unregister ++ */ ++void address_space_unregister_map_client(AddressSpace *as, QEMUBH *bh); + + /* Internal functions, part of the implementation of address_space_read. 
*/ + MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, +diff --git a/system/dma-helpers.c b/system/dma-helpers.c +index 36211acc7e..611ea04ffb 100644 +--- a/system/dma-helpers.c ++++ b/system/dma-helpers.c +@@ -167,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret) + if (dbs->iov.size == 0) { + trace_dma_map_wait(dbs); + dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); +- cpu_register_map_client(dbs->bh); ++ address_space_register_map_client(dbs->sg->as, dbs->bh); + goto out; + } + +@@ -197,7 +197,7 @@ static void dma_aio_cancel(BlockAIOCB *acb) + } + + if (dbs->bh) { +- cpu_unregister_map_client(dbs->bh); ++ address_space_unregister_map_client(dbs->sg->as, dbs->bh); + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + } +diff --git a/system/physmem.c b/system/physmem.c +index 0c629233bd..1d01e7a32b 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -3040,24 +3040,24 @@ QemuMutex map_client_list_lock; + static QLIST_HEAD(, MapClient) map_client_list + = QLIST_HEAD_INITIALIZER(map_client_list); + +-static void cpu_unregister_map_client_do(MapClient *client) ++static void address_space_unregister_map_client_do(MapClient *client) + { + QLIST_REMOVE(client, link); + g_free(client); + } + +-static void cpu_notify_map_clients_locked(void) ++static void address_space_notify_map_clients_locked(AddressSpace *as) + { + MapClient *client; + + while (!QLIST_EMPTY(&map_client_list)) { + client = QLIST_FIRST(&map_client_list); + qemu_bh_schedule(client->bh); +- cpu_unregister_map_client_do(client); ++ address_space_unregister_map_client_do(client); + } + } + +-void cpu_register_map_client(QEMUBH *bh) ++void address_space_register_map_client(AddressSpace *as, QEMUBH *bh) + { + MapClient *client = g_malloc(sizeof(*client)); + +@@ -3067,7 +3067,7 @@ void cpu_register_map_client(QEMUBH *bh) + /* Write map_client_list before reading in_use. 
*/ + smp_mb(); + if (!qatomic_read(&bounce.in_use)) { +- cpu_notify_map_clients_locked(); ++ address_space_notify_map_clients_locked(as); + } + qemu_mutex_unlock(&map_client_list_lock); + } +@@ -3088,24 +3088,24 @@ void cpu_exec_init_all(void) + qemu_mutex_init(&map_client_list_lock); + } + +-void cpu_unregister_map_client(QEMUBH *bh) ++void address_space_unregister_map_client(AddressSpace *as, QEMUBH *bh) + { + MapClient *client; + + qemu_mutex_lock(&map_client_list_lock); + QLIST_FOREACH(client, &map_client_list, link) { + if (client->bh == bh) { +- cpu_unregister_map_client_do(client); ++ address_space_unregister_map_client_do(client); + break; + } + } + qemu_mutex_unlock(&map_client_list_lock); + } + +-static void cpu_notify_map_clients(void) ++static void address_space_notify_map_clients(AddressSpace *as) + { + qemu_mutex_lock(&map_client_list_lock); +- cpu_notify_map_clients_locked(); ++ address_space_notify_map_clients_locked(as); + qemu_mutex_unlock(&map_client_list_lock); + } + +@@ -3173,8 +3173,8 @@ flatview_extend_translation(FlatView *fv, hwaddr addr, + * May map a subset of the requested range, given by and returned in *plen. + * May return NULL if resources needed to perform the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. +- * Use cpu_register_map_client() to know when retrying the map operation is +- * likely to succeed. ++ * Use address_space_register_map_client() to know when retrying the map ++ * operation is likely to succeed. + */ + void *address_space_map(AddressSpace *as, + hwaddr addr, +@@ -3257,7 +3257,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + memory_region_unref(bounce.mr); + /* Clear in_use before reading map_client_list. 
*/ + qatomic_set_mb(&bounce.in_use, false); +- cpu_notify_map_clients(); ++ address_space_notify_map_clients(as); + } + + void *cpu_physical_memory_map(hwaddr addr, +-- +2.45.1.windows.1 + diff --git a/target-arm-Add-CPU-features-to-query-cpu-model-expan.patch b/target-arm-Add-CPU-features-to-query-cpu-model-expan.patch deleted file mode 100644 index 4047145033d7010acfb3cfb002feb920fb303f0d..0000000000000000000000000000000000000000 --- a/target-arm-Add-CPU-features-to-query-cpu-model-expan.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 274d25bdb2df13a26ad6d2a8a06fcc281a22f642 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Thu, 6 Aug 2020 16:14:58 +0800 -Subject: [PATCH 7/9] target/arm: Add CPU features to query-cpu-model-expansion - -Add CPU features to the result of query-cpu-model-expansion so that -other applications (such as libvirt) can know the supported CPU -features. - -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/cpu.c | 27 +++++++++++++++++++++++++++ - target/arm/cpu.h | 2 ++ - target/arm/monitor.c | 2 ++ - 3 files changed, 31 insertions(+) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index db46afba..dcf9f49e 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -25,6 +25,8 @@ - #include "qemu/module.h" - #include "qapi/error.h" - #include "qapi/visitor.h" -+#include "qapi/qmp/qdict.h" -+#include "qom/qom-qobject.h" - #include "cpu.h" - #include "internals.h" - #include "exec/exec-all.h" -@@ -1403,6 +1405,31 @@ static const CPUFeatureDep feature_dependencies[] = { - }, - }; - -+void arm_cpu_features_to_dict(ARMCPU *cpu, QDict *features) -+{ -+ Object *obj = OBJECT(cpu); -+ const char *name; -+ ObjectProperty *prop; -+ bool is_32bit = !arm_feature(&cpu->env, ARM_FEATURE_AARCH64); -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(cpu_features); ++i) { -+ if (is_32bit != cpu_features[i].is_32bit) { -+ continue; -+ } -+ -+ name = cpu_features[i].name; -+ prop = object_property_find(obj, name, NULL); -+ if (prop) { -+ QObject 
*value; -+ -+ assert(prop->get); -+ value = object_property_get_qobject(obj, name, &error_abort); -+ qdict_put_obj(features, name, value); -+ } -+ } -+} -+ - static void arm_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) - { -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 7bb481fb..068c3fa2 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -3692,4 +3692,6 @@ static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) - #define cpu_isar_feature(name, cpu) \ - ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); }) - -+void arm_cpu_features_to_dict(ARMCPU *cpu, QDict *features); -+ - #endif -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index e2b1d117..7c2ff3c0 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -219,6 +219,8 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - } - } - -+ arm_cpu_features_to_dict(ARM_CPU(obj), qdict_out); -+ - if (!qdict_size(qdict_out)) { - qobject_unref(qdict_out); - } else { --- -2.25.1 - diff --git a/target-arm-Add-ID_AA64MMFR2_EL1.patch b/target-arm-Add-ID_AA64MMFR2_EL1.patch deleted file mode 100644 index eee33ae241bde2333d5308c7ca39297782598ccc..0000000000000000000000000000000000000000 --- a/target-arm-Add-ID_AA64MMFR2_EL1.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 3451fb922aa7b0fe532e508ca13d4ab4b3ec75bf Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 8 Feb 2020 12:58:13 +0000 -Subject: [PATCH 02/13] target/arm: Add ID_AA64MMFR2_EL1 - -Add definitions for all of the fields, up to ARMv8.5. -Convert the existing RESERVED register to a full register. -Query KVM for the value of the register for the host. 
- -Reviewed-by: Peter Maydell -Signed-off-by: Richard Henderson -Message-id: 20200208125816.14954-18-richard.henderson@linaro.org -Signed-off-by: Peter Maydell ---- - target/arm/cpu.h | 17 +++++++++++++++++ - target/arm/helper.c | 4 ++-- - target/arm/kvm64.c | 2 ++ - 3 files changed, 21 insertions(+), 2 deletions(-) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index fe310828..3e65bc50 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -866,6 +866,7 @@ struct ARMCPU { - uint64_t id_aa64pfr1; - uint64_t id_aa64mmfr0; - uint64_t id_aa64mmfr1; -+ uint64_t id_aa64mmfr2; - } isar; - uint32_t midr; - uint32_t revidr; -@@ -1762,6 +1763,22 @@ FIELD(ID_AA64MMFR1, PAN, 20, 4) - FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) - FIELD(ID_AA64MMFR1, XNX, 28, 4) - -+FIELD(ID_AA64MMFR2, CNP, 0, 4) -+FIELD(ID_AA64MMFR2, UAO, 4, 4) -+FIELD(ID_AA64MMFR2, LSM, 8, 4) -+FIELD(ID_AA64MMFR2, IESB, 12, 4) -+FIELD(ID_AA64MMFR2, VARANGE, 16, 4) -+FIELD(ID_AA64MMFR2, CCIDX, 20, 4) -+FIELD(ID_AA64MMFR2, NV, 24, 4) -+FIELD(ID_AA64MMFR2, ST, 28, 4) -+FIELD(ID_AA64MMFR2, AT, 32, 4) -+FIELD(ID_AA64MMFR2, IDS, 36, 4) -+FIELD(ID_AA64MMFR2, FWB, 40, 4) -+FIELD(ID_AA64MMFR2, TTL, 48, 4) -+FIELD(ID_AA64MMFR2, BBM, 52, 4) -+FIELD(ID_AA64MMFR2, EVT, 56, 4) -+FIELD(ID_AA64MMFR2, E0PD, 60, 4) -+ - FIELD(ID_DFR0, COPDBG, 0, 4) - FIELD(ID_DFR0, COPSDBG, 4, 4) - FIELD(ID_DFR0, MMAPDBG, 8, 4) -diff --git a/target/arm/helper.c b/target/arm/helper.c -index b74c23a9..c50b1ba1 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -6182,10 +6182,10 @@ void register_cp_regs_for_features(ARMCPU *cpu) - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->isar.id_aa64mmfr1 }, -- { .name = "ID_AA64MMFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, -+ { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = 0 }, -+ .resetvalue = 
cpu->isar.id_aa64mmfr2 }, - { .name = "ID_AA64MMFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, - .access = PL1_R, .type = ARM_CP_CONST, -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 4f0bf000..b794108a 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -541,6 +541,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - ARM64_SYS_REG(3, 0, 0, 7, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, - ARM64_SYS_REG(3, 0, 0, 7, 1)); -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, -+ ARM64_SYS_REG(3, 0, 0, 7, 2)); - - /* - * Note that if AArch32 support is not present in the host, --- -2.25.1 - diff --git a/target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch b/target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch deleted file mode 100644 index 7516ed8108de271970e600dbd03c964611b3b0ba..0000000000000000000000000000000000000000 --- a/target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 515975da851ca9567053bcf0487fde4447dfdc4f Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 Feb 2020 17:51:04 +0000 -Subject: [PATCH 06/13] target/arm: Add _aa64_ and _any_ versions of pmu_8_1 - isar checks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add the 64-bit version of the "is this a v8.1 PMUv3?" -ID register check function, and the _any_ version that -checks for either AArch32 or AArch64 support. We'll use -this in a later commit. - -We don't (yet) do any isar_feature checks on ID_AA64DFR1_EL1, -but we move id_aa64dfr1 into the ARMISARegisters struct with -id_aa64dfr0, for consistency. 
- -Reviewed-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Peter Maydell -Message-id: 20200214175116.9164-10-peter.maydell@linaro.org ---- - target/arm/cpu.c | 3 ++- - target/arm/cpu.h | 15 +++++++++++++-- - target/arm/cpu64.c | 8 ++++---- - target/arm/helper.c | 12 +++++++----- - 4 files changed, 26 insertions(+), 12 deletions(-) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7e9b85a2..bb2edf4e 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1522,7 +1522,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - cpu); - #endif - } else { -- cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); -+ cpu->isar.id_aa64dfr0 = -+ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); - cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); - cpu->pmceid0 = 0; - cpu->pmceid1 = 0; -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 2d8d27e8..230130be 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -868,6 +868,8 @@ struct ARMCPU { - uint64_t id_aa64mmfr0; - uint64_t id_aa64mmfr1; - uint64_t id_aa64mmfr2; -+ uint64_t id_aa64dfr0; -+ uint64_t id_aa64dfr1; - } isar; - uint32_t midr; - uint32_t revidr; -@@ -884,8 +886,6 @@ struct ARMCPU { - uint32_t id_mmfr2; - uint32_t id_mmfr3; - uint32_t id_mmfr4; -- uint64_t id_aa64dfr0; -- uint64_t id_aa64dfr1; - uint64_t id_aa64afr0; - uint64_t id_aa64afr1; - uint32_t dbgdidr; -@@ -3657,6 +3657,17 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; - } - -+static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) -+{ -+ return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && -+ FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; -+} -+ -+static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) -+{ -+ return isar_feature_aa64_pmu_8_1(id) || isar_feature_aa32_pmu_8_1(id); -+} -+ - /* - * Forward to the 
above feature tests given an ARMCPU pointer. - */ -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index afdabbeb..aa96548f 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -137,7 +137,7 @@ static void aarch64_a57_initfn(Object *obj) - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; -- cpu->id_aa64dfr0 = 0x10305106; -+ cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; - cpu->dbgdidr = 0x3516d000; -@@ -191,7 +191,7 @@ static void aarch64_a53_initfn(Object *obj) - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; -- cpu->id_aa64dfr0 = 0x10305106; -+ cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ - cpu->dbgdidr = 0x3516d000; -@@ -244,7 +244,7 @@ static void aarch64_a72_initfn(Object *obj) - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; -- cpu->id_aa64dfr0 = 0x10305106; -+ cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; - cpu->dbgdidr = 0x3516d000; -@@ -276,7 +276,7 @@ static void aarch64_kunpeng_920_initfn(Object *obj) - cpu->midr = 0x480fd010; - cpu->ctr = 0x84448004; - cpu->isar.id_aa64pfr0 = 0x11001111; -- cpu->id_aa64dfr0 = 0x110305408; -+ cpu->isar.id_aa64dfr0 = 0x110305408; - cpu->isar.id_aa64isar0 = 0x10211120; - cpu->isar.id_aa64mmfr0 = 0x101125; - } -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 3f06ca19..a71f4ef6 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -23,6 +23,7 @@ - #include "hw/semihosting/semihost.h" - #include "sysemu/cpus.h" - #include "sysemu/kvm.h" -+#include "sysemu/tcg.h" - #include "qemu/range.h" - #include "qapi/qapi-commands-machine-target.h" - #include "qapi/error.h" -@@ -5611,9 +5612,10 @@ static void 
define_debug_regs(ARMCPU *cpu) - * check that if they both exist then they agree. - */ - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -- assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); -- assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); -- assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) == ctx_cmps); -+ assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); -+ assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); -+ assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) -+ == ctx_cmps); - } - - define_one_arm_cp_reg(cpu, &dbgdidr); -@@ -6112,11 +6114,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_aa64dfr0 }, -+ .resetvalue = cpu->isar.id_aa64dfr0 }, - { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_aa64dfr1 }, -+ .resetvalue = cpu->isar.id_aa64dfr1 }, - { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, --- -2.25.1 - diff --git a/target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch b/target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch deleted file mode 100644 index 66e4ec4ad078aacdd4e7cb9a76244e1460487551..0000000000000000000000000000000000000000 --- a/target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 4001f3040937094660eab44dbb49b86817317ea9 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 Feb 2020 17:51:01 +0000 -Subject: [PATCH 03/13] target/arm: Add and use FIELD definitions for - ID_AA64DFR0_EL1 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 
8bit - -Add FIELD() definitions for the ID_AA64DFR0_EL1 and use them -where we currently have hard-coded bit values. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Richard Henderson -Signed-off-by: Peter Maydell -Message-id: 20200214175116.9164-7-peter.maydell@linaro.org ---- - target/arm/cpu.c | 2 +- - target/arm/cpu.h | 10 ++++++++++ - target/arm/helper.c | 6 +++--- - 3 files changed, 14 insertions(+), 4 deletions(-) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 811e5c63..dbd05e01 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1522,7 +1522,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - cpu); - #endif - } else { -- cpu->id_aa64dfr0 &= ~0xf00; -+ cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); - cpu->id_dfr0 &= ~(0xf << 24); - cpu->pmceid0 = 0; - cpu->pmceid1 = 0; -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 3e65bc50..91cc02b4 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -1779,6 +1779,16 @@ FIELD(ID_AA64MMFR2, BBM, 52, 4) - FIELD(ID_AA64MMFR2, EVT, 56, 4) - FIELD(ID_AA64MMFR2, E0PD, 60, 4) - -+FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) -+FIELD(ID_AA64DFR0, TRACEVER, 4, 4) -+FIELD(ID_AA64DFR0, PMUVER, 8, 4) -+FIELD(ID_AA64DFR0, BRPS, 12, 4) -+FIELD(ID_AA64DFR0, WRPS, 20, 4) -+FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) -+FIELD(ID_AA64DFR0, PMSVER, 32, 4) -+FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) -+FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) -+ - FIELD(ID_DFR0, COPDBG, 0, 4) - FIELD(ID_DFR0, COPSDBG, 4, 4) - FIELD(ID_DFR0, MMAPDBG, 8, 4) -diff --git a/target/arm/helper.c b/target/arm/helper.c -index c50b1ba1..419be640 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -5611,9 +5611,9 @@ static void define_debug_regs(ARMCPU *cpu) - * check that if they both exist then they agree. 
- */ - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -- assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps); -- assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps); -- assert(extract32(cpu->id_aa64dfr0, 28, 4) == ctx_cmps); -+ assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); -+ assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); -+ assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) == ctx_cmps); - } - - define_one_arm_cp_reg(cpu, &dbgdidr); --- -2.25.1 - diff --git a/target-arm-Add-confidential-guest-support.patch b/target-arm-Add-confidential-guest-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..6a3a9e865655bb48bb72f811565b17d3bd59f155 --- /dev/null +++ b/target-arm-Add-confidential-guest-support.patch @@ -0,0 +1,124 @@ +From 754c30c1d126357d60ea29a2c17428a0abdcca49 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Thu, 16 Jun 2022 18:24:55 +0100 +Subject: [PATCH] target/arm: Add confidential guest support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/6353278a78f3942ff1b576aab77d79d926e8f9f0 + +Add a new RmeGuest object, inheriting from ConfidentialGuestSupport, to +support the Arm Realm Management Extension (RME). It is instantiated by +passing on the command-line: + + -M virt,confidential-guest-support= + -object rme-guest,id=[,options...] + +This is only the skeleton. Support will be added in following patches. + +Cc: Eric Blake +Cc: Markus Armbruster +Cc: Daniel P. 
Berrangé +Cc: Eduardo Habkost +Acked-by: Markus Armbruster +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/meson.build +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + docs/system/confidential-guest-support.rst | 1 + + qapi/qom.json | 1 + + target/arm/kvm-rme.c | 40 ++++++++++++++++++++++ + target/arm/meson.build | 2 +- + 4 files changed, 43 insertions(+), 1 deletion(-) + create mode 100644 target/arm/kvm-rme.c + +diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst +index 0c490dbda2..acf46d8856 100644 +--- a/docs/system/confidential-guest-support.rst ++++ b/docs/system/confidential-guest-support.rst +@@ -40,5 +40,6 @@ Currently supported confidential guest mechanisms are: + * AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`) + * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`) + * s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`) ++* Arm Realm Management Extension (RME) + + Other mechanisms may be supported in future. 
+diff --git a/qapi/qom.json b/qapi/qom.json +index a5336e6b11..e405c51da3 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -999,6 +999,7 @@ + { 'name': 'pr-manager-helper', + 'if': 'CONFIG_LINUX' }, + 'qtest', ++ 'rme-guest', + 'rng-builtin', + 'rng-egd', + { 'name': 'rng-random', +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +new file mode 100644 +index 0000000000..1de65f2b1d +--- /dev/null ++++ b/target/arm/kvm-rme.c +@@ -0,0 +1,40 @@ ++/* ++ * QEMU Arm RME support ++ * ++ * Copyright Linaro 2024 ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "hw/boards.h" ++#include "hw/core/cpu.h" ++#include "kvm_arm.h" ++#include "migration/blocker.h" ++#include "qapi/error.h" ++#include "qom/object_interfaces.h" ++#include "exec/confidential-guest-support.h" ++#include "sysemu/kvm.h" ++#include "sysemu/runstate.h" ++ ++#define TYPE_RME_GUEST "rme-guest" ++OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) ++ ++struct RmeGuest { ++ ConfidentialGuestSupport parent_obj; ++}; ++ ++OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, ++ CONFIDENTIAL_GUEST_SUPPORT, ++ { TYPE_USER_CREATABLE }, { }) ++ ++static void rme_guest_class_init(ObjectClass *oc, void *data) ++{ ++} ++ ++static void rme_guest_init(Object *obj) ++{ ++} ++ ++static void rme_guest_finalize(Object *obj) ++{ ++} +diff --git a/target/arm/meson.build b/target/arm/meson.build +index 389ee54658..7973b35cca 100644 +--- a/target/arm/meson.build ++++ b/target/arm/meson.build +@@ -8,7 +8,7 @@ arm_ss.add(files( + )) + arm_ss.add(zlib) + +-arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c'), if_false: files('kvm-stub.c')) ++arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c', 'kvm-rme.c'), if_false: files('kvm-stub.c')) + arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) + arm_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c', 'kvm64.c', 'kvm-tmm.c'), if_false: files('kvm-stub.c')) + +-- +2.33.0 + diff --git 
a/target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch b/target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch deleted file mode 100644 index d6e29be12e1bbf6ef55d43bf35960e2168fc51e8..0000000000000000000000000000000000000000 --- a/target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 6f18e959eabf9c752659eb3851f193bf343346c5 Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 8 Feb 2020 12:57:59 +0000 -Subject: [PATCH 01/13] target/arm: Add isar_feature tests for PAN + ATS1E1 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Include definitions for all of the bits in ID_MMFR3. -We already have a definition for ID_AA64MMFR1.PAN. - -Reviewed-by: Alex Bennée -Reviewed-by: Peter Maydell -Signed-off-by: Richard Henderson -Message-id: 20200208125816.14954-4-richard.henderson@linaro.org -Signed-off-by: Peter Maydell ---- - target/arm/cpu.h | 29 +++++++++++++++++++++++++++++ - 1 file changed, 29 insertions(+) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 86eb79cd..fe310828 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -1680,6 +1680,15 @@ FIELD(ID_ISAR6, FHM, 8, 4) - FIELD(ID_ISAR6, SB, 12, 4) - FIELD(ID_ISAR6, SPECRES, 16, 4) - -+FIELD(ID_MMFR3, CMAINTVA, 0, 4) -+FIELD(ID_MMFR3, CMAINTSW, 4, 4) -+FIELD(ID_MMFR3, BPMAINT, 8, 4) -+FIELD(ID_MMFR3, MAINTBCST, 12, 4) -+FIELD(ID_MMFR3, PAN, 16, 4) -+FIELD(ID_MMFR3, COHWALK, 20, 4) -+FIELD(ID_MMFR3, CMEMSZ, 24, 4) -+FIELD(ID_MMFR3, SUPERSEC, 28, 4) -+ - FIELD(ID_MMFR4, SPECSEI, 0, 4) - FIELD(ID_MMFR4, AC2, 4, 4) - FIELD(ID_MMFR4, XNX, 8, 4) -@@ -3445,6 +3454,16 @@ static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) - return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4; - } - -+static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) -+{ -+ return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) != 0; -+} -+ -+static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) -+{ -+ return 
FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2; -+} -+ - /* - * 64-bit feature tests via id registers. - */ -@@ -3589,6 +3608,16 @@ static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; - } - -+static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) -+{ -+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; -+} -+ -+static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) -+{ -+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; -+} -+ - static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) - { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; --- -2.25.1 - diff --git a/target-arm-Add-more-CPU-features.patch b/target-arm-Add-more-CPU-features.patch deleted file mode 100644 index a22e5177300d305df8c0430ee21e29c587bd5399..0000000000000000000000000000000000000000 --- a/target-arm-Add-more-CPU-features.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 3eee1e4ff1ca342e760f759c727abc41780d0afa Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Tue, 11 Aug 2020 10:28:10 +0800 -Subject: [PATCH 9/9] target/arm: Add more CPU features - -Add i8mm, bf16, and dgh CPU features for AArch64. 
- -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/cpu.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index dcf9f49e..7ae2d3da 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1132,6 +1132,9 @@ static struct CPUFeatureInfo cpu_features[] = { - FIELD_INFO("fhm", ID_ISAR6, FHM, false, 1, 0, true), - FIELD_INFO("sb", ID_ISAR6, SB, false, 1, 0, true), - FIELD_INFO("specres", ID_ISAR6, SPECRES, false, 1, 0, true), -+ FIELD_INFO("i8mm", ID_AA64ISAR1, I8MM, false, 1, 0, false), -+ FIELD_INFO("bf16", ID_AA64ISAR1, BF16, false, 1, 0, false), -+ FIELD_INFO("dgh", ID_AA64ISAR1, DGH, false, 1, 0, false), - - FIELD_INFO("cmaintva", ID_MMFR3, CMAINTVA, false, 1, 0, true), - FIELD_INFO("cmaintsw", ID_MMFR3, CMAINTSW, false, 1, 0, true), --- -2.25.1 - diff --git a/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch b/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch new file mode 100644 index 0000000000000000000000000000000000000000..023fe7f49df7f1fa140ac9c9a7ea7dda1511f453 --- /dev/null +++ b/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch @@ -0,0 +1,294 @@ +From b311feda2078e7ee8f060531d4d061beccbc2f77 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 20:13:10 +0100 +Subject: [PATCH] target/arm: Add support of *unrealize* ARMCPU during vCPU + Hot-unplug + +vCPU Hot-unplug will result in QOM CPU object unrealization which will do away +with all the vCPU thread creations, allocations, registrations that happened +as part of the realization process. This change introduces the ARM CPU unrealize +function taking care of exactly that. + +Note, initialized KVM vCPUs are not destroyed in host KVM but their Qemu context +is parked at the QEMU KVM layer. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Vishnu Pajjuri +[VP: Identified CPU stall issue & suggested probable fix] +Signed-off-by: Salil Mehta +--- + target/arm/cpu.c | 101 +++++++++++++++++++++++++++++++++++++++++ + target/arm/cpu.h | 14 ++++++ + target/arm/gdbstub.c | 6 +++ + target/arm/helper.c | 25 ++++++++++ + target/arm/internals.h | 3 ++ + target/arm/kvm64.c | 4 ++ + 6 files changed, 153 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 18b8a79c8f..501f88eb2f 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -142,6 +142,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node); + } + ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque) + { +@@ -153,6 +163,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); + } + ++void arm_unregister_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) + { + /* Reset a single ARMCPRegInfo register */ +@@ -2390,6 +2410,85 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + acc->parent_realize(dev, errp); + } + ++static void arm_cpu_unrealizefn(DeviceState *dev) ++{ ++ ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUARMState *env = &cpu->env; ++ CPUState *cs = CPU(dev); ++ bool has_secure; ++ ++ has_secure = cpu->has_el3 || 
arm_feature(env, ARM_FEATURE_M_SECURITY); ++ ++ /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */ ++ cpu_address_space_destroy(cs, ARMASIdx_NS); ++ ++ if (cpu->tag_memory != NULL) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagNS); ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagS); ++ } ++ } ++ ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_S); ++ } ++ ++ destroy_cpreg_list(cpu); ++ arm_cpu_unregister_gdb_regs(cpu); ++ unregister_cp_regs_for_features(cpu); ++ ++ if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->sau.rbar); ++ g_free(env->sau.rlar); ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMSA) && ++ arm_feature(env, ARM_FEATURE_V7) && ++ cpu->pmsav7_dregion) { ++ if (arm_feature(env, ARM_FEATURE_V8)) { ++ g_free(env->pmsav8.rbar[M_REG_NS]); ++ g_free(env->pmsav8.rlar[M_REG_NS]); ++ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->pmsav8.rbar[M_REG_S]); ++ g_free(env->pmsav8.rlar[M_REG_S]); ++ } ++ } else { ++ g_free(env->pmsav7.drbar); ++ g_free(env->pmsav7.drsr); ++ g_free(env->pmsav7.dracr); ++ } ++ if (cpu->pmsav8r_hdregion) { ++ g_free(env->pmsav8.hprbar); ++ g_free(env->pmsav8.hprlar); ++ } ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMU)) { ++ if (!kvm_enabled()) { ++ arm_unregister_pre_el_change_hooks(cpu); ++ arm_unregister_el_change_hooks(cpu); ++ } ++ ++#ifndef CONFIG_USER_ONLY ++ if (cpu->pmu_timer) { ++ timer_del(cpu->pmu_timer); ++ } ++#endif ++ } ++ ++ cpu_remove_sync(CPU(dev)); ++ acc->parent_unrealize(dev); ++ ++#ifndef CONFIG_USER_ONLY ++ timer_del(cpu->gt_timer[GTIMER_PHYS]); ++ timer_del(cpu->gt_timer[GTIMER_VIRT]); ++ timer_del(cpu->gt_timer[GTIMER_HYP]); ++ timer_del(cpu->gt_timer[GTIMER_SEC]); ++ timer_del(cpu->gt_timer[GTIMER_HYPVIRT]); ++#endif ++} ++ + static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) + { + ObjectClass *oc; +@@ -2492,6 +2591,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + + 
device_class_set_parent_realize(dc, arm_cpu_realizefn, + &acc->parent_realize); ++ device_class_set_parent_unrealize(dc, arm_cpu_unrealizefn, ++ &acc->parent_unrealize); + + device_class_set_props(dc, arm_cpu_properties); + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 145d3dbf13..c51a0e3467 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1138,6 +1138,7 @@ struct ARMCPUClass { + + const ARMCPUInfo *info; + DeviceRealize parent_realize; ++ DeviceUnrealize parent_unrealize; + ResettablePhases parent_phases; + }; + +@@ -3359,6 +3360,13 @@ static inline AddressSpace *arm_addressspace(CPUState *cs, MemTxAttrs attrs) + */ + void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque); ++/** ++ * arm_unregister_pre_el_change_hook: ++ * unregister all pre EL change hook functions. Generally called during ++ * unrealize'ing leg ++ */ ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu); ++ + /** + * arm_register_el_change_hook: + * Register a hook function which will be called immediately after this +@@ -3371,6 +3379,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + */ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void + *opaque); ++/** ++ * arm_unregister_el_change_hook: ++ * unregister all EL change hook functions. 
Generally called during ++ * unrealize'ing leg ++ */ ++void arm_unregister_el_change_hooks(ARMCPU *cpu); + + /** + * arm_rebuild_hflags: +diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c +index 28f546a5ff..5ba1e28e34 100644 +--- a/target/arm/gdbstub.c ++++ b/target/arm/gdbstub.c +@@ -553,3 +553,9 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) + } + #endif /* CONFIG_TCG */ + } ++ ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ gdb_unregister_coprocessor_all(cs); ++} +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 2746d3fdac..e47498828c 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -263,6 +263,19 @@ void init_cpreg_list(ARMCPU *cpu) + g_list_free(keys); + } + ++void destroy_cpreg_list(ARMCPU *cpu) ++{ ++ assert(cpu->cpreg_indexes); ++ assert(cpu->cpreg_values); ++ assert(cpu->cpreg_vmstate_indexes); ++ assert(cpu->cpreg_vmstate_values); ++ ++ g_free(cpu->cpreg_indexes); ++ g_free(cpu->cpreg_values); ++ g_free(cpu->cpreg_vmstate_indexes); ++ g_free(cpu->cpreg_vmstate_values); ++} ++ + /* + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. + */ +@@ -9438,6 +9451,18 @@ void register_cp_regs_for_features(ARMCPU *cpu) + #endif + } + ++void unregister_cp_regs_for_features(ARMCPU *cpu) ++{ ++ CPUARMState *env = &cpu->env; ++ if (arm_feature(env, ARM_FEATURE_M)) { ++ /* M profile has no coprocessor registers */ ++ return; ++ } ++ ++ /* empty it all. unregister all the coprocessor registers */ ++ g_hash_table_remove_all(cpu->cp_regs); ++} ++ + /* Sort alphabetically by type name, except for "any". 
*/ + static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b) + { +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 143d57c0fe..c3a7682f05 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -187,9 +187,12 @@ void arm_cpu_register(const ARMCPUInfo *info); + void aarch64_cpu_register(const ARMCPUInfo *info); + + void register_cp_regs_for_features(ARMCPU *cpu); ++void unregister_cp_regs_for_features(ARMCPU *cpu); + void init_cpreg_list(ARMCPU *cpu); ++void destroy_cpreg_list(ARMCPU *cpu); + + void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu); + void arm_translate_init(void); + + void arm_restore_state_to_opc(CPUState *cs, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 03ce1e7525..9c3a35d63a 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -647,6 +647,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + + int kvm_arch_destroy_vcpu(CPUState *cs) + { ++ if (cs->thread_id) { ++ qemu_del_vm_change_state_handler(cs->vmcse); ++ } ++ + return 0; + } + +-- +2.27.0 + diff --git a/target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch b/target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch deleted file mode 100644 index 49c7dc63022ec1196b8c225b1c5291fbbe10e1ad..0000000000000000000000000000000000000000 --- a/target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 427975fbc87c3d999ee4d13b65a95ba496c148d6 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Fri, 29 May 2020 11:02:44 +0800 -Subject: [PATCH] target/arm: Add the kvm_adjvtime vcpu property for Cortex-A72 - -Add the kvm_adjvtime vcpu property for ARM Cortex-A72 cpu model, -so that virtual time adjust will be enabled for it. 
- -Signed-off-by: Ying Fang - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index b30ca7c9..15f4ee92 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -257,6 +257,9 @@ static void aarch64_a72_initfn(Object *obj) - cpu->gic_vpribits = 5; - cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); -+ if(kvm_enabled()) { -+ kvm_arm_add_vcpu_properties(obj); -+ } - } - - static void aarch64_kunpeng_920_initfn(Object *obj) --- -2.23.0 - diff --git a/target-arm-Adjust-and-validate-mtedesc-sizem1.patch b/target-arm-Adjust-and-validate-mtedesc-sizem1.patch new file mode 100644 index 0000000000000000000000000000000000000000..c2248de01584af6ace6b58bb70c58dd0763801e3 --- /dev/null +++ b/target-arm-Adjust-and-validate-mtedesc-sizem1.patch @@ -0,0 +1,66 @@ +From 19ef3764888b212a63603ac46e88b4cfd99dd7b2 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Wed, 25 Jun 2025 17:24:49 +0800 +Subject: [PATCH] target/arm: Adjust and validate mtedesc sizem1 + +cherry-pick from b12a7671b6099a26ce5d5ab09701f151e21c112c + +When we added SVE_MTEDESC_SHIFT, we effectively limited the +maximum size of MTEDESC. Adjust SIZEM1 to consume the remaining +bits (32 - 10 - 5 - 12 == 5). Assert that the data to be stored +fits within the field (expecting 8 * 4 - 1 == 31, exact fit). 
+ +Cc: qemu-stable@nongnu.org +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Tested-by: Gustavo Romero +Message-id: 20240207025210.8837-4-richard.henderson@linaro.org +Signed-off-by: Peter Maydell +Signed-off-by: gubin +--- + target/arm/internals.h | 2 +- + target/arm/tcg/translate-sve.c | 7 ++++--- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 20b9c1da38..ed9bfb29c8 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -1265,7 +1265,7 @@ FIELD(MTEDESC, TBI, 4, 2) + FIELD(MTEDESC, TCMA, 6, 2) + FIELD(MTEDESC, WRITE, 8, 1) + FIELD(MTEDESC, ALIGN, 9, 3) +-FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - 12) /* size - 1 */ ++FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - SVE_MTEDESC_SHIFT - 12) /* size - 1 */ + + bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); + uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); +diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c +index 1d8e0d29bf..1b722ae75d 100644 +--- a/target/arm/tcg/translate-sve.c ++++ b/target/arm/tcg/translate-sve.c +@@ -4457,17 +4457,18 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, + { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr t_pg; ++ uint32_t sizem1; + int desc = 0; + + assert(mte_n >= 1 && mte_n <= 4); ++ sizem1 = (mte_n << dtype_msz(dtype)) - 1; ++ assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); + if (s->mte_active[0]) { +- int msz = dtype_msz(dtype); +- + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); +- desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); ++ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); + desc <<= SVE_MTEDESC_SHIFT; + } else { + addr = clean_data_tbi(s, addr); +-- +2.33.0 + diff --git 
a/target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch b/target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch deleted file mode 100644 index 81ad2961b00130a741079e0f38c56b7dffdcf803..0000000000000000000000000000000000000000 --- a/target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch +++ /dev/null @@ -1,160 +0,0 @@ -From 79a60f0eeb56faf5d162ca566d1cd9988c3e4d60 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Thu, 6 Aug 2020 16:14:40 +0800 -Subject: [PATCH 4/9] target/arm: Allow ID registers to synchronize to KVM - -There are 2 steps to synchronize the values of system registers from -CPU state to KVM: -1. write to the values of system registers from CPU state to - (index,value) list by write_cpustate_to_list; -2. write the values in (index,value) list to KVM by - write_list_to_kvmstate; - -In step 1, the values of constant system registers are not allowed to -write to (index,value) list. However, a constant system register is -CONSTANT for guest but not for QEMU, which means, QEMU can set/modify -the value of constant system registers that is different from phsical -registers when startup. But if KVM is enabled, guest can not read the -values of the system registers which QEMU set unless they can be written -to (index,value) list. And why not try to write to KVM if kvm_sync is -true? - -At the moment we call write_cpustate_to_list, all ID registers are -contant, including ID_PFR1_EL1 and ID_AA64PFR0_EL1 because GIC has been -initialized. Hence, let's give all ID registers a chance to write to -KVM. If the write is successful, then write to (index,value) list. 
- -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/helper.c | 31 ++++++++++++++++++++----------- - target/arm/kvm.c | 38 ++++++++++++++++++++++++++++++++++++++ - target/arm/kvm_arm.h | 3 +++ - 3 files changed, 61 insertions(+), 11 deletions(-) - -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 459af431..97b6b861 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -32,6 +32,7 @@ - #include "arm_ldst.h" - #include "exec/cpu_ldst.h" - #endif -+#include "kvm_arm.h" - - #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ - -@@ -267,30 +268,38 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) - ok = false; - continue; - } -- if (ri->type & ARM_CP_NO_RAW) { -+ /* -+ * (Op0, Op1, CRn, CRm, Op2) of ID registers is (3, 0, 0, crm, op2), -+ * where 1<=crm<8, 0<=op2<8. Let's give ID registers a chance to -+ * synchronize to kvm. -+ */ -+ if ((ri->type & ARM_CP_NO_RAW) && !(kvm_sync && -+ ri->opc0 == 3 && ri->opc1 == 0 && ri->crn == 0 && ri->crm > 0)) { - continue; - } - - newval = read_raw_cp_reg(&cpu->env, ri); - if (kvm_sync) { -- /* -- * Only sync if the previous list->cpustate sync succeeded. -- * Rather than tracking the success/failure state for every -- * item in the list, we just recheck "does the raw write we must -- * have made in write_list_to_cpustate() read back OK" here. -- */ -- uint64_t oldval = cpu->cpreg_values[i]; -+ /* Only sync if we can sync to KVM successfully. 
*/ -+ uint64_t oldval; -+ uint64_t kvmval; - -+ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &oldval)) { -+ continue; -+ } - if (oldval == newval) { - continue; - } - -- write_raw_cp_reg(&cpu->env, ri, oldval); -- if (read_raw_cp_reg(&cpu->env, ri) != oldval) { -+ if (kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &newval)) { -+ continue; -+ } -+ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &kvmval) || -+ kvmval != newval) { - continue; - } - -- write_raw_cp_reg(&cpu->env, ri, newval); -+ kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); - } - cpu->cpreg_values[i] = newval; - } -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 4f131f68..229b17ce 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -457,6 +457,44 @@ out: - return ret; - } - -+int kvm_arm_get_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t *target) -+{ -+ uint32_t v32; -+ int ret; -+ -+ switch (regidx & KVM_REG_SIZE_MASK) { -+ case KVM_REG_SIZE_U32: -+ ret = kvm_get_one_reg(CPU(cpu), regidx, &v32); -+ if (ret == 0) { -+ *target = v32; -+ } -+ return ret; -+ case KVM_REG_SIZE_U64: -+ return kvm_get_one_reg(CPU(cpu), regidx, target); -+ default: -+ return -1; -+ } -+} -+ -+int kvm_arm_set_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t *source) -+{ -+ uint32_t v32; -+ -+ switch (regidx & KVM_REG_SIZE_MASK) { -+ case KVM_REG_SIZE_U32: -+ v32 = *source; -+ if (v32 != *source) { -+ error_report("the value of source is too large"); -+ return -1; -+ } -+ return kvm_set_one_reg(CPU(cpu), regidx, &v32); -+ case KVM_REG_SIZE_U64: -+ return kvm_set_one_reg(CPU(cpu), regidx, source); -+ default: -+ return -1; -+ } -+} -+ - bool write_kvmstate_to_list(ARMCPU *cpu) - { - CPUState *cs = CPU(cpu); -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 0de5f83e..9b7104d6 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -400,4 +400,7 @@ static inline const char *its_class_name(void) - } - } - -+int kvm_arm_get_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t 
*target); -+int kvm_arm_set_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t *source); -+ - #endif --- -2.25.1 - diff --git a/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch b/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch deleted file mode 100644 index ca4b796b58600aa35771d26a247690dfca413cc9..0000000000000000000000000000000000000000 --- a/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch +++ /dev/null @@ -1,41 +0,0 @@ -From cdc6896659b85f7ed8f7552850312e55170de0c5 Mon Sep 17 00:00:00 2001 -From: Christophe Lyon -Date: Fri, 25 Oct 2019 11:57:11 +0200 -Subject: [PATCH] target/arm: Allow reading flags from FPSCR for M-profile - -rt==15 is a special case when reading the flags: it means the -destination is APSR. This patch avoids rejecting -vmrs apsr_nzcv, fpscr -as illegal instruction. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Christophe Lyon -Message-id: 20191025095711.10853-1-christophe.lyon@linaro.org -[PMM: updated the comment] -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell -(cherry picked from commit 2529ab43b8a05534494704e803e0332d111d8b91) -Signed-off-by: Michael Roth ---- - target/arm/translate-vfp.inc.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c -index ef45cecbea..75406fd9db 100644 ---- a/target/arm/translate-vfp.inc.c -+++ b/target/arm/translate-vfp.inc.c -@@ -704,9 +704,10 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) - if (arm_dc_feature(s, ARM_FEATURE_M)) { - /* - * The only M-profile VFP vmrs/vmsr sysreg is FPSCR. -- * Writes to R15 are UNPREDICTABLE; we choose to undef. -+ * Accesses to R15 are UNPREDICTABLE; we choose to undef. -+ * (FPSCR -> r15 is a special case which writes to the PSR flags.) 
- */ -- if (a->rt == 15 || a->reg != ARM_VFP_FPSCR) { -+ if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) { - return false; - } - } --- -2.23.0 diff --git a/target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch b/target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch new file mode 100644 index 0000000000000000000000000000000000000000..e001b1d68f9ca34a695d209614ee101fca2109c6 --- /dev/null +++ b/target-arm-Avoid-shifts-by-1-in-tszimm_shr-and-tszim.patch @@ -0,0 +1,66 @@ +From 7810c5462cc56c92f50ecf3878525c15000212f6 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Fri, 22 Nov 2024 18:02:26 +0800 +Subject: [PATCH] target/arm: Avoid shifts by -1 in tszimm_shr() and + tszimm_shl() + +cherry-pick from 76916dfa89e8900639c1055c07a295c06628a0bc + +The function tszimm_esz() returns a shift amount, or possibly -1 in +certain cases that correspond to unallocated encodings in the +instruction set. We catch these later in the trans_ functions +(generally with an "a->esz < 0" check), but before we do the +decodetree-generated code will also call tszimm_shr() or tszimm_shl(), +which will use the tszimm_esz() return value as a shift count without +checking that it is not negative, which is undefined behaviour. + +Avoid the UB by checking the return value in tszimm_shr() and +tszimm_shl().
+ +Cc: qemu-stable@nongnu.org +Resolves: Coverity CID 1547617, 1547694 +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240722172957.1041231-4-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/tcg/translate-sve.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c +index 296e7d1ce2..dd0c633897 100644 +--- a/target/arm/tcg/translate-sve.c ++++ b/target/arm/tcg/translate-sve.c +@@ -50,13 +50,27 @@ static int tszimm_esz(DisasContext *s, int x) + + static int tszimm_shr(DisasContext *s, int x) + { +- return (16 << tszimm_esz(s, x)) - x; ++ /* ++ * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the ++ * trans function will check for esz < 0), so we can return any ++ * value we like from here in that case as long as we avoid UB. ++ */ ++ int esz = tszimm_esz(s, x); ++ if (esz < 0) { ++ return esz; ++ } ++ return (16 << esz) - x; + } + + /* See e.g. LSL (immediate, predicated). */ + static int tszimm_shl(DisasContext *s, int x) + { +- return x - (8 << tszimm_esz(s, x)); ++ /* As with tszimm_shr(), value will be unused if esz < 0 */ ++ int esz = tszimm_esz(s, x); ++ if (esz < 0) { ++ return esz; ++ } ++ return x - (8 << esz); + } + + /* The SH bit is in bit 8. Extract the low 8 and shift. 
*/ +-- +2.41.0.windows.1 + diff --git a/target-arm-Change-arm_cpu_mp_affinity-when-enabled-I.patch b/target-arm-Change-arm_cpu_mp_affinity-when-enabled-I.patch new file mode 100644 index 0000000000000000000000000000000000000000..db5b4648551d34927dbac0ec252a9765cdd01bda --- /dev/null +++ b/target-arm-Change-arm_cpu_mp_affinity-when-enabled-I.patch @@ -0,0 +1,70 @@ +From 33aa02dc05bed8316b1c64131e8269f404287598 Mon Sep 17 00:00:00 2001 +From: Xiang Chen +Date: Tue, 15 Apr 2025 20:10:50 +0800 +Subject: [PATCH] target/arm: Change arm_cpu_mp_affinity when enabled IPIV + feature + +virt inclusion +category: feature +bugzilla: https://gitee.com/openeuler/qemu/issues/IC1EV7 + +--------------------------------------------------------------- + +Before IPIV feature, it gets mpidr from vcpu id, but after +the feature, we need to know whether IPIV is enabled. + +Signed-off-by: Xiang Chen +--- + linux-headers/linux/kvm.h | 2 ++ + target/arm/cpu.c | 22 +++++++++++++++++++--- + 2 files changed, 21 insertions(+), 3 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index b94c5fd90f..a9d407eace 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1205,6 +1205,8 @@ struct kvm_ppc_resize_hpt { + + #define KVM_CAP_SEV_ES_GHCB 500 + #define KVM_CAP_HYGON_COCO_EXT 501 ++ ++#define KVM_CAP_ARM_IPIV_MODE 503 + /* support userspace to request firmware to build CSV3 guest's memory space */ + #define KVM_CAP_HYGON_COCO_EXT_CSV3_SET_PRIV_MEM (1 << 0) + /* support request to update CSV3 guest's memory region multiple times */ +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 09d391bd34..b0f70de018 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1324,9 +1324,25 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) + + uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) + { +- uint32_t Aff1 = idx / clustersz; +- uint32_t Aff0 = idx % clustersz; +- return (Aff1 << ARM_AFF1_SHIFT) | Aff0; ++ uint64_t 
Aff0 = 0, Aff1 = 0, Aff2 = 0, Aff3 = 0; ++ int mode; ++ ++ if (!kvm_enabled()) { ++ Aff1 = idx / clustersz; ++ Aff0 = idx % clustersz; ++ return (Aff1 << ARM_AFF1_SHIFT) | Aff0; ++ } ++ ++ mode = kvm_check_extension(kvm_state, KVM_CAP_ARM_IPIV_MODE); ++ if (mode) { ++ Aff1 = idx % 16; ++ Aff2 = idx / 16; ++ } else { ++ Aff1 = idx / clustersz; ++ Aff0 = idx % clustersz; ++ } ++ return (Aff3 << ARM_AFF3_SHIFT) | (Aff2 << ARM_AFF2_SHIFT) | ++ (Aff1 << ARM_AFF1_SHIFT) | Aff0; + } + + static void arm_cpu_initfn(Object *obj) +-- +2.41.0.windows.1 + diff --git a/target-arm-Clear-high-SVE-elements-in-handle_vec_sim.patch b/target-arm-Clear-high-SVE-elements-in-handle_vec_sim.patch new file mode 100644 index 0000000000000000000000000000000000000000..a3a17d2efa4928525046b1df4d0ac6ef82b1f4ea --- /dev/null +++ b/target-arm-Clear-high-SVE-elements-in-handle_vec_sim.patch @@ -0,0 +1,34 @@ +From 87ff72f354301147e35009dabdb8be68e9dfa30c Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 13 Aug 2024 11:42:49 +0100 +Subject: [PATCH] target/arm: Clear high SVE elements in handle_vec_simd_wshli + +AdvSIMD instructions are supposed to zero bits beyond 128. +Affects SSHLL, USHLL, SSHLL2, USHLL2. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Richard Henderson +Message-id: 20240717060903.205098-15-richard.henderson@linaro.org +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit 8e0c9a9efa21a16190cbac288e414bbf1d80f639) +Signed-off-by: zhujun2 +--- + target/arm/tcg/translate-a64.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c +index a2e49c39f9..5560a53630 100644 +--- a/target/arm/tcg/translate-a64.c ++++ b/target/arm/tcg/translate-a64.c +@@ -10141,6 +10141,7 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, + tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); + write_vec_element(s, tcg_rd, rd, i, size + 1); + } ++ clear_vec_high(s, true, rd); + } + + /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ +-- +2.41.0.windows.1 + diff --git a/target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch b/target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch deleted file mode 100644 index bfcce54936d4cb8c8ca1de997a6d0d469dab3bc1..0000000000000000000000000000000000000000 --- a/target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch +++ /dev/null @@ -1,248 +0,0 @@ -From 2eded1a4deeb5dd8d28414e54948bcf773f6b540 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 Feb 2020 17:51:03 +0000 -Subject: [PATCH 05/13] target/arm: Define an aa32_pmu_8_1 isar feature test - function - -Instead of open-coding a check on the ID_DFR0 PerfMon ID register -field, create a standardly-named isar_feature for "does AArch32 have -a v8.1 PMUv3" and use it. - -This entails moving the id_dfr0 field into the ARMISARegisters struct. 
- -Reviewed-by: Richard Henderson -Signed-off-by: Peter Maydell -Message-id: 20200214175116.9164-9-peter.maydell@linaro.org ---- - hw/intc/armv7m_nvic.c | 2 +- - target/arm/cpu.c | 26 +++++++++++++------------- - target/arm/cpu.h | 9 ++++++++- - target/arm/cpu64.c | 6 +++--- - target/arm/helper.c | 5 ++--- - 5 files changed, 27 insertions(+), 21 deletions(-) - -diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c -index 9f8f0d3f..0741db7b 100644 ---- a/hw/intc/armv7m_nvic.c -+++ b/hw/intc/armv7m_nvic.c -@@ -1223,7 +1223,7 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) - case 0xd44: /* PFR1. */ - return cpu->id_pfr1; - case 0xd48: /* DFR0. */ -- return cpu->id_dfr0; -+ return cpu->isar.id_dfr0; - case 0xd4c: /* AFR0. */ - return cpu->id_afr0; - case 0xd50: /* MMFR0. */ -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 6ad211b1..7e9b85a2 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1523,7 +1523,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - #endif - } else { - cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); -- cpu->id_dfr0 = FIELD_DP32(cpu->id_dfr0, ID_DFR0, PERFMON, 0); -+ cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); - cpu->pmceid0 = 0; - cpu->pmceid1 = 0; - } -@@ -1761,7 +1761,7 @@ static void arm1136_r2_initfn(Object *obj) - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; -- cpu->id_dfr0 = 0x2; -+ cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->id_mmfr0 = 0x01130003; - cpu->id_mmfr1 = 0x10030302; -@@ -1793,7 +1793,7 @@ static void arm1136_initfn(Object *obj) - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; -- cpu->id_dfr0 = 0x2; -+ cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->id_mmfr0 = 0x01130003; - cpu->id_mmfr1 = 0x10030302; -@@ -1826,7 +1826,7 @@ static void arm1176_initfn(Object *obj) - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 
= 0x11; -- cpu->id_dfr0 = 0x33; -+ cpu->isar.id_dfr0 = 0x33; - cpu->id_afr0 = 0; - cpu->id_mmfr0 = 0x01130003; - cpu->id_mmfr1 = 0x10030302; -@@ -1856,7 +1856,7 @@ static void arm11mpcore_initfn(Object *obj) - cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; -- cpu->id_dfr0 = 0; -+ cpu->isar.id_dfr0 = 0; - cpu->id_afr0 = 0x2; - cpu->id_mmfr0 = 0x01100103; - cpu->id_mmfr1 = 0x10020302; -@@ -1888,7 +1888,7 @@ static void cortex_m3_initfn(Object *obj) - cpu->pmsav7_dregion = 8; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000200; -- cpu->id_dfr0 = 0x00100000; -+ cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x00000030; - cpu->id_mmfr1 = 0x00000000; -@@ -1919,7 +1919,7 @@ static void cortex_m4_initfn(Object *obj) - cpu->isar.mvfr2 = 0x00000000; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000200; -- cpu->id_dfr0 = 0x00100000; -+ cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x00000030; - cpu->id_mmfr1 = 0x00000000; -@@ -1952,7 +1952,7 @@ static void cortex_m33_initfn(Object *obj) - cpu->isar.mvfr2 = 0x00000040; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000210; -- cpu->id_dfr0 = 0x00200000; -+ cpu->isar.id_dfr0 = 0x00200000; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x00101F40; - cpu->id_mmfr1 = 0x00000000; -@@ -2003,7 +2003,7 @@ static void cortex_r5_initfn(Object *obj) - cpu->midr = 0x411fc153; /* r1p3 */ - cpu->id_pfr0 = 0x0131; - cpu->id_pfr1 = 0x001; -- cpu->id_dfr0 = 0x010400; -+ cpu->isar.id_dfr0 = 0x010400; - cpu->id_afr0 = 0x0; - cpu->id_mmfr0 = 0x0210030; - cpu->id_mmfr1 = 0x00000000; -@@ -2058,7 +2058,7 @@ static void cortex_a8_initfn(Object *obj) - cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x1031; - cpu->id_pfr1 = 0x11; -- cpu->id_dfr0 = 0x400; -+ cpu->isar.id_dfr0 = 0x400; - cpu->id_afr0 = 0; - cpu->id_mmfr0 = 0x31100003; - cpu->id_mmfr1 = 0x20000000; -@@ -2131,7 +2131,7 @@ static void cortex_a9_initfn(Object *obj) - 
cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x1031; - cpu->id_pfr1 = 0x11; -- cpu->id_dfr0 = 0x000; -+ cpu->isar.id_dfr0 = 0x000; - cpu->id_afr0 = 0; - cpu->id_mmfr0 = 0x00100103; - cpu->id_mmfr1 = 0x20000000; -@@ -2196,7 +2196,7 @@ static void cortex_a7_initfn(Object *obj) - cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x00001131; - cpu->id_pfr1 = 0x00011011; -- cpu->id_dfr0 = 0x02010555; -+ cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10101105; - cpu->id_mmfr1 = 0x40000000; -@@ -2242,7 +2242,7 @@ static void cortex_a15_initfn(Object *obj) - cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x00001131; - cpu->id_pfr1 = 0x00011011; -- cpu->id_dfr0 = 0x02010555; -+ cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10201105; - cpu->id_mmfr1 = 0x20000000; -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 91cc02b4..2d8d27e8 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -860,6 +860,7 @@ struct ARMCPU { - uint32_t mvfr0; - uint32_t mvfr1; - uint32_t mvfr2; -+ uint32_t id_dfr0; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; - uint64_t id_aa64pfr0; -@@ -875,7 +876,6 @@ struct ARMCPU { - uint32_t reset_sctlr; - uint32_t id_pfr0; - uint32_t id_pfr1; -- uint32_t id_dfr0; - uint64_t pmceid0; - uint64_t pmceid1; - uint32_t id_afr0; -@@ -3491,6 +3491,13 @@ static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) - return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2; - } - -+static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) -+{ -+ /* 0xf means "non-standard IMPDEF PMU" */ -+ return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && -+ FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; -+} -+ - /* - * 64-bit feature tests via id registers. 
- */ -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 15f4ee92..afdabbeb 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -123,7 +123,7 @@ static void aarch64_a57_initfn(Object *obj) - cpu->reset_sctlr = 0x00c50838; - cpu->id_pfr0 = 0x00000131; - cpu->id_pfr1 = 0x00011011; -- cpu->id_dfr0 = 0x03010066; -+ cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10101105; - cpu->id_mmfr1 = 0x40000000; -@@ -177,7 +177,7 @@ static void aarch64_a53_initfn(Object *obj) - cpu->reset_sctlr = 0x00c50838; - cpu->id_pfr0 = 0x00000131; - cpu->id_pfr1 = 0x00011011; -- cpu->id_dfr0 = 0x03010066; -+ cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10101105; - cpu->id_mmfr1 = 0x40000000; -@@ -231,7 +231,7 @@ static void aarch64_a72_initfn(Object *obj) - cpu->reset_sctlr = 0x00c50838; - cpu->id_pfr0 = 0x00000131; - cpu->id_pfr1 = 0x00011011; -- cpu->id_dfr0 = 0x03010066; -+ cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->id_mmfr0 = 0x10201105; - cpu->id_mmfr1 = 0x40000000; -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 419be640..3f06ca19 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -5907,7 +5907,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_dfr0 }, -+ .resetvalue = cpu->isar.id_dfr0 }, - { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -6050,8 +6050,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) - } else { - define_arm_cp_regs(cpu, not_v7_cp_reginfo); - } -- if (FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) >= 4 && -- FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) != 0xf) { -+ if (cpu_isar_feature(aa32_pmu_8_1, cpu)) { - ARMCPRegInfo v81_pmu_regs[] = { - { .name = 
"PMCEID2", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, --- -2.25.1 - diff --git a/target-arm-Disable-SVE-extensions-when-SVE-is-disabl.patch b/target-arm-Disable-SVE-extensions-when-SVE-is-disabl.patch new file mode 100644 index 0000000000000000000000000000000000000000..6fd4fec39b8db4d9359a0986a7dba8ee7d8848c9 --- /dev/null +++ b/target-arm-Disable-SVE-extensions-when-SVE-is-disabl.patch @@ -0,0 +1,39 @@ +From a113ddc33b432c8b4d21160dccb54ba19580ab01 Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 11:22:56 +0800 +Subject: [PATCH] target/arm: Disable SVE extensions when SVE is disabled + +cherry picked from commit daf9748ac002ec35258e5986b6257961fd04b565 + +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2304 +Reported-by: Marcin Juszkiewicz +Signed-off-by: Richard Henderson +Signed-off-by: Marcin Juszkiewicz +Message-id: 20240526204551.553282-1-richard.henderson@linaro.org +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +Signed-off-by: Gao Jiazhen +--- + target/arm/cpu64.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 5d28838175..6eca55ac29 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -110,6 +110,11 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + */ + if (!cpu_isar_feature(aa64_sve, cpu)) { + /* SVE is disabled and so are all vector lengths. Good. */ ++ /* ++ * SVE is disabled and so are all vector lengths. Good. ++ * Disable all SVE extensions as well. 
++ */ ++ cpu->isar.id_aa64zfr0 = 0; + return; + } + +-- +2.41.0.windows.1 + diff --git a/target-arm-Don-t-abort-on-M-profile-exception-return.patch b/target-arm-Don-t-abort-on-M-profile-exception-return.patch deleted file mode 100644 index b6796e25b8b04c76a117ad129cb807a0da93da45..0000000000000000000000000000000000000000 --- a/target-arm-Don-t-abort-on-M-profile-exception-return.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 9027d3fba605d8f6093342ebe4a1da450d374630 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Thu, 22 Aug 2019 14:15:34 +0100 -Subject: [PATCH] target/arm: Don't abort on M-profile exception return in - linux-user mode - -An attempt to do an exception-return (branch to one of the magic -addresses) in linux-user mode for M-profile should behave like -a normal branch, because linux-user mode is always going to be -in 'handler' mode. This used to work, but we broke it when we added -support for the M-profile security extension in commit d02a8698d7ae2bfed. - -In that commit we allowed even handler-mode calls to magic return -values to be checked for and dealt with by causing an -EXCP_EXCEPTION_EXIT exception to be taken, because this is -needed for the FNC_RETURN return-from-non-secure-function-call -handling. For system mode we added a check in do_v7m_exception_exit() -to make any spurious calls from Handler mode behave correctly, but -forgot that linux-user mode would also be affected. - -How an attempted return-from-non-secure-function-call in linux-user -mode should be handled is not clear -- on real hardware it would -result in return to secure code (not to the Linux kernel) which -could then handle the error in any way it chose. For QEMU we take -the simple approach of treating this erroneous return the same way -it would be handled on a CPU without the security extensions -- -treat it as a normal branch. - -The upshot of all this is that for linux-user mode we should never -do any of the bx_excret magic, so the code change is simple. 
- -This ought to be a weird corner case that only affects broken guest -code (because Linux user processes should never be attempting to do -exception returns or NS function returns), except that the code that -assigns addresses in RAM for the process and stack in our linux-user -code does not attempt to avoid this magic address range, so -legitimate code attempting to return to a trampoline routine on the -stack can fall into this case. This change fixes those programs, -but we should also look at restricting the range of memory we -use for M-profile linux-user guests to the area that would be -real RAM in hardware. - -Cc: qemu-stable@nongnu.org -Reported-by: Christophe Lyon -Reviewed-by: Richard Henderson -Signed-off-by: Peter Maydell -Message-id: 20190822131534.16602-1-peter.maydell@linaro.org -Fixes: https://bugs.launchpad.net/qemu/+bug/1840922 -Signed-off-by: Peter Maydell -(cherry picked from commit 5e5584c89f36b302c666bc6db535fd3f7ff35ad2) -Signed-off-by: Michael Roth ---- - target/arm/translate.c | 21 ++++++++++++++++++++- - 1 file changed, 20 insertions(+), 1 deletion(-) - -diff --git a/target/arm/translate.c b/target/arm/translate.c -index 7853462b21..24cb4ba075 100644 ---- a/target/arm/translate.c -+++ b/target/arm/translate.c -@@ -952,10 +952,27 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) - store_cpu_field(var, thumb); - } - --/* Set PC and Thumb state from var. var is marked as dead. -+/* -+ * Set PC and Thumb state from var. var is marked as dead. - * For M-profile CPUs, include logic to detect exception-return - * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC, - * and BX reg, and no others, and happens only for code in Handler mode. -+ * The Security Extension also requires us to check for the FNC_RETURN -+ * which signals a function return from non-secure state; this can happen -+ * in both Handler and Thread mode. 
-+ * To avoid having to do multiple comparisons in inline generated code, -+ * we make the check we do here loose, so it will match for EXC_RETURN -+ * in Thread mode. For system emulation do_v7m_exception_exit() checks -+ * for these spurious cases and returns without doing anything (giving -+ * the same behaviour as for a branch to a non-magic address). -+ * -+ * In linux-user mode it is unclear what the right behaviour for an -+ * attempted FNC_RETURN should be, because in real hardware this will go -+ * directly to Secure code (ie not the Linux kernel) which will then treat -+ * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN -+ * attempt behave the way it would on a CPU without the security extension, -+ * which is to say "like a normal branch". That means we can simply treat -+ * all branches as normal with no magic address behaviour. - */ - static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) - { -@@ -963,10 +980,12 @@ static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) - * s->base.is_jmp that we need to do the rest of the work later. 
- */ - gen_bx(s, var); -+#ifndef CONFIG_USER_ONLY - if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) || - (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) { - s->base.is_jmp = DISAS_BX_EXCRET; - } -+#endif - } - - static inline void gen_bx_excret_final_code(DisasContext *s) --- -2.23.0 diff --git a/target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch b/target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch new file mode 100644 index 0000000000000000000000000000000000000000..6f5fab7af9a85bceba55850c0f1acc6b4d01ec88 --- /dev/null +++ b/target-arm-Don-t-assert-for-128-bit-tile-accesses-wh.patch @@ -0,0 +1,61 @@ +From 9e0b6c4df61aced66c5b3ee9ca93c6ac33868dc0 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 28 Nov 2024 14:06:44 +0800 +Subject: [PATCH] target/arm: Don't assert for 128-bit tile accesses when SVL + is 128 + +cherry-pick from 56f1c0db928aae0b83fd91c89ddb226b137e2b21 + +For an instruction which accesses a 128-bit element tile when +the SVL is also 128 (for example MOV z0.Q, p0/M, ZA0H.Q[w0,0]), +we will assert in get_tile_rowcol(): + +qemu-system-aarch64: ../../tcg/tcg-op.c:926: tcg_gen_deposit_z_i32: Assertion `len > 0' failed. + +This happens because we calculate + len = ctz32(streaming_vec_reg_size(s)) - esz; +but if the SVL and the element size are the same len is 0, and +the deposit operation asserts. + +In this case the ZA storage contains exactly one 128 bit +element ZA tile, and the horizontal or vertical slice is just +that tile. This means that regardless of the index value in +the Ws register, we always access that tile. (In pseudocode terms, +we calculate (index + offset) MOD 1, which is 0.) + +Special case the len == 0 case to avoid hitting the assertion +in tcg_gen_deposit_z_i32().
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240722172957.1041231-2-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/tcg/translate-sme.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c +index 8f0dfc884e..1e89516736 100644 +--- a/target/arm/tcg/translate-sme.c ++++ b/target/arm/tcg/translate-sme.c +@@ -49,7 +49,15 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, + /* Prepare a power-of-two modulo via extraction of @len bits. */ + len = ctz32(streaming_vec_reg_size(s)) - esz; + +- if (vertical) { ++ if (!len) { ++ /* ++ * SVL is 128 and the element size is 128. There is exactly ++ * one 128x128 tile in the ZA storage, and so we calculate ++ * (Rs + imm) MOD 1, which is always 0. We need to special case ++ * this because TCG doesn't allow deposit ops with len 0. ++ */ ++ tcg_gen_movi_i32(tmp, 0); ++ } else if (vertical) { + /* + * Compute the byte offset of the index within the tile: + * (index % (svl / size)) * size +-- +2.41.0.windows.1 + diff --git a/target-arm-Don-t-get-MDCR_EL2-in-pmu_counter_enabled.patch b/target-arm-Don-t-get-MDCR_EL2-in-pmu_counter_enabled.patch new file mode 100644 index 0000000000000000000000000000000000000000..6187d572a8f2234e352098dff9dffe3e6f006c61 --- /dev/null +++ b/target-arm-Don-t-get-MDCR_EL2-in-pmu_counter_enabled.patch @@ -0,0 +1,68 @@ +From 42a30e10bada5f034b0b2bfe8760482c972a4e61 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 28 Nov 2024 14:14:21 +0800 +Subject: [PATCH] target/arm: Don't get MDCR_EL2 in pmu_counter_enabled() + before checking ARM_FEATURE_PMU +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from ac1d88e9e7ca0bed83e91e07ce6d0597f10cc77d + +It doesn't make sense to read the value of MDCR_EL2 on a non-A-profile +CPU, and in fact if you try to do it we will assert: 
+ + (assertion=0x5555565a8c70 "!arm_feature(env, ARM_FEATURE_M)", file=0x5555565a6e5c "../../target/arm/helper.c", line=12600, function=0x5555565a9560 <__PRETTY_FUNCTION__.0> "arm_security_space_below_el3") at ./assert/assert.c:101 + +We might call pmu_counter_enabled() on an M-profile CPU (for example +from the migration pre/post hooks in machine.c); this should always +return false because these CPUs don't set ARM_FEATURE_PMU. + +Avoid the assertion by not calling arm_mdcr_el2_eff() before we +have done the early return for "PMU not present". + +This fixes an assertion failure if you try to do a loadvm or +savevm for an M-profile board. + +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2155 +Signed-off-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Message-id: 20240208153346.970021-1-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/helper.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 793aa89cc6..762eb086c5 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -1182,13 +1182,21 @@ static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) + bool enabled, prohibited = false, filtered; + bool secure = arm_is_secure(env); + int el = arm_current_el(env); +- uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); +- uint8_t hpmn = mdcr_el2 & MDCR_HPMN; ++ uint64_t mdcr_el2; ++ uint8_t hpmn; + ++ /* ++ * We might be called for M-profile cores where MDCR_EL2 doesn't ++ * exist and arm_mdcr_el2_eff() will assert, so this early-exit check ++ * must be before we read that value. 
++ */ + if (!arm_feature(env, ARM_FEATURE_PMU)) { + return false; + } + ++ mdcr_el2 = arm_mdcr_el2_eff(env); ++ hpmn = mdcr_el2 & MDCR_HPMN; ++ + if (!arm_feature(env, ARM_FEATURE_EL2) || + (counter < hpmn || counter == 31)) { + e = env->cp15.c9_pmcr & PMCRE; +-- +2.41.0.windows.1 + diff --git a/target-arm-Drop-user-only-special-case-in-sve_stN_r.patch b/target-arm-Drop-user-only-special-case-in-sve_stN_r.patch new file mode 100644 index 0000000000000000000000000000000000000000..6d619d894c8bd396c774eda8ac0142d433efd2b9 --- /dev/null +++ b/target-arm-Drop-user-only-special-case-in-sve_stN_r.patch @@ -0,0 +1,43 @@ +From 1475170931ea2979a150fe4c1d3fc6b649eb3a6e Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 12 Nov 2024 06:12:32 -0800 +Subject: [PATCH] target/arm: Drop user-only special case in sve_stN_r + +This path is reachable with plugins enabled, and provoked +with run-plugin-catch-syscalls-with-libinline.so. + +Cc: qemu-stable@nongnu.org +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Message-ID: <20241112141232.321354-1-richard.henderson@linaro.org> +(cherry picked from commit f27550804688da43c6e0d87b2f9e143adbf76271) +Signed-off-by: zhujun2 +--- + target/arm/tcg/sve_helper.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c +index f006d152cc..ce8134320b 100644 +--- a/target/arm/tcg/sve_helper.c ++++ b/target/arm/tcg/sve_helper.c +@@ -6306,9 +6306,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +-#ifdef CONFIG_USER_ONLY +- g_assert_not_reached(); +-#else + /* + * At least one page includes MMIO. 
+ * Any bus operation can fail with cpu_transaction_failed, +@@ -6339,7 +6336,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, + } while (reg_off & 63); + } while (reg_off <= reg_last); + return; +-#endif + } + + mem_off = info.mem_off_first[0]; +-- +2.41.0.windows.1 + diff --git a/target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch b/target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch deleted file mode 100644 index d6e82fae2dc02954d37eb723f930bccd28b70618..0000000000000000000000000000000000000000 --- a/target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 69eedbfc873ded9bf35439b813e9f6a7431dc727 Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 8 Feb 2020 12:58:12 +0000 -Subject: [PATCH 09/13] target/arm: Enable ARMv8.2-ATS1E1 in -cpu max - -This includes enablement of ARMv8.1-PAN. - -Reviewed-by: Peter Maydell -Signed-off-by: Richard Henderson -Message-id: 20200208125816.14954-17-richard.henderson@linaro.org -Signed-off-by: Peter Maydell ---- - target/arm/cpu.c | 4 ++++ - target/arm/cpu64.c | 5 +++++ - 2 files changed, 9 insertions(+) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index a23c71db..119bd275 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2484,6 +2484,10 @@ static void arm_max_initfn(Object *obj) - t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ - cpu->isar.mvfr2 = t; - -+ t = cpu->id_mmfr3; -+ t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ -+ cpu->id_mmfr3 = t; -+ - t = cpu->id_mmfr4; - t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ - cpu->id_mmfr4 = t; -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 7ad8b5e2..a0d07fd7 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -362,6 +362,7 @@ static void aarch64_max_initfn(Object *obj) - t = cpu->isar.id_aa64mmfr1; - t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* HPD */ - t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); -+ t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */ - 
cpu->isar.id_aa64mmfr1 = t; - - /* Replicate the same data to the 32-bit id registers. */ -@@ -382,6 +383,10 @@ static void aarch64_max_initfn(Object *obj) - u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); - cpu->isar.id_isar6 = u; - -+ u = cpu->id_mmfr3; -+ u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ -+ cpu->id_mmfr3 = u; -+ - /* - * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, - * so do not set MVFR1.FPHP. Strictly speaking this is not legal, --- -2.25.1 - diff --git a/target-arm-Fix-A64-scalar-SQSHRN-and-SQRSHRN.patch b/target-arm-Fix-A64-scalar-SQSHRN-and-SQRSHRN.patch new file mode 100644 index 0000000000000000000000000000000000000000..e8a1081b79c10a7610781177b91c37b5667e69aa --- /dev/null +++ b/target-arm-Fix-A64-scalar-SQSHRN-and-SQRSHRN.patch @@ -0,0 +1,53 @@ +From fe9725eed4d9be8e14d2c3865f1d7d5f24cbdd73 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 28 Nov 2024 14:21:15 +0800 +Subject: [PATCH] target/arm: Fix A64 scalar SQSHRN and SQRSHRN + +cherry-pick from 6fffc8378562c7fea6290c430b4f653f830a4c1a + +In commit 1b7bc9b5c8bf374dd we changed handle_vec_simd_sqshrn() so +that instead of starting with a 0 value and depositing in each new +element from the narrowing operation, it instead started with the raw +result of the narrowing operation of the first element. + +This is fine in the vector case, because the deposit operations for +the second and subsequent elements will always overwrite any higher +bits that might have been in the first element's result value in +tcg_rd. However in the scalar case we only go through this loop +once. The effect is that for a signed narrowing operation, if the +result is negative then we will now return a value where the bits +above the first element are incorrectly 1 (because the narrowfn +returns a sign-extended result, not one that is truncated to the +element size). 
+ +Fix this by using an extract operation to get exactly the correct +bits of the output of the narrowfn for element 1, instead of a +plain move. + +Cc: qemu-stable@nongnu.org +Fixes: 1b7bc9b5c8bf374dd3 ("target/arm: Avoid tcg_const_ptr in handle_vec_simd_sqshrn") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2089 +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240123153416.877308-1-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/tcg/translate-a64.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c +index 5560a53630..a05182b57f 100644 +--- a/target/arm/tcg/translate-a64.c ++++ b/target/arm/tcg/translate-a64.c +@@ -8221,7 +8221,7 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, + narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd); + tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); + if (i == 0) { +- tcg_gen_mov_i64(tcg_final, tcg_rd); ++ tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize); + } else { + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-FJCVTZS-vs-flush-to-zero.patch b/target-arm-Fix-FJCVTZS-vs-flush-to-zero.patch new file mode 100644 index 0000000000000000000000000000000000000000..9bd34d7cfed7e7891c3663ddd3f5b058f9db74de --- /dev/null +++ b/target-arm-Fix-FJCVTZS-vs-flush-to-zero.patch @@ -0,0 +1,106 @@ +From 148e01eba8041bad93081a19a240034bb8138988 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 25 Jun 2024 11:35:26 -0700 +Subject: [PATCH] target/arm: Fix FJCVTZS vs flush-to-zero + +Input denormals cause the Javascript inexact bit +(output to Z) to be set. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 6c1f6f2733a ("target/arm: Implement ARMv8.3-JSConv") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2375 +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Message-id: 20240625183536.1672454-4-richard.henderson@linaro.org +[PMM: fixed hardcoded tab in test case] +Signed-off-by: Peter Maydell +(cherry picked from commit 7619129f0d4a14d918227c5c47ad7433662e9ccc) +Signed-off-by: zhujun2 +--- + target/arm/vfp_helper.c | 18 +++++++++--------- + tests/tcg/aarch64/Makefile.target | 3 ++- + tests/tcg/aarch64/test-2375.c | 21 +++++++++++++++++++++ + 3 files changed, 32 insertions(+), 10 deletions(-) + create mode 100644 tests/tcg/aarch64/test-2375.c + +diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c +index 3e5e37abbe..ff59bc5522 100644 +--- a/target/arm/vfp_helper.c ++++ b/target/arm/vfp_helper.c +@@ -1121,8 +1121,8 @@ const FloatRoundMode arm_rmode_to_sf_map[] = { + uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus) + { + float_status *status = vstatus; +- uint32_t inexact, frac; +- uint32_t e_old, e_new; ++ uint32_t frac, e_old, e_new; ++ bool inexact; + + e_old = get_float_exception_flags(status); + set_float_exception_flags(0, status); +@@ -1130,13 +1130,13 @@ uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus) + e_new = get_float_exception_flags(status); + set_float_exception_flags(e_old | e_new, status); + +- if (value == float64_chs(float64_zero)) { +- /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */ +- inexact = 1; +- } else { +- /* Normal inexact or overflow or NaN */ +- inexact = e_new & (float_flag_inexact | float_flag_invalid); +- } ++ /* Normal inexact, denormal with flush-to-zero, or overflow or NaN */ ++ inexact = e_new & (float_flag_inexact | ++ float_flag_input_denormal | ++ float_flag_invalid); ++ ++ /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. 
*/ ++ inexact |= value == float64_chs(float64_zero); + + /* Pack the result and the env->ZF representation of Z together. */ + return deposit64(frac, 32, 32, inexact); +diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target +index cded1d01fc..6d593c6392 100644 +--- a/tests/tcg/aarch64/Makefile.target ++++ b/tests/tcg/aarch64/Makefile.target +@@ -40,8 +40,9 @@ endif + + # Pauth Tests + ifneq ($(CROSS_CC_HAS_ARMV8_3),) +-AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5 ++AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5 test-2375 + pauth-%: CFLAGS += -march=armv8.3-a ++test-2375: CFLAGS += -march=armv8.3-a + run-pauth-1: QEMU_OPTS += -cpu max + run-pauth-2: QEMU_OPTS += -cpu max + # Choose a cpu with FEAT_Pauth but without FEAT_FPAC for pauth-[45]. +diff --git a/tests/tcg/aarch64/test-2375.c b/tests/tcg/aarch64/test-2375.c +new file mode 100644 +index 0000000000..84c7e7de71 +--- /dev/null ++++ b/tests/tcg/aarch64/test-2375.c +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* Copyright (c) 2024 Linaro Ltd */ ++/* See https://gitlab.com/qemu-project/qemu/-/issues/2375 */ ++ ++#include <assert.h> ++ ++int main(void) ++{ ++ int r, z; ++ ++ asm("msr fpcr, %2\n\t" ++ "fjcvtzs %w0, %d3\n\t" ++ "cset %1, eq" ++ : "=r"(r), "=r"(z) ++ : "r"(0x01000000L), /* FZ = 1 */ ++ "w"(0xfcff00L)); /* denormal */ ++ ++ assert(r == 0); ++ assert(z == 0); ++ return 0; ++} +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-PAuth-sbox-functions.patch b/target-arm-Fix-PAuth-sbox-functions.patch deleted file mode 100644 index ac8d05065f766eb8ca90cd00de6a60350c2306c3..0000000000000000000000000000000000000000 --- a/target-arm-Fix-PAuth-sbox-functions.patch +++ /dev/null @@ -1,49 +0,0 @@ -From a7149fc18020c3d432c31838069dcfcb745299bf Mon Sep 17 00:00:00 2001 -From: zhanghailiang -Date: Sat, 20 Jun 2020 12:01:30 +0800 -Subject: [PATCH] target/arm: Fix PAuth sbox functions - -In the PAC computation, sbox was applied over wrong bits. 
-As this is a 4-bit sbox, bit index should be incremented by 4 instead of 16. - -Test vector from QARMA paper (https://eprint.iacr.org/2016/444.pdf) was -used to verify one computation of the pauth_computepac() function which -uses sbox2. - -Launchpad: https://bugs.launchpad.net/bugs/1859713 -Reviewed-by: Richard Henderson -Signed-off-by: Vincent DEHORS -Signed-off-by: Adrien GRASSEIN -Message-id: 20200116230809.19078-2-richard.henderson@linaro.org -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell -Signed-off-by: zhanghailiang ---- - target/arm/pauth_helper.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c -index d3194f20..0a5f41e1 100644 ---- a/target/arm/pauth_helper.c -+++ b/target/arm/pauth_helper.c -@@ -89,7 +89,7 @@ static uint64_t pac_sub(uint64_t i) - uint64_t o = 0; - int b; - -- for (b = 0; b < 64; b += 16) { -+ for (b = 0; b < 64; b += 4) { - o |= (uint64_t)sub[(i >> b) & 0xf] << b; - } - return o; -@@ -104,7 +104,7 @@ static uint64_t pac_inv_sub(uint64_t i) - uint64_t o = 0; - int b; - -- for (b = 0; b < 64; b += 16) { -+ for (b = 0; b < 64; b += 4) { - o |= (uint64_t)inv_sub[(i >> b) & 0xf] << b; - } - return o; --- -2.23.0 - diff --git a/target-arm-Fix-SVE-SDOT-UDOT-USDOT-4-way-indexed.patch b/target-arm-Fix-SVE-SDOT-UDOT-USDOT-4-way-indexed.patch new file mode 100644 index 0000000000000000000000000000000000000000..676faaa398604c7868e7901c9a6b6a170d1ce039 --- /dev/null +++ b/target-arm-Fix-SVE-SDOT-UDOT-USDOT-4-way-indexed.patch @@ -0,0 +1,73 @@ +From 95f371c36858dd003c0c6a3d4f6ddfbc299dda9f Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Thu, 7 Nov 2024 20:56:18 -0500 +Subject: [PATCH] target/arm: Fix SVE SDOT/UDOT/USDOT (4-way, indexed) + +cherry-pick from e6b2fa1b81ac6b05c4397237c846a295a9857920 + +Our implementation of the indexed version of SVE SDOT/UDOT/USDOT got +the calculation of the inner loop terminator wrong. 
Although we +correctly account for the element size when we calculate the +terminator for the first iteration: + intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); +we don't do that when we move it forward after the first inner loop +completes. The intention is that we process the vector in 128-bit +segments, which for a 64-bit element size should mean (1, 2), (3, 4), +(5, 6), etc. This bug meant that we would iterate (1, 2), (3, 4, 5, +6), (7, 8, 9, 10) etc and apply the wrong indexed element to some of +the operations, and also index off the end of the vector. + +You don't see this bug if the vector length is small enough that we +don't need to iterate the outer loop, i.e. if it is only 128 bits, +or if it is the 64-bit special case from AA32/AA64 AdvSIMD. If the +vector length is 256 bits then we calculate the right results for the +elements in the vector but do index off the end of the vector. Vector +lengths greater than 256 bits see wrong answers. The instructions +that produce 32-bit results behave correctly. + +Fix the recalculation of 'segend' for subsequent iterations, and +restore a version of the comment that was lost in the refactor of +commit 7020ffd656a5 that explains why we only need to clamp segend to +opr_sz_n for the first iteration, not the later ones. 
+ +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2595 +Fixes: 7020ffd656a5 ("target/arm: Macroize helper_gvec_{s,u}dot_idx_{b,h}") +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20241101185544.2130972-1-peter.maydell@linaro.org +Signed-off-by: qihao_yewu +--- + target/arm/tcg/vec_helper.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c +index 1f93510b85..11e874c05a 100644 +--- a/target/arm/tcg/vec_helper.c ++++ b/target/arm/tcg/vec_helper.c +@@ -692,6 +692,13 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ + { \ + intptr_t i = 0, opr_sz = simd_oprsz(desc); \ + intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \ ++ /* \ ++ * Special case: opr_sz == 8 from AA64/AA32 advsimd means the \ ++ * first iteration might not be a full 16 byte segment. But \ ++ * for vector lengths beyond that this must be SVE and we know \ ++ * opr_sz is a multiple of 16, so we need not clamp segend \ ++ * to opr_sz_n when we advance it at the end of the loop. 
\ ++ */ \ + intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \ + intptr_t index = simd_data(desc); \ + TYPED *d = vd, *a = va; \ +@@ -709,7 +716,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ + n[i * 4 + 2] * m2 + \ + n[i * 4 + 3] * m3); \ + } while (++i < segend); \ +- segend = i + 4; \ ++ segend = i + (16 / sizeof(TYPED)); \ + } while (i < opr_sz_n); \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ + } +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-SVE-SME-gross-MTE-suppression-checks.patch b/target-arm-Fix-SVE-SME-gross-MTE-suppression-checks.patch new file mode 100644 index 0000000000000000000000000000000000000000..dcb32524851c326f9296c560ab551954753b7018 --- /dev/null +++ b/target-arm-Fix-SVE-SME-gross-MTE-suppression-checks.patch @@ -0,0 +1,87 @@ +From b69c9f4b7b72c0634f2353135f83d8e59f3308dd Mon Sep 17 00:00:00 2001 +From: gubin +Date: Tue, 17 Dec 2024 14:42:31 +0800 +Subject: [PATCH] target/arm: Fix SVE/SME gross MTE suppression checks + +cherry-pick from 855f94eca80c85a99f459e36684ea2f98f6a3243 + +The TBI and TCMA bits are located within mtedesc, not desc. + +Cc: qemu-stable@nongnu.org +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Tested-by: Gustavo Romero +Message-id: 20240207025210.8837-7-richard.henderson@linaro.org +Signed-off-by: Peter Maydell +Signed-off-by: gubin +--- + target/arm/tcg/sme_helper.c | 8 ++++---- + target/arm/tcg/sve_helper.c | 12 ++++++------ + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c +index 1ee2690ceb..904bfdac43 100644 +--- a/target/arm/tcg/sme_helper.c ++++ b/target/arm/tcg/sme_helper.c +@@ -573,8 +573,8 @@ void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. 
*/ +- if (!tbi_check(desc, bit55) || +- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { ++ if (!tbi_check(mtedesc, bit55) || ++ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + +@@ -750,8 +750,8 @@ void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ +- if (!tbi_check(desc, bit55) || +- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { ++ if (!tbi_check(mtedesc, bit55) || ++ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + +diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c +index ce8134320b..9694201550 100644 +--- a/target/arm/tcg/sve_helper.c ++++ b/target/arm/tcg/sve_helper.c +@@ -5800,8 +5800,8 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ +- if (!tbi_check(desc, bit55) || +- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { ++ if (!tbi_check(mtedesc, bit55) || ++ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + +@@ -6156,8 +6156,8 @@ void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ +- if (!tbi_check(desc, bit55) || +- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { ++ if (!tbi_check(mtedesc, bit55) || ++ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + +@@ -6406,8 +6406,8 @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. 
*/ +- if (!tbi_check(desc, bit55) || +- tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { ++ if (!tbi_check(mtedesc, bit55) || ++ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch b/target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch new file mode 100644 index 0000000000000000000000000000000000000000..be6b147ab6490913ee0beebf3842e8e100163e6e --- /dev/null +++ b/target-arm-Fix-UMOPA-UMOPS-of-16-bit-values.patch @@ -0,0 +1,63 @@ +From 07dfcad1b3d9ecbf1afe65d3457a6dbcb31f1b94 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Tue, 17 Dec 2024 14:47:59 +0800 +Subject: [PATCH] target/arm: Fix UMOPA/UMOPS of 16-bit values + +cherry-pick from ea3f5a90f036734522e9af3bffd77e69e9f47355 + +The UMOPA/UMOPS instructions are supposed to multiply unsigned 8 or +16 bit elements and accumulate the products into a 64-bit element. +In the Arm ARM pseudocode, this is done with the usual +infinite-precision signed arithmetic. However our implementation +doesn't quite get it right, because in the DEF_IMOP_64() macro we do: + sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); + +where NTYPE and MTYPE are uint16_t or int16_t. In the uint16_t case, +the C usual arithmetic conversions mean the values are converted to +"int" type and the multiply is done as a 32-bit multiply. This means +that if the inputs are, for example, 0xffff and 0xffff then the +result is 0xFFFE0001 as an int, which is then promoted to uint64_t +for the accumulation into sum; this promotion incorrectly sign +extends the multiply. + +Avoid the incorrect sign extension by casting to int64_t before +the multiply, so we do the multiply as 64-bit signed arithmetic, +which is a type large enough that the multiply can never +overflow into the sign bit. + +(The equivalent 8-bit operations in DEF_IMOP_32() are fine, because +the 8-bit multiplies can never overflow into the sign bit of a +32-bit integer.) 
+ +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2372 +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240722172957.1041231-3-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/tcg/sme_helper.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c +index 1ee2690ceb..e94b5335e1 100644 +--- a/target/arm/tcg/sme_helper.c ++++ b/target/arm/tcg/sme_helper.c +@@ -1134,10 +1134,10 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ + uint64_t sum = 0; \ + /* Apply P to N as a mask, making the inactive elements 0. */ \ + n &= expand_pred_h(p); \ +- sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ +- sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ +- sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \ +- sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \ ++ sum += (int64_t)(NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ ++ sum += (int64_t)(NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ ++ sum += (int64_t)(NTYPE)(n >> 32) * (MTYPE)(m >> 32); \ ++ sum += (int64_t)(NTYPE)(n >> 48) * (MTYPE)(m >> 48); \ + return neg ? a - sum : a + sum; \ + } + +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-VCMLA-Dd-Dn-Dm-idx.patch b/target-arm-Fix-VCMLA-Dd-Dn-Dm-idx.patch new file mode 100644 index 0000000000000000000000000000000000000000..17111bb643d22ba2d70f8b6a28292f3b2c39b921 --- /dev/null +++ b/target-arm-Fix-VCMLA-Dd-Dn-Dm-idx.patch @@ -0,0 +1,47 @@ +From cdf914a667f9d0f086329174c24f9623b00b8fb2 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Tue, 17 Dec 2024 14:54:18 +0800 +Subject: [PATCH] target/arm: Fix VCMLA Dd, Dn, Dm[idx] + +cherry-pick from 76bccf3cb9d9383da0128bbc6d1300cddbe3ae8f + +The inner loop, bounded by eltspersegment, must not be +larger than the outer loop, bounded by elements. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 18fc2405781 ("target/arm: Implement SVE fp complex multiply add (indexed)") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2376 +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Message-id: 20240625183536.1672454-2-richard.henderson@linaro.org +Signed-off-by: Peter Maydell +Signed-off-by: gubin +--- + target/arm/tcg/vec_helper.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c +index 11e874c05a..83b49ef009 100644 +--- a/target/arm/tcg/vec_helper.c ++++ b/target/arm/tcg/vec_helper.c +@@ -850,7 +850,7 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, + intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); + uint32_t neg_real = flip ^ neg_imag; + intptr_t elements = opr_sz / sizeof(float16); +- intptr_t eltspersegment = 16 / sizeof(float16); ++ intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); + intptr_t i, j; + + /* Shift boolean to the sign bit so we can xor to negate. */ +@@ -912,7 +912,7 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, + intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); + uint32_t neg_real = flip ^ neg_imag; + intptr_t elements = opr_sz / sizeof(float32); +- intptr_t eltspersegment = 16 / sizeof(float32); ++ intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); + intptr_t i, j; + + /* Shift boolean to the sign bit so we can xor to negate. 
*/ +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-incorrect-aa64_tidcp1-feature-check.patch b/target-arm-Fix-incorrect-aa64_tidcp1-feature-check.patch new file mode 100644 index 0000000000000000000000000000000000000000..4996a3efcc02978f77e476116b1f2e5dd4f550d5 --- /dev/null +++ b/target-arm-Fix-incorrect-aa64_tidcp1-feature-check.patch @@ -0,0 +1,39 @@ +From 1ad09007da426e9cd1585babcdd4de25ddfb2f8b Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 28 Nov 2024 14:39:05 +0800 +Subject: [PATCH] target/arm: Fix incorrect aa64_tidcp1 feature check + +cherry-pick from ee0a2e3c9d2991a11c13ffadb15e4d0add43c257 + +A typo in the implementation of isar_feature_aa64_tidcp1() means we +were checking the field in the wrong ID register, so we might have +provided the feature on CPUs that don't have it and not provided +it on CPUs that should have it. Correct this bug. + +Cc: qemu-stable@nongnu.org +Fixes: 9cd0c0dec97be9 "target/arm: Implement FEAT_TIDCP1" +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2120 +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240123160333.958841-1-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/cpu-features.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h +index 954d358268..165a497f7b 100644 +--- a/target/arm/cpu-features.h ++++ b/target/arm/cpu-features.h +@@ -771,7 +771,7 @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) + + static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR1, TIDCP1) != 0; ++ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; + } + + static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-nregs-computation-in-do_-ld-st-_zpa.patch b/target-arm-Fix-nregs-computation-in-do_-ld-st-_zpa.patch new file mode 100644 index 
0000000000000000000000000000000000000000..767269492a4a9782f68d28203e1a29a71329b44b --- /dev/null +++ b/target-arm-Fix-nregs-computation-in-do_-ld-st-_zpa.patch @@ -0,0 +1,83 @@ +From b6a6427bf45c249e8397bf758055ebb54622e8e2 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Tue, 17 Dec 2024 14:32:17 +0800 +Subject: [PATCH] target/arm: Fix nregs computation in do_{ld,st}_zpa + +cherry-pick from 64c6e7444dff64b42d11b836b9aec9acfbe8ecc2 + +The field is encoded as [0-3], which is convenient for +indexing our array of function pointers, but the true +value is [1-4]. Adjust before calling do_mem_zpa. + +Add an assert, and move the comment re passing ZT to +the helper back next to the relevant code. + +Cc: qemu-stable@nongnu.org +Fixes: 206adacfb8d ("target/arm: Add mte helpers for sve scalar + int loads") +Signed-off-by: Richard Henderson +Tested-by: Gustavo Romero +Message-id: 20240207025210.8837-3-richard.henderson@linaro.org +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +Signed-off-by: gubin +--- + target/arm/tcg/translate-sve.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c +index dd0c633897..1d8e0d29bf 100644 +--- a/target/arm/tcg/translate-sve.c ++++ b/target/arm/tcg/translate-sve.c +@@ -4459,11 +4459,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, + TCGv_ptr t_pg; + int desc = 0; + +- /* +- * For e.g. LD4, there are not enough arguments to pass all 4 +- * registers as pointers, so encode the regno into the data field. +- * For consistency, do this even for LD1. +- */ ++ assert(mte_n >= 1 && mte_n <= 4); + if (s->mte_active[0]) { + int msz = dtype_msz(dtype); + +@@ -4477,6 +4473,11 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, + addr = clean_data_tbi(s, addr); + } + ++ /* ++ * For e.g. 
LD4, there are not enough arguments to pass all 4 ++ * registers as pointers, so encode the regno into the data field. ++ * For consistency, do this even for LD1. ++ */ + desc = simd_desc(vsz, vsz, zt | desc); + t_pg = tcg_temp_new_ptr(); + +@@ -4614,7 +4615,7 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg, + * accessible via the instruction encoding. + */ + assert(fn != NULL); +- do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); ++ do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); + } + + static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) +@@ -5182,14 +5183,13 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, + if (nreg == 0) { + /* ST1 */ + fn = fn_single[s->mte_active[0]][be][msz][esz]; +- nreg = 1; + } else { + /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ + assert(msz == esz); + fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; + } + assert(fn != NULL); +- do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); ++ do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); + } + + static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) +-- +2.41.0.windows.1 + diff --git a/target-arm-Fix-write-redundant-values-to-kvm.patch b/target-arm-Fix-write-redundant-values-to-kvm.patch deleted file mode 100644 index e165d04bafdd3fa3ceca0a1c0af68dfc0bb95df4..0000000000000000000000000000000000000000 --- a/target-arm-Fix-write-redundant-values-to-kvm.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 479c384f2944f52f9199bffa191b587a3f02663c Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Wed, 9 Dec 2020 19:35:08 +0800 -Subject: [PATCH] target/arm: Fix write redundant values to kvm - -After modifying the value of a ID register, we'd better to try to write -it to KVM so that we can known the value is acceptable for KVM. -Because it may modify the registers' values of KVM, it's not suitable -for other registers. 
- -(cherry-picked from a0d7a9de807639fcfcbe1fe037cb8772d459a9cf) -Signed-off-by: Peng Liang ---- - target/arm/helper.c | 73 ++++++++++++++++++++++++++++++--------------- - 1 file changed, 49 insertions(+), 24 deletions(-) - -diff --git a/target/arm/helper.c b/target/arm/helper.c -index b262f5d6c5..bddd355fa0 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -252,6 +252,16 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri) - return true; - } - -+static bool is_id_reg(const ARMCPRegInfo *ri) -+{ -+ /* -+ * (Op0, Op1, CRn, CRm, Op2) of ID registers is (3, 0, 0, crm, op2), -+ * where 1<=crm<8, 0<=op2<8. -+ */ -+ return ri->opc0 == 3 && ri->opc1 == 0 && ri->crn == 0 && -+ ri->crm > 0 && ri->crm < 8; -+} -+ - bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) - { - /* Write the coprocessor state from cpu->env to the (index,value) list. */ -@@ -268,38 +278,53 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) - ok = false; - continue; - } -- /* -- * (Op0, Op1, CRn, CRm, Op2) of ID registers is (3, 0, 0, crm, op2), -- * where 1<=crm<8, 0<=op2<8. Let's give ID registers a chance to -- * synchronize to kvm. -- */ -- if ((ri->type & ARM_CP_NO_RAW) && !(kvm_sync && -- ri->opc0 == 3 && ri->opc1 == 0 && ri->crn == 0 && ri->crm > 0)) { -+ if ((ri->type & ARM_CP_NO_RAW) && !(kvm_sync && is_id_reg(ri))) { - continue; - } - - newval = read_raw_cp_reg(&cpu->env, ri); - if (kvm_sync) { -- /* Only sync if we can sync to KVM successfully. */ -- uint64_t oldval; -- uint64_t kvmval; -+ if (is_id_reg(ri)) { -+ /* Only sync if we can sync to KVM successfully. 
*/ -+ uint64_t oldval; -+ uint64_t kvmval; - -- if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &oldval)) { -- continue; -- } -- if (oldval == newval) { -- continue; -- } -+ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &oldval)) { -+ continue; -+ } -+ if (oldval == newval) { -+ continue; -+ } - -- if (kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &newval)) { -- continue; -- } -- if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &kvmval) || -- kvmval != newval) { -- continue; -- } -+ if (kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &newval)) { -+ continue; -+ } -+ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &kvmval) || -+ kvmval != newval) { -+ continue; -+ } -+ -+ kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); -+ } else { -+ /* -+ * Only sync if the previous list->cpustate sync succeeded. -+ * Rather than tracking the success/failure state for every -+ * item in the list, we just recheck "does the raw write we must -+ * have made in write_list_to_cpustate() read back OK" here. 
-+ */ -+ uint64_t oldval = cpu->cpreg_values[i]; -+ -+ if (oldval == newval) { -+ continue; -+ } - -- kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); -+ write_raw_cp_reg(&cpu->env, ri, oldval); -+ if (read_raw_cp_reg(&cpu->env, ri) != oldval) { -+ continue; -+ } -+ -+ write_raw_cp_reg(&cpu->env, ri, newval); -+ } - } - cpu->cpreg_values[i] = newval; - } --- -2.27.0 - diff --git a/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch b/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch deleted file mode 100644 index a46232f8ba04e1e2a956d8493dc1515fcf1f272a..0000000000000000000000000000000000000000 --- a/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 38fb634853ac6547326d9f88b9a068d9fc6b4ad4 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Tue, 27 Aug 2019 13:19:31 +0100 -Subject: [PATCH] target/arm: Free TCG temps in trans_VMOV_64_sp() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The function neon_store_reg32() doesn't free the TCG temp that it -is passed, so the caller must do that. We got this right in most -places but forgot to free the TCG temps in trans_VMOV_64_sp(). 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Peter Maydell -Reviewed-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20190827121931.26836-1-peter.maydell@linaro.org -(cherry picked from commit 342d27581bd3ecdb995e4fc55fcd383cf3242888) -Signed-off-by: Michael Roth ---- - target/arm/translate-vfp.inc.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c -index 092eb5ec53..ef45cecbea 100644 ---- a/target/arm/translate-vfp.inc.c -+++ b/target/arm/translate-vfp.inc.c -@@ -881,8 +881,10 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) - /* gpreg to fpreg */ - tmp = load_reg(s, a->rt); - neon_store_reg32(tmp, a->vm); -+ tcg_temp_free_i32(tmp); - tmp = load_reg(s, a->rt2); - neon_store_reg32(tmp, a->vm + 1); -+ tcg_temp_free_i32(tmp); - } - - return true; --- -2.23.0 diff --git a/target-arm-LDAPR-should-honour-SCTLR_ELx.nAA.patch b/target-arm-LDAPR-should-honour-SCTLR_ELx.nAA.patch new file mode 100644 index 0000000000000000000000000000000000000000..01d36f07acd8e6adb7cf9ddbd41741e5a9a7018f --- /dev/null +++ b/target-arm-LDAPR-should-honour-SCTLR_ELx.nAA.patch @@ -0,0 +1,56 @@ +From 626103c76d0d8db8dee3f613b6e3159c8ddd5a57 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 2 Jan 2025 10:25:00 +0800 +Subject: [PATCH] target/arm: LDAPR should honour SCTLR_ELx.nAA + +cherry-pick from 25489b521b61b874c4c6583956db0012a3674e3a + +In commit c1a1f80518d360b when we added the FEAT_LSE2 relaxations to +the alignment requirements for atomic and ordered loads and stores, +we didn't quite get it right for LDAPR/LDAPRH/LDAPRB with no +immediate offset. These instructions were handled in the old decoder +as part of disas_ldst_atomic(), but unlike all the other insns that +function decoded (LDADD, LDCLR, etc) these insns are "ordered", not +"atomic", so they should be using check_ordered_align() rather than +check_atomic_align(). 
Commit c1a1f80518d360b used +check_atomic_align() regardless for everything in +disas_ldst_atomic(). We then carried that incorrect check over in +the decodetree conversion, where LDAPR/LDAPRH/LDAPRB are now handled +by trans_LDAPR(). + +The effect is that when FEAT_LSE2 is implemented, these instructions +don't honour the SCTLR_ELx.nAA bit and will generate alignment +faults when they should not. + +(The LDAPR insns with an immediate offset were in disas_ldst_ldapr_stlr() +and then in trans_LDAPR_i() and trans_STLR_i(), and have always used +the correct check_ordered_align().) + +Use check_ordered_align() in trans_LDAPR(). + +Cc: qemu-stable@nongnu.org +Fixes: c1a1f80518d360b ("target/arm: Relax ordered/atomic alignment checks for LSE2") +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240709134504.3500007-3-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/tcg/translate-a64.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c +index a05182b57f..5beac07b60 100644 +--- a/target/arm/tcg/translate-a64.c ++++ b/target/arm/tcg/translate-a64.c +@@ -3306,7 +3306,7 @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) + if (a->rn == 31) { + gen_check_sp_alignment(s); + } +- mop = check_atomic_align(s, a->rn, a->sz); ++ mop = check_ordered_align(s, a->rn, 0, false, a->sz); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + /* +-- +2.41.0.windows.1 + diff --git a/target-arm-Move-DBGDIDR-into-ARMISARegisters.patch b/target-arm-Move-DBGDIDR-into-ARMISARegisters.patch deleted file mode 100644 index e7f2833ee889363902a3e063bffbbff4b4e2c6af..0000000000000000000000000000000000000000 --- a/target-arm-Move-DBGDIDR-into-ARMISARegisters.patch +++ /dev/null @@ -1,158 +0,0 @@ -From df641941e6fd7fef78e5c77c9a809a7a8e148589 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 Feb 2020 17:51:06 +0000 -Subject: [PATCH 
08/13] target/arm: Move DBGDIDR into ARMISARegisters - -We're going to want to read the DBGDIDR register from KVM in -a subsequent commit, which means it needs to be in the -ARMISARegisters sub-struct. Move it. - -Signed-off-by: Peter Maydell -Reviewed-by: Richard Henderson -Message-id: 20200214175116.9164-12-peter.maydell@linaro.org ---- - target/arm/cpu.c | 8 ++++---- - target/arm/cpu.h | 2 +- - target/arm/cpu64.c | 6 +++--- - target/arm/helper.c | 2 +- - target/arm/internals.h | 6 +++--- - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index bb2edf4e..a23c71db 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2070,7 +2070,7 @@ static void cortex_a8_initfn(Object *obj) - cpu->isar.id_isar2 = 0x21232031; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; -- cpu->dbgdidr = 0x15141000; -+ cpu->isar.dbgdidr = 0x15141000; - cpu->clidr = (1 << 27) | (2 << 24) | 3; - cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ - cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ -@@ -2143,7 +2143,7 @@ static void cortex_a9_initfn(Object *obj) - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; -- cpu->dbgdidr = 0x35141000; -+ cpu->isar.dbgdidr = 0x35141000; - cpu->clidr = (1 << 27) | (1 << 24) | 3; - cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ - cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ -@@ -2211,7 +2211,7 @@ static void cortex_a7_initfn(Object *obj) - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; -- cpu->dbgdidr = 0x3515f005; -+ cpu->isar.dbgdidr = 0x3515f005; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ -@@ -2254,7 +2254,7 @@ static void cortex_a15_initfn(Object *obj) - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; -- cpu->dbgdidr = 0x3515f021; -+ cpu->isar.dbgdidr = 0x3515f021; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 4b1ae32b..3040aa40 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -861,6 +861,7 @@ struct ARMCPU { - uint32_t mvfr1; - uint32_t mvfr2; - uint32_t id_dfr0; -+ uint32_t dbgdidr; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; - uint64_t id_aa64pfr0; -@@ -888,7 +889,6 @@ struct ARMCPU { - uint32_t id_mmfr4; - uint64_t id_aa64afr0; - uint64_t id_aa64afr1; -- uint32_t dbgdidr; - uint32_t clidr; - uint64_t mp_affinity; /* MP ID without feature bits */ - /* The elements of this array are the CCSIDR values for each cache, -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index aa96548f..7ad8b5e2 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -140,7 +140,7 @@ static void aarch64_a57_initfn(Object *obj) - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; -- cpu->dbgdidr = 0x3516d000; -+ cpu->isar.dbgdidr = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ -@@ -194,7 +194,7 @@ static void aarch64_a53_initfn(Object *obj) - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - 
cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ -- cpu->dbgdidr = 0x3516d000; -+ cpu->isar.dbgdidr = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ -@@ -247,7 +247,7 @@ static void aarch64_a72_initfn(Object *obj) - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; -- cpu->dbgdidr = 0x3516d000; -+ cpu->isar.dbgdidr = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ -diff --git a/target/arm/helper.c b/target/arm/helper.c -index c1ff4b6b..60ff7c0f 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -5597,7 +5597,7 @@ static void define_debug_regs(ARMCPU *cpu) - ARMCPRegInfo dbgdidr = { - .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL0_R, .accessfn = access_tda, -- .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr, -+ .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr, - }; - - /* Note that all these register fields hold "number of Xs minus 1". 
*/ -diff --git a/target/arm/internals.h b/target/arm/internals.h -index a72d0a6c..1d01ecc4 100644 ---- a/target/arm/internals.h -+++ b/target/arm/internals.h -@@ -867,7 +867,7 @@ static inline int arm_num_brps(ARMCPU *cpu) - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; - } else { -- return FIELD_EX32(cpu->dbgdidr, DBGDIDR, BRPS) + 1; -+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; - } - } - -@@ -881,7 +881,7 @@ static inline int arm_num_wrps(ARMCPU *cpu) - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; - } else { -- return FIELD_EX32(cpu->dbgdidr, DBGDIDR, WRPS) + 1; -+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; - } - } - -@@ -895,7 +895,7 @@ static inline int arm_num_ctx_cmps(ARMCPU *cpu) - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; - } else { -- return FIELD_EX32(cpu->dbgdidr, DBGDIDR, CTX_CMPS) + 1; -+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1; - } - } - --- -2.25.1 - diff --git a/target-arm-Read-debug-related-ID-registers-from-KVM.patch b/target-arm-Read-debug-related-ID-registers-from-KVM.patch deleted file mode 100644 index 1be7cd1713a456204efcb412c1d76398991ef77d..0000000000000000000000000000000000000000 --- a/target-arm-Read-debug-related-ID-registers-from-KVM.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 9cda8af5af9e95e7b0ff683d0fb661c1ffcba8d8 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 Feb 2020 17:51:07 +0000 -Subject: [PATCH 11/13] target/arm: Read debug-related ID registers from KVM - -Now we have isar_feature test functions that look at fields in the -ID_AA64DFR0_EL1 and ID_DFR0 ID registers, add the code that reads -these register values from KVM so that the checks behave correctly -when we're using KVM. 
- -No isar_feature function tests ID_AA64DFR1_EL1 or DBGDIDR yet, but we -add it to maintain the invariant that every field in the -ARMISARegisters struct is populated for a KVM CPU and can be relied -on. This requirement isn't actually written down yet, so add a note -to the relevant comment. - -Signed-off-by: Peter Maydell -Reviewed-by: Richard Henderson -Message-id: 20200214175116.9164-13-peter.maydell@linaro.org ---- - target/arm/cpu.h | 5 +++++ - target/arm/kvm32.c | 8 ++++++++ - target/arm/kvm64.c | 36 ++++++++++++++++++++++++++++++++++++ - 3 files changed, 49 insertions(+) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index a78c30c3..56d8cd8c 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -848,6 +848,11 @@ struct ARMCPU { - * prefix means a constant register. - * Some of these registers are split out into a substructure that - * is shared with the translators to control the ISA. -+ * -+ * Note that if you add an ID register to the ARMISARegisters struct -+ * you need to also update the 32-bit and 64-bit versions of the -+ * kvm_arm_get_host_cpu_features() function to correctly populate the -+ * field by reading the value from the KVM vCPU. - */ - struct ARMISARegisters { - uint32_t id_isar0; -diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c -index 2247148e..e984d52d 100644 ---- a/target/arm/kvm32.c -+++ b/target/arm/kvm32.c -@@ -93,6 +93,9 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - ahcf->isar.id_isar6 = 0; - } - -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, -+ ARM_CP15_REG32(0, 0, 1, 2)); -+ - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, - KVM_REG_ARM | KVM_REG_SIZE_U32 | - KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR0); -@@ -121,6 +124,11 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - ahcf->isar.id_mmfr4 = 0; - } - -+ /* -+ * There is no way to read DBGDIDR, because currently 32-bit KVM -+ * doesn't implement debug at all. Leave it at zero. 
-+ */ -+ - kvm_arm_destroy_scratch_host_vcpu(fdarray); - - if (err < 0) { -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 276d1466..2a88b8df 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -533,6 +533,10 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - } else { - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, - ARM64_SYS_REG(3, 0, 0, 4, 1)); -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, -+ ARM64_SYS_REG(3, 0, 0, 5, 0)); -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, -+ ARM64_SYS_REG(3, 0, 0, 5, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, - ARM64_SYS_REG(3, 0, 0, 6, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, -@@ -551,6 +555,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - * than skipping the reads and leaving 0, as we must avoid - * considering the values in every case. - */ -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, -+ ARM64_SYS_REG(3, 0, 0, 1, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, - ARM64_SYS_REG(3, 0, 0, 1, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, -@@ -582,6 +588,36 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - ARM64_SYS_REG(3, 0, 0, 3, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, - ARM64_SYS_REG(3, 0, 0, 3, 2)); -+ -+ /* -+ * DBGDIDR is a bit complicated because the kernel doesn't -+ * provide an accessor for it in 64-bit mode, which is what this -+ * scratch VM is in, and there's no architected "64-bit sysreg -+ * which reads the same as the 32-bit register" the way there is -+ * for other ID registers. Instead we synthesize a value from the -+ * AArch64 ID_AA64DFR0, the same way the kernel code in -+ * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. -+ * We only do this if the CPU supports AArch32 at EL1. 
-+ */ -+ if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { -+ int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); -+ int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); -+ int ctx_cmps = -+ FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); -+ int version = 6; /* ARMv8 debug architecture */ -+ bool has_el3 = -+ !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); -+ uint32_t dbgdidr = 0; -+ -+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); -+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps); -+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps); -+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version); -+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); -+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); -+ dbgdidr |= (1 << 15); /* RES1 bit */ -+ ahcf->isar.dbgdidr = dbgdidr; -+ } - } - - kvm_arm_destroy_scratch_host_vcpu(fdarray); --- -2.25.1 - diff --git a/target-arm-Reinstate-vfp-property-on-AArch32-CPUs.patch b/target-arm-Reinstate-vfp-property-on-AArch32-CPUs.patch new file mode 100644 index 0000000000000000000000000000000000000000..685dfe406d27263474efeb87e2b2b5e1aad533c3 --- /dev/null +++ b/target-arm-Reinstate-vfp-property-on-AArch32-CPUs.patch @@ -0,0 +1,44 @@ +From 582f5bc85da2d1c6a61e5164dfc272dc96f846d5 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 2 Jan 2025 10:30:33 +0800 +Subject: [PATCH] target/arm: Reinstate "vfp" property on AArch32 CPUs + +cherry-pick from 185e3fdf8d106cb2f7d234d5e6453939c66db2a9 + +In commit 4315f7c614743 we restructured the logic for creating the +VFP related properties to avoid testing the aa32_simd_r32 feature on +AArch64 CPUs. However in the process we accidentally stopped +exposing the "vfp" QOM property on AArch32 TCG CPUs. + +This mostly hasn't had any ill effects because not many people want +to disable VFP, but it wasn't intentional. Reinstate the property. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 4315f7c614743 ("target/arm: Restructure has_vfp_d32 test") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2098 +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240126193432.2210558-1-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/cpu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 9dd61c10ea..09d391bd34 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1635,6 +1635,10 @@ void arm_cpu_post_init(Object *obj) + } + } else if (cpu_isar_feature(aa32_vfp, cpu)) { + cpu->has_vfp = true; ++ if (tcg_enabled() || qtest_enabled()) { ++ qdev_property_add_static(DEVICE(obj), ++ &arm_cpu_has_vfp_property); ++ } + if (cpu_isar_feature(aa32_simd_r32, cpu)) { + cpu->has_vfp_d32 = true; + /* +-- +2.41.0.windows.1 + diff --git a/target-arm-Stop-assuming-DBGDIDR-always-exists.patch b/target-arm-Stop-assuming-DBGDIDR-always-exists.patch deleted file mode 100644 index c7648c5430ebd61b2267a184ad16828dd8d25015..0000000000000000000000000000000000000000 --- a/target-arm-Stop-assuming-DBGDIDR-always-exists.patch +++ /dev/null @@ -1,186 +0,0 @@ -From 1d4d4cda9637ec09f8cf30785f68b58cd46815c8 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 Feb 2020 17:51:05 +0000 -Subject: [PATCH 07/13] target/arm: Stop assuming DBGDIDR always exists - -The AArch32 DBGDIDR defines properties like the number of -breakpoints, watchpoints and context-matching comparators. On an -AArch64 CPU, the register may not even exist if AArch32 is not -supported at EL1. - -Currently we hard-code use of DBGDIDR to identify the number of -breakpoints etc; this works for all our TCG CPUs, but will break if -we ever add an AArch64-only CPU. We also have an assert() that the -AArch32 and AArch64 registers match, which currently works only by -luck for KVM because we don't populate either of these ID registers -from the KVM vCPU and so they are both zero. 
- -Clean this up so we have functions for finding the number -of breakpoints, watchpoints and context comparators which look -in the appropriate ID register. - -This allows us to drop the "check that AArch64 and AArch32 agree -on the number of breakpoints etc" asserts: - * we no longer look at the AArch32 versions unless that's the - right place to be looking - * it's valid to have a CPU (eg AArch64-only) where they don't match - * we shouldn't have been asserting the validity of ID registers - in a codepath used with KVM anyway - -Signed-off-by: Peter Maydell -Reviewed-by: Richard Henderson -Message-id: 20200214175116.9164-11-peter.maydell@linaro.org ---- - target/arm/cpu.h | 7 +++++++ - target/arm/debug_helper.c | 6 +++--- - target/arm/helper.c | 21 +++++--------------- - target/arm/internals.h | 42 +++++++++++++++++++++++++++++++++++++++ - 4 files changed, 57 insertions(+), 19 deletions(-) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 230130be..4b1ae32b 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -1798,6 +1798,13 @@ FIELD(ID_DFR0, MPROFDBG, 20, 4) - FIELD(ID_DFR0, PERFMON, 24, 4) - FIELD(ID_DFR0, TRACEFILT, 28, 4) - -+FIELD(DBGDIDR, SE_IMP, 12, 1) -+FIELD(DBGDIDR, NSUHD_IMP, 14, 1) -+FIELD(DBGDIDR, VERSION, 16, 4) -+FIELD(DBGDIDR, CTX_CMPS, 20, 4) -+FIELD(DBGDIDR, BRPS, 24, 4) -+FIELD(DBGDIDR, WRPS, 28, 4) -+ - FIELD(MVFR0, SIMDREG, 0, 4) - FIELD(MVFR0, FPSP, 4, 4) - FIELD(MVFR0, FPDP, 8, 4) -diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c -index dde80273..3f8f667d 100644 ---- a/target/arm/debug_helper.c -+++ b/target/arm/debug_helper.c -@@ -16,8 +16,8 @@ static bool linked_bp_matches(ARMCPU *cpu, int lbn) - { - CPUARMState *env = &cpu->env; - uint64_t bcr = env->cp15.dbgbcr[lbn]; -- int brps = extract32(cpu->dbgdidr, 24, 4); -- int ctx_cmps = extract32(cpu->dbgdidr, 20, 4); -+ int brps = arm_num_brps(cpu); -+ int ctx_cmps = arm_num_ctx_cmps(cpu); - int bt; - uint32_t contextidr; - -@@ -28,7 +28,7 @@ static bool 
linked_bp_matches(ARMCPU *cpu, int lbn) - * case DBGWCR_EL1.LBN must indicate that breakpoint). - * We choose the former. - */ -- if (lbn > brps || lbn < (brps - ctx_cmps)) { -+ if (lbn >= brps || lbn < (brps - ctx_cmps)) { - return false; - } - -diff --git a/target/arm/helper.c b/target/arm/helper.c -index a71f4ef6..c1ff4b6b 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -5601,23 +5601,12 @@ static void define_debug_regs(ARMCPU *cpu) - }; - - /* Note that all these register fields hold "number of Xs minus 1". */ -- brps = extract32(cpu->dbgdidr, 24, 4); -- wrps = extract32(cpu->dbgdidr, 28, 4); -- ctx_cmps = extract32(cpu->dbgdidr, 20, 4); -+ brps = arm_num_brps(cpu); -+ wrps = arm_num_wrps(cpu); -+ ctx_cmps = arm_num_ctx_cmps(cpu); - - assert(ctx_cmps <= brps); - -- /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties -- * of the debug registers such as number of breakpoints; -- * check that if they both exist then they agree. -- */ -- if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -- assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); -- assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); -- assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) -- == ctx_cmps); -- } -- - define_one_arm_cp_reg(cpu, &dbgdidr); - define_arm_cp_regs(cpu, debug_cp_reginfo); - -@@ -5625,7 +5614,7 @@ static void define_debug_regs(ARMCPU *cpu) - define_arm_cp_regs(cpu, debug_lpae_cp_reginfo); - } - -- for (i = 0; i < brps + 1; i++) { -+ for (i = 0; i < brps; i++) { - ARMCPRegInfo dbgregs[] = { - { .name = "DBGBVR", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4, -@@ -5644,7 +5633,7 @@ static void define_debug_regs(ARMCPU *cpu) - define_arm_cp_regs(cpu, dbgregs); - } - -- for (i = 0; i < wrps + 1; i++) { -+ for (i = 0; i < wrps; i++) { - ARMCPRegInfo dbgregs[] = { - { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, 
.opc2 = 6, -diff --git a/target/arm/internals.h b/target/arm/internals.h -index 232d9638..a72d0a6c 100644 ---- a/target/arm/internals.h -+++ b/target/arm/internals.h -@@ -857,6 +857,48 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) - } - } - -+/** -+ * arm_num_brps: Return number of implemented breakpoints. -+ * Note that the ID register BRPS field is "number of bps - 1", -+ * and we return the actual number of breakpoints. -+ */ -+static inline int arm_num_brps(ARMCPU *cpu) -+{ -+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; -+ } else { -+ return FIELD_EX32(cpu->dbgdidr, DBGDIDR, BRPS) + 1; -+ } -+} -+ -+/** -+ * arm_num_wrps: Return number of implemented watchpoints. -+ * Note that the ID register WRPS field is "number of wps - 1", -+ * and we return the actual number of watchpoints. -+ */ -+static inline int arm_num_wrps(ARMCPU *cpu) -+{ -+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; -+ } else { -+ return FIELD_EX32(cpu->dbgdidr, DBGDIDR, WRPS) + 1; -+ } -+} -+ -+/** -+ * arm_num_ctx_cmps: Return number of implemented context comparators. -+ * Note that the ID register CTX_CMPS field is "number of cmps - 1", -+ * and we return the actual number of comparators. -+ */ -+static inline int arm_num_ctx_cmps(ARMCPU *cpu) -+{ -+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; -+ } else { -+ return FIELD_EX32(cpu->dbgdidr, DBGDIDR, CTX_CMPS) + 1; -+ } -+} -+ - /* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. - * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. 
- */ --- -2.25.1 - diff --git a/target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch b/target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch deleted file mode 100644 index ecbaf7750c42aab1efade6d50e53fd7e92762883..0000000000000000000000000000000000000000 --- a/target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch +++ /dev/null @@ -1,453 +0,0 @@ -From 2bc630dc858bd0c010b7c375ebf1e8f4b4e0e346 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 Feb 2020 17:51:13 +0000 -Subject: [PATCH 10/13] target/arm: Test correct register in aa32_pan and - aa32_ats1e1 checks - -The isar_feature_aa32_pan and isar_feature_aa32_ats1e1 functions -are supposed to be testing fields in ID_MMFR3; but a cut-and-paste -error meant we were looking at MVFR0 instead. - -Fix the functions to look at the right register; this requires -us to move at least id_mmfr3 to the ARMISARegisters struct; we -choose to move all the ID_MMFRn registers for consistency. - -Fixes: 3d6ad6bb466f -Signed-off-by: Peter Maydell -Reviewed-by: Richard Henderson -Message-id: 20200214175116.9164-19-peter.maydell@linaro.org ---- - hw/intc/armv7m_nvic.c | 8 ++-- - target/arm/cpu.c | 96 +++++++++++++++++++++---------------------- - target/arm/cpu.h | 14 +++---- - target/arm/cpu64.c | 28 ++++++------- - target/arm/helper.c | 12 +++--- - target/arm/kvm32.c | 17 ++++++++ - target/arm/kvm64.c | 10 +++++ - 7 files changed, 106 insertions(+), 79 deletions(-) - -diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c -index 0741db7b..f7ef6ad1 100644 ---- a/hw/intc/armv7m_nvic.c -+++ b/hw/intc/armv7m_nvic.c -@@ -1227,13 +1227,13 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) - case 0xd4c: /* AFR0. */ - return cpu->id_afr0; - case 0xd50: /* MMFR0. */ -- return cpu->id_mmfr0; -+ return cpu->isar.id_mmfr0; - case 0xd54: /* MMFR1. */ -- return cpu->id_mmfr1; -+ return cpu->isar.id_mmfr1; - case 0xd58: /* MMFR2. 
*/ -- return cpu->id_mmfr2; -+ return cpu->isar.id_mmfr2; - case 0xd5c: /* MMFR3. */ -- return cpu->id_mmfr3; -+ return cpu->isar.id_mmfr3; - case 0xd60: /* ISAR0. */ - return cpu->isar.id_isar0; - case 0xd64: /* ISAR1. */ -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 119bd275..c3728e3d 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1764,9 +1764,9 @@ static void arm1136_r2_initfn(Object *obj) - cpu->id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; -- cpu->id_mmfr0 = 0x01130003; -- cpu->id_mmfr1 = 0x10030302; -- cpu->id_mmfr2 = 0x01222110; -+ cpu->isar.id_mmfr0 = 0x01130003; -+ cpu->isar.id_mmfr1 = 0x10030302; -+ cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; -@@ -1796,9 +1796,9 @@ static void arm1136_initfn(Object *obj) - cpu->id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; -- cpu->id_mmfr0 = 0x01130003; -- cpu->id_mmfr1 = 0x10030302; -- cpu->id_mmfr2 = 0x01222110; -+ cpu->isar.id_mmfr0 = 0x01130003; -+ cpu->isar.id_mmfr1 = 0x10030302; -+ cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; -@@ -1829,9 +1829,9 @@ static void arm1176_initfn(Object *obj) - cpu->id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x33; - cpu->id_afr0 = 0; -- cpu->id_mmfr0 = 0x01130003; -- cpu->id_mmfr1 = 0x10030302; -- cpu->id_mmfr2 = 0x01222100; -+ cpu->isar.id_mmfr0 = 0x01130003; -+ cpu->isar.id_mmfr1 = 0x10030302; -+ cpu->isar.id_mmfr2 = 0x01222100; - cpu->isar.id_isar0 = 0x0140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231121; -@@ -1859,9 +1859,9 @@ static void arm11mpcore_initfn(Object *obj) - cpu->id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0; - cpu->id_afr0 = 0x2; -- cpu->id_mmfr0 = 0x01100103; -- cpu->id_mmfr1 = 0x10020302; -- cpu->id_mmfr2 = 0x01222000; -+ cpu->isar.id_mmfr0 = 0x01100103; -+ cpu->isar.id_mmfr1 = 0x10020302; -+ cpu->isar.id_mmfr2 
= 0x01222000; - cpu->isar.id_isar0 = 0x00100011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11221011; -@@ -1891,10 +1891,10 @@ static void cortex_m3_initfn(Object *obj) - cpu->id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x00000030; -- cpu->id_mmfr1 = 0x00000000; -- cpu->id_mmfr2 = 0x00000000; -- cpu->id_mmfr3 = 0x00000000; -+ cpu->isar.id_mmfr0 = 0x00000030; -+ cpu->isar.id_mmfr1 = 0x00000000; -+ cpu->isar.id_mmfr2 = 0x00000000; -+ cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; -@@ -1922,10 +1922,10 @@ static void cortex_m4_initfn(Object *obj) - cpu->id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x00000030; -- cpu->id_mmfr1 = 0x00000000; -- cpu->id_mmfr2 = 0x00000000; -- cpu->id_mmfr3 = 0x00000000; -+ cpu->isar.id_mmfr0 = 0x00000030; -+ cpu->isar.id_mmfr1 = 0x00000000; -+ cpu->isar.id_mmfr2 = 0x00000000; -+ cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; -@@ -1955,10 +1955,10 @@ static void cortex_m33_initfn(Object *obj) - cpu->id_pfr1 = 0x00000210; - cpu->isar.id_dfr0 = 0x00200000; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x00101F40; -- cpu->id_mmfr1 = 0x00000000; -- cpu->id_mmfr2 = 0x01000000; -- cpu->id_mmfr3 = 0x00000000; -+ cpu->isar.id_mmfr0 = 0x00101F40; -+ cpu->isar.id_mmfr1 = 0x00000000; -+ cpu->isar.id_mmfr2 = 0x01000000; -+ cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; -@@ -2006,10 +2006,10 @@ static void cortex_r5_initfn(Object *obj) - cpu->id_pfr1 = 0x001; - cpu->isar.id_dfr0 = 0x010400; - cpu->id_afr0 = 0x0; -- cpu->id_mmfr0 = 0x0210030; -- cpu->id_mmfr1 = 0x00000000; -- cpu->id_mmfr2 = 0x01200000; -- cpu->id_mmfr3 = 0x0211; -+ 
cpu->isar.id_mmfr0 = 0x0210030; -+ cpu->isar.id_mmfr1 = 0x00000000; -+ cpu->isar.id_mmfr2 = 0x01200000; -+ cpu->isar.id_mmfr3 = 0x0211; - cpu->isar.id_isar0 = 0x02101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232141; -@@ -2061,10 +2061,10 @@ static void cortex_a8_initfn(Object *obj) - cpu->id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x400; - cpu->id_afr0 = 0; -- cpu->id_mmfr0 = 0x31100003; -- cpu->id_mmfr1 = 0x20000000; -- cpu->id_mmfr2 = 0x01202000; -- cpu->id_mmfr3 = 0x11; -+ cpu->isar.id_mmfr0 = 0x31100003; -+ cpu->isar.id_mmfr1 = 0x20000000; -+ cpu->isar.id_mmfr2 = 0x01202000; -+ cpu->isar.id_mmfr3 = 0x11; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x12112111; - cpu->isar.id_isar2 = 0x21232031; -@@ -2134,10 +2134,10 @@ static void cortex_a9_initfn(Object *obj) - cpu->id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x000; - cpu->id_afr0 = 0; -- cpu->id_mmfr0 = 0x00100103; -- cpu->id_mmfr1 = 0x20000000; -- cpu->id_mmfr2 = 0x01230000; -- cpu->id_mmfr3 = 0x00002111; -+ cpu->isar.id_mmfr0 = 0x00100103; -+ cpu->isar.id_mmfr1 = 0x20000000; -+ cpu->isar.id_mmfr2 = 0x01230000; -+ cpu->isar.id_mmfr3 = 0x00002111; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; -@@ -2199,10 +2199,10 @@ static void cortex_a7_initfn(Object *obj) - cpu->id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x10101105; -- cpu->id_mmfr1 = 0x40000000; -- cpu->id_mmfr2 = 0x01240000; -- cpu->id_mmfr3 = 0x02102211; -+ cpu->isar.id_mmfr0 = 0x10101105; -+ cpu->isar.id_mmfr1 = 0x40000000; -+ cpu->isar.id_mmfr2 = 0x01240000; -+ cpu->isar.id_mmfr3 = 0x02102211; - /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but - * table 4-41 gives 0x02101110, which includes the arm div insns. 
- */ -@@ -2245,10 +2245,10 @@ static void cortex_a15_initfn(Object *obj) - cpu->id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x10201105; -- cpu->id_mmfr1 = 0x20000000; -- cpu->id_mmfr2 = 0x01240000; -- cpu->id_mmfr3 = 0x02102211; -+ cpu->isar.id_mmfr0 = 0x10201105; -+ cpu->isar.id_mmfr1 = 0x20000000; -+ cpu->isar.id_mmfr2 = 0x01240000; -+ cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; -@@ -2484,13 +2484,13 @@ static void arm_max_initfn(Object *obj) - t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ - cpu->isar.mvfr2 = t; - -- t = cpu->id_mmfr3; -+ t = cpu->isar.id_mmfr3; - t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ -- cpu->id_mmfr3 = t; -+ cpu->isar.id_mmfr3 = t; - -- t = cpu->id_mmfr4; -+ t = cpu->isar.id_mmfr4; - t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ -- cpu->id_mmfr4 = t; -+ cpu->isar.id_mmfr4 = t; - } - #endif - } -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 3040aa40..a78c30c3 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -857,6 +857,11 @@ struct ARMCPU { - uint32_t id_isar4; - uint32_t id_isar5; - uint32_t id_isar6; -+ uint32_t id_mmfr0; -+ uint32_t id_mmfr1; -+ uint32_t id_mmfr2; -+ uint32_t id_mmfr3; -+ uint32_t id_mmfr4; - uint32_t mvfr0; - uint32_t mvfr1; - uint32_t mvfr2; -@@ -882,11 +887,6 @@ struct ARMCPU { - uint64_t pmceid0; - uint64_t pmceid1; - uint32_t id_afr0; -- uint32_t id_mmfr0; -- uint32_t id_mmfr1; -- uint32_t id_mmfr2; -- uint32_t id_mmfr3; -- uint32_t id_mmfr4; - uint64_t id_aa64afr0; - uint64_t id_aa64afr1; - uint32_t clidr; -@@ -3490,12 +3490,12 @@ static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) - - static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) != 0; -+ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; - } - - static inline bool 
isar_feature_aa32_ats1e1(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2; -+ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; - } - - static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index a0d07fd7..d450b8c8 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -125,10 +125,10 @@ static void aarch64_a57_initfn(Object *obj) - cpu->id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x10101105; -- cpu->id_mmfr1 = 0x40000000; -- cpu->id_mmfr2 = 0x01260000; -- cpu->id_mmfr3 = 0x02102211; -+ cpu->isar.id_mmfr0 = 0x10101105; -+ cpu->isar.id_mmfr1 = 0x40000000; -+ cpu->isar.id_mmfr2 = 0x01260000; -+ cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; -@@ -179,10 +179,10 @@ static void aarch64_a53_initfn(Object *obj) - cpu->id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x10101105; -- cpu->id_mmfr1 = 0x40000000; -- cpu->id_mmfr2 = 0x01260000; -- cpu->id_mmfr3 = 0x02102211; -+ cpu->isar.id_mmfr0 = 0x10101105; -+ cpu->isar.id_mmfr1 = 0x40000000; -+ cpu->isar.id_mmfr2 = 0x01260000; -+ cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; -@@ -233,10 +233,10 @@ static void aarch64_a72_initfn(Object *obj) - cpu->id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; -- cpu->id_mmfr0 = 0x10201105; -- cpu->id_mmfr1 = 0x40000000; -- cpu->id_mmfr2 = 0x01260000; -- cpu->id_mmfr3 = 0x02102211; -+ cpu->isar.id_mmfr0 = 0x10201105; -+ cpu->isar.id_mmfr1 = 0x40000000; -+ cpu->isar.id_mmfr2 = 0x01260000; -+ cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; -@@ -383,9 +383,9 
@@ static void aarch64_max_initfn(Object *obj) - u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); - cpu->isar.id_isar6 = u; - -- u = cpu->id_mmfr3; -+ u = cpu->isar.id_mmfr3; - u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ -- cpu->id_mmfr3 = u; -+ cpu->isar.id_mmfr3 = u; - - /* - * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 60ff7c0f..49cd7a7e 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -5906,19 +5906,19 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_mmfr0 }, -+ .resetvalue = cpu->isar.id_mmfr0 }, - { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_mmfr1 }, -+ .resetvalue = cpu->isar.id_mmfr1 }, - { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_mmfr2 }, -+ .resetvalue = cpu->isar.id_mmfr2 }, - { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_mmfr3 }, -+ .resetvalue = cpu->isar.id_mmfr3 }, - { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -5946,7 +5946,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->id_mmfr4 }, -+ .resetvalue = cpu->isar.id_mmfr4 }, - { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, - .access = 
PL1_R, .type = ARM_CP_CONST, -@@ -6426,7 +6426,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) - define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo); - define_arm_cp_regs(cpu, vmsa_cp_reginfo); - /* TTCBR2 is introduced with ARMv8.2-A32HPD. */ -- if (FIELD_EX32(cpu->id_mmfr4, ID_MMFR4, HPDS) != 0) { -+ if (FIELD_EX32(cpu->isar.id_mmfr4, ID_MMFR4, HPDS) != 0) { - define_one_arm_cp_reg(cpu, &ttbcr2_reginfo); - } - } -diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c -index ee158830..2247148e 100644 ---- a/target/arm/kvm32.c -+++ b/target/arm/kvm32.c -@@ -104,6 +104,23 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - * Fortunately there is not yet anything in there that affects migration. - */ - -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, -+ ARM_CP15_REG32(0, 0, 1, 4)); -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, -+ ARM_CP15_REG32(0, 0, 1, 5)); -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, -+ ARM_CP15_REG32(0, 0, 1, 6)); -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, -+ ARM_CP15_REG32(0, 0, 1, 7)); -+ if (read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, -+ ARM_CP15_REG32(0, 0, 2, 6))) { -+ /* -+ * Older kernels don't support reading ID_MMFR4 (a new in v8 -+ * register); assume it's zero. -+ */ -+ ahcf->isar.id_mmfr4 = 0; -+ } -+ - kvm_arm_destroy_scratch_host_vcpu(fdarray); - - if (err < 0) { -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index b794108a..276d1466 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -551,6 +551,14 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - * than skipping the reads and leaving 0, as we must avoid - * considering the values in every case. 
- */ -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, -+ ARM64_SYS_REG(3, 0, 0, 1, 4)); -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, -+ ARM64_SYS_REG(3, 0, 0, 1, 5)); -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, -+ ARM64_SYS_REG(3, 0, 0, 1, 6)); -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, -+ ARM64_SYS_REG(3, 0, 0, 1, 7)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, - ARM64_SYS_REG(3, 0, 0, 2, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, -@@ -563,6 +571,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - ARM64_SYS_REG(3, 0, 0, 2, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, - ARM64_SYS_REG(3, 0, 0, 2, 5)); -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, -+ ARM64_SYS_REG(3, 0, 0, 2, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, - ARM64_SYS_REG(3, 0, 0, 2, 7)); - --- -2.25.1 - diff --git a/target-arm-Update-ID-fields.patch b/target-arm-Update-ID-fields.patch deleted file mode 100644 index 94ed8027c9e238f384e767bc88c209749eee234e..0000000000000000000000000000000000000000 --- a/target-arm-Update-ID-fields.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 47c76d73a435884b66ce6417cb853893099be5eb Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Tue, 11 Aug 2020 10:18:57 +0800 -Subject: [PATCH 8/9] target/arm: Update ID fields - -Update definitions for ID fields, up to ARMv8.6. 
- -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/cpu.h | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 068c3fa2..eb875e11 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -1691,6 +1691,8 @@ FIELD(ID_ISAR6, DP, 4, 4) - FIELD(ID_ISAR6, FHM, 8, 4) - FIELD(ID_ISAR6, SB, 12, 4) - FIELD(ID_ISAR6, SPECRES, 16, 4) -+FIELD(ID_ISAR6, BF16, 20, 4) -+FIELD(ID_ISAR6, I8MM, 24, 4) - - FIELD(ID_MMFR3, CMAINTVA, 0, 4) - FIELD(ID_MMFR3, CMAINTSW, 4, 4) -@@ -1736,6 +1738,9 @@ FIELD(ID_AA64ISAR1, GPI, 28, 4) - FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) - FIELD(ID_AA64ISAR1, SB, 36, 4) - FIELD(ID_AA64ISAR1, SPECRES, 40, 4) -+FIELD(ID_AA64ISAR1, BF16, 44, 4) -+FIELD(ID_AA64ISAR1, DGH, 48, 4) -+FIELD(ID_AA64ISAR1, I8MM, 52, 4) - - FIELD(ID_AA64PFR0, EL0, 0, 4) - FIELD(ID_AA64PFR0, EL1, 4, 4) -@@ -1746,11 +1751,18 @@ FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) - FIELD(ID_AA64PFR0, GIC, 24, 4) - FIELD(ID_AA64PFR0, RAS, 28, 4) - FIELD(ID_AA64PFR0, SVE, 32, 4) -+FIELD(ID_AA64PFR0, SEL2, 36, 4) -+FIELD(ID_AA64PFR0, MPAM, 40, 4) -+FIELD(ID_AA64PFR0, AMU, 44, 4) -+FIELD(ID_AA64PFR0, DIT, 44, 4) -+FIELD(ID_AA64PFR0, CSV2, 56, 4) -+FIELD(ID_AA64PFR0, CSV3, 60, 4) - - FIELD(ID_AA64PFR1, BT, 0, 4) - FIELD(ID_AA64PFR1, SBSS, 4, 4) - FIELD(ID_AA64PFR1, MTE, 8, 4) - FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4) -+FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4) - - FIELD(ID_AA64MMFR0, PARANGE, 0, 4) - FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) -@@ -1764,6 +1776,8 @@ FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4) - FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4) - FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4) - FIELD(ID_AA64MMFR0, EXS, 44, 4) -+FIELD(ID_AA64MMFR0, FGT, 56, 4) -+FIELD(ID_AA64MMFR0, ECV, 60, 4) - - FIELD(ID_AA64MMFR1, HAFDBS, 0, 4) - FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4) -@@ -1773,6 +1787,8 @@ FIELD(ID_AA64MMFR1, LO, 16, 4) - FIELD(ID_AA64MMFR1, PAN, 20, 4) - FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) - FIELD(ID_AA64MMFR1, XNX, 28, 4) -+FIELD(ID_AA64MMFR1, TWED, 
32, 4) -+FIELD(ID_AA64MMFR1, ETS, 36, 4) - - FIELD(ID_AA64MMFR2, CNP, 0, 4) - FIELD(ID_AA64MMFR2, UAO, 4, 4) -@@ -1799,6 +1815,7 @@ FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) - FIELD(ID_AA64DFR0, PMSVER, 32, 4) - FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) - FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) -+FIELD(ID_AA64DFR0, MUPMU, 48, 4) - - FIELD(ID_DFR0, COPDBG, 0, 4) - FIELD(ID_DFR0, COPSDBG, 4, 4) --- -2.25.1 - diff --git a/target-arm-Update-the-ID-registers-of-Kunpeng-920.patch b/target-arm-Update-the-ID-registers-of-Kunpeng-920.patch deleted file mode 100644 index 586dcbb1998a3e0e910feec54d326f577154711e..0000000000000000000000000000000000000000 --- a/target-arm-Update-the-ID-registers-of-Kunpeng-920.patch +++ /dev/null @@ -1,57 +0,0 @@ -From b54ca94f19a9b22537712638ae05d2095258eb80 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Sat, 19 Sep 2020 09:04:45 +0800 -Subject: [PATCH] target/arm: Update the ID registers of Kunpeng-920 - -The values of some ID registers in Kunpeng-920 are not exactly correct. -Let's update them. The values are read from Kunpeng-920 by calling -read_sysreg_s. 
- -Signed-off-by: Peng Liang ---- - target/arm/cpu64.c | 27 +++++++++++++++++++++++++-- - 1 file changed, 25 insertions(+), 2 deletions(-) - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 726d123d8e..a1649f8844 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -275,10 +275,33 @@ static void aarch64_kunpeng_920_initfn(Object *obj) - - cpu->midr = 0x480fd010; - cpu->ctr = 0x84448004; -- cpu->isar.regs[ID_AA64PFR0] = 0x11001111; -+ cpu->isar.regs[ID_ISAR0] = 0; -+ cpu->isar.regs[ID_ISAR1] = 0; -+ cpu->isar.regs[ID_ISAR2] = 0; -+ cpu->isar.regs[ID_ISAR3] = 0; -+ cpu->isar.regs[ID_ISAR4] = 0; -+ cpu->isar.regs[ID_ISAR5] = 0; -+ cpu->isar.regs[ID_MMFR0] = 0; -+ cpu->isar.regs[ID_MMFR1] = 0; -+ cpu->isar.regs[ID_MMFR2] = 0; -+ cpu->isar.regs[ID_MMFR3] = 0; -+ cpu->isar.regs[ID_MMFR4] = 0; -+ cpu->isar.regs[MVFR0] = 0; -+ cpu->isar.regs[MVFR1] = 0; -+ cpu->isar.regs[MVFR2] = 0; -+ cpu->isar.regs[ID_DFR0] = 0; -+ cpu->isar.regs[MVFR2] = 0; -+ cpu->isar.regs[MVFR2] = 0; -+ cpu->isar.regs[MVFR2] = 0; -+ cpu->id_pfr0 = 0; -+ cpu->id_pfr1 = 0; -+ cpu->isar.regs[ID_AA64PFR0] = 0x0000010011111111; - cpu->isar.regs[ID_AA64DFR0] = 0x110305408; -- cpu->isar.regs[ID_AA64ISAR0] = 0x10211120; -+ cpu->isar.regs[ID_AA64ISAR0] = 0x0001100010211120; -+ cpu->isar.regs[ID_AA64ISAR1] = 0x00011001; - cpu->isar.regs[ID_AA64MMFR0] = 0x101125; -+ cpu->isar.regs[ID_AA64MMFR1] = 0x10211122; -+ cpu->isar.regs[ID_AA64MMFR2] = 0x00001011; - } - - static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, --- -2.23.0 - diff --git a/target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch b/target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch deleted file mode 100644 index 0e32f85104cb492dba2d0e72aa6138342ef960db..0000000000000000000000000000000000000000 --- a/target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch +++ /dev/null @@ -1,36 +0,0 @@ -From f54cdca97bf86f5ca1df8471bc229b89797b287e Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 14 
Feb 2020 17:51:02 +0000 -Subject: [PATCH 04/13] target/arm: Use FIELD macros for clearing ID_DFR0 - PERFMON field -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -We already define FIELD macros for ID_DFR0, so use them in the -one place where we're doing direct bit value manipulation. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Richard Henderson -Signed-off-by: Peter Maydell -Message-id: 20200214175116.9164-8-peter.maydell@linaro.org ---- - target/arm/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index dbd05e01..6ad211b1 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1523,7 +1523,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - #endif - } else { - cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); -- cpu->id_dfr0 &= ~(0xf << 24); -+ cpu->id_dfr0 = FIELD_DP32(cpu->id_dfr0, ID_DFR0, PERFMON, 0); - cpu->pmceid0 = 0; - cpu->pmceid1 = 0; - } --- -2.25.1 - diff --git a/target-arm-Use-float_status-copy-in-sme_fmopa_s.patch b/target-arm-Use-float_status-copy-in-sme_fmopa_s.patch new file mode 100644 index 0000000000000000000000000000000000000000..91143bf81228f5e571e5fc75baadf5e212cb4936 --- /dev/null +++ b/target-arm-Use-float_status-copy-in-sme_fmopa_s.patch @@ -0,0 +1,47 @@ +From 06da30c93dfd4cff013881582d25c3d04456376b Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 2 Jan 2025 10:40:17 +0800 +Subject: [PATCH] target/arm: Use float_status copy in sme_fmopa_s +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 31d93fedf41c24b0badb38cd9317590d1ef74e37 + +We made a copy above because the fp exception flags +are not propagated back to the FPST register, but +then failed to use the copy. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 558e956c719 ("target/arm: Implement FMOPA, FMOPS (non-widening)") +Signed-off-by: Daniyal Khan +Signed-off-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Alex Bennée +Message-id: 20240717060149.204788-2-richard.henderson@linaro.org +[rth: Split from a larger patch] +Signed-off-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Alex Bennée +Signed-off-by: Peter Maydell +Signed-off-by: gubin +--- + target/arm/tcg/sme_helper.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c +index 9a9b1a240c..ae4f39ed02 100644 +--- a/target/arm/tcg/sme_helper.c ++++ b/target/arm/tcg/sme_helper.c +@@ -916,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, + if (pb & 1) { + uint32_t *a = vza_row + H1_4(col); + uint32_t *m = vzm + H1_4(col); +- *a = float32_muladd(n, *m, *a, 0, vst); ++ *a = float32_muladd(n, *m, *a, 0, &fpst); + } + col += 4; + pb >>= 4; +-- +2.41.0.windows.1 + diff --git a/target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch b/target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch deleted file mode 100644 index 455dc843c105743750f7bc573b6fb86f3a5861b8..0000000000000000000000000000000000000000 --- a/target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ad6ce039cab07b6a99ccaa36fbb0043ae85a74c9 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Mon, 21 Sep 2020 22:14:20 +0800 -Subject: [PATCH] target/arm: clear EL2 and EL3 only when kvm is not enabled - -When has_el2 and has_el3 are disabled, which is the default value for -virt machine, QEMU will clear the corresponding field in ID_PFR1_EL1 and -ID_AA64PFR0_EL1 to not expose EL3 and EL2 to guest. Because KVM doesn't -support to emulate ID registers in AArch64 before, it will not take -effect. Hence, clear EL2 and EL3 only when kvm is not enabled for -backwards compatibility. 
- -Signed-off-by: Peng Liang ---- - target/arm/cpu.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7ae2d3da56..3f62336acf 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1996,7 +1996,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - } - } - -- if (!cpu->has_el3) { -+ if (!cpu->has_el3 && !kvm_enabled()) { - /* If the has_el3 CPU property is disabled then we need to disable the - * feature. - */ -@@ -2037,7 +2037,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - cpu->pmceid1 = 0; - } - -- if (!arm_feature(env, ARM_FEATURE_EL2)) { -+ if (!arm_feature(env, ARM_FEATURE_EL2) && !kvm_enabled()) { - /* Disable the hypervisor feature bits in the processor feature - * registers if we don't have EL2. These are id_pfr1[15:12] and - * id_aa64pfr0_el1[11:8]. --- -2.23.0 - diff --git a/target-arm-convert-isar-regs-to-array.patch b/target-arm-convert-isar-regs-to-array.patch deleted file mode 100644 index 528371212aad42f034db62858b1a2da2cdcba79d..0000000000000000000000000000000000000000 --- a/target-arm-convert-isar-regs-to-array.patch +++ /dev/null @@ -1,1908 +0,0 @@ -From ac92f0f7bbf7cf063ba45fbfaf7e7970dd76544a Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Thu, 6 Aug 2020 16:14:25 +0800 -Subject: [PATCH 1/9] target/arm: convert isar regs to array - -The isar in ARMCPU is a struct, each field of which represents an ID -register. It's not convenient for us to support CPU feature in AArch64. -So let's change it to an array first and add an enum as the index of the -array for convenience. Since we will never access high 32-bits of ID -registers in AArch32, it's harmless to change the ID registers in -AArch32 to 64-bits. 
- -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - hw/intc/armv7m_nvic.c | 28 +-- - target/arm/cpu.c | 440 +++++++++++++++++++++-------------------- - target/arm/cpu.h | 178 +++++++++-------- - target/arm/cpu64.c | 158 +++++++-------- - target/arm/helper.c | 54 ++--- - target/arm/internals.h | 15 +- - target/arm/kvm64.c | 68 +++---- - 7 files changed, 478 insertions(+), 463 deletions(-) - -diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c -index f7ef6ad1..5013ec97 100644 ---- a/hw/intc/armv7m_nvic.c -+++ b/hw/intc/armv7m_nvic.c -@@ -1223,29 +1223,29 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) - case 0xd44: /* PFR1. */ - return cpu->id_pfr1; - case 0xd48: /* DFR0. */ -- return cpu->isar.id_dfr0; -+ return cpu->isar.regs[ID_DFR0]; - case 0xd4c: /* AFR0. */ - return cpu->id_afr0; - case 0xd50: /* MMFR0. */ -- return cpu->isar.id_mmfr0; -+ return cpu->isar.regs[ID_MMFR0]; - case 0xd54: /* MMFR1. */ -- return cpu->isar.id_mmfr1; -+ return cpu->isar.regs[ID_MMFR1]; - case 0xd58: /* MMFR2. */ -- return cpu->isar.id_mmfr2; -+ return cpu->isar.regs[ID_MMFR2]; - case 0xd5c: /* MMFR3. */ -- return cpu->isar.id_mmfr3; -+ return cpu->isar.regs[ID_MMFR3]; - case 0xd60: /* ISAR0. */ -- return cpu->isar.id_isar0; -+ return cpu->isar.regs[ID_ISAR0]; - case 0xd64: /* ISAR1. */ -- return cpu->isar.id_isar1; -+ return cpu->isar.regs[ID_ISAR1]; - case 0xd68: /* ISAR2. */ -- return cpu->isar.id_isar2; -+ return cpu->isar.regs[ID_ISAR2]; - case 0xd6c: /* ISAR3. */ -- return cpu->isar.id_isar3; -+ return cpu->isar.regs[ID_ISAR3]; - case 0xd70: /* ISAR4. */ -- return cpu->isar.id_isar4; -+ return cpu->isar.regs[ID_ISAR4]; - case 0xd74: /* ISAR5. 
*/ -- return cpu->isar.id_isar5; -+ return cpu->isar.regs[ID_ISAR5]; - case 0xd78: /* CLIDR */ - return cpu->clidr; - case 0xd7c: /* CTR */ -@@ -1450,11 +1450,11 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) - } - return cpu->env.v7m.fpdscr[attrs.secure]; - case 0xf40: /* MVFR0 */ -- return cpu->isar.mvfr0; -+ return cpu->isar.regs[MVFR0]; - case 0xf44: /* MVFR1 */ -- return cpu->isar.mvfr1; -+ return cpu->isar.regs[MVFR1]; - case 0xf48: /* MVFR2 */ -- return cpu->isar.mvfr2; -+ return cpu->isar.regs[MVFR2]; - default: - bad_offset: - qemu_log_mask(LOG_GUEST_ERROR, "NVIC: Bad read offset 0x%x\n", offset); -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index c3728e3d..5bcdad0c 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -170,9 +170,9 @@ static void arm_cpu_reset(CPUState *s) - g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu); - - env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid; -- env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0; -- env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; -- env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; -+ env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.regs[MVFR0]; -+ env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.regs[MVFR1]; -+ env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.regs[MVFR2]; - - cpu->power_state = cpu->start_powered_off ? 
PSCI_OFF : PSCI_ON; - s->halted = cpu->start_powered_off; -@@ -1251,19 +1251,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - unset_feature(env, ARM_FEATURE_VFP3); - unset_feature(env, ARM_FEATURE_VFP4); - -- t = cpu->isar.id_aa64isar1; -+ t = cpu->isar.regs[ID_AA64ISAR1]; - t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0); -- cpu->isar.id_aa64isar1 = t; -+ cpu->isar.regs[ID_AA64ISAR1] = t; - -- t = cpu->isar.id_aa64pfr0; -+ t = cpu->isar.regs[ID_AA64PFR0]; - t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf); -- cpu->isar.id_aa64pfr0 = t; -+ cpu->isar.regs[ID_AA64PFR0] = t; - -- u = cpu->isar.id_isar6; -+ u = cpu->isar.regs[ID_ISAR6]; - u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0); -- cpu->isar.id_isar6 = u; -+ cpu->isar.regs[ID_ISAR6] = u; - -- u = cpu->isar.mvfr0; -+ u = cpu->isar.regs[MVFR0]; - u = FIELD_DP32(u, MVFR0, FPSP, 0); - u = FIELD_DP32(u, MVFR0, FPDP, 0); - u = FIELD_DP32(u, MVFR0, FPTRAP, 0); -@@ -1271,17 +1271,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - u = FIELD_DP32(u, MVFR0, FPSQRT, 0); - u = FIELD_DP32(u, MVFR0, FPSHVEC, 0); - u = FIELD_DP32(u, MVFR0, FPROUND, 0); -- cpu->isar.mvfr0 = u; -+ cpu->isar.regs[MVFR0] = u; - -- u = cpu->isar.mvfr1; -+ u = cpu->isar.regs[MVFR1]; - u = FIELD_DP32(u, MVFR1, FPFTZ, 0); - u = FIELD_DP32(u, MVFR1, FPDNAN, 0); - u = FIELD_DP32(u, MVFR1, FPHP, 0); -- cpu->isar.mvfr1 = u; -+ cpu->isar.regs[MVFR1] = u; - -- u = cpu->isar.mvfr2; -+ u = cpu->isar.regs[MVFR2]; - u = FIELD_DP32(u, MVFR2, FPMISC, 0); -- cpu->isar.mvfr2 = u; -+ cpu->isar.regs[MVFR2] = u; - } - - if (!cpu->has_neon) { -@@ -1290,56 +1290,56 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - - unset_feature(env, ARM_FEATURE_NEON); - -- t = cpu->isar.id_aa64isar0; -+ t = cpu->isar.regs[ID_AA64ISAR0]; - t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0); -- cpu->isar.id_aa64isar0 = t; -+ cpu->isar.regs[ID_AA64ISAR0] = t; - -- t = cpu->isar.id_aa64isar1; -+ t = cpu->isar.regs[ID_AA64ISAR1]; - t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 
0); -- cpu->isar.id_aa64isar1 = t; -+ cpu->isar.regs[ID_AA64ISAR1] = t; - -- t = cpu->isar.id_aa64pfr0; -+ t = cpu->isar.regs[ID_AA64PFR0]; - t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf); -- cpu->isar.id_aa64pfr0 = t; -+ cpu->isar.regs[ID_AA64PFR0] = t; - -- u = cpu->isar.id_isar5; -+ u = cpu->isar.regs[ID_ISAR5]; - u = FIELD_DP32(u, ID_ISAR5, RDM, 0); - u = FIELD_DP32(u, ID_ISAR5, VCMA, 0); -- cpu->isar.id_isar5 = u; -+ cpu->isar.regs[ID_ISAR5] = u; - -- u = cpu->isar.id_isar6; -+ u = cpu->isar.regs[ID_ISAR6]; - u = FIELD_DP32(u, ID_ISAR6, DP, 0); - u = FIELD_DP32(u, ID_ISAR6, FHM, 0); -- cpu->isar.id_isar6 = u; -+ cpu->isar.regs[ID_ISAR6] = u; - -- u = cpu->isar.mvfr1; -+ u = cpu->isar.regs[MVFR1]; - u = FIELD_DP32(u, MVFR1, SIMDLS, 0); - u = FIELD_DP32(u, MVFR1, SIMDINT, 0); - u = FIELD_DP32(u, MVFR1, SIMDSP, 0); - u = FIELD_DP32(u, MVFR1, SIMDHP, 0); - u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0); -- cpu->isar.mvfr1 = u; -+ cpu->isar.regs[MVFR1] = u; - -- u = cpu->isar.mvfr2; -+ u = cpu->isar.regs[MVFR2]; - u = FIELD_DP32(u, MVFR2, SIMDMISC, 0); -- cpu->isar.mvfr2 = u; -+ cpu->isar.regs[MVFR2] = u; - } - - if (!cpu->has_neon && !cpu->has_vfp) { - uint64_t t; - uint32_t u; - -- t = cpu->isar.id_aa64isar0; -+ t = cpu->isar.regs[ID_AA64ISAR0]; - t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0); -- cpu->isar.id_aa64isar0 = t; -+ cpu->isar.regs[ID_AA64ISAR0] = t; - -- t = cpu->isar.id_aa64isar1; -+ t = cpu->isar.regs[ID_AA64ISAR1]; - t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0); -- cpu->isar.id_aa64isar1 = t; -+ cpu->isar.regs[ID_AA64ISAR1] = t; - -- u = cpu->isar.mvfr0; -+ u = cpu->isar.regs[MVFR0]; - u = FIELD_DP32(u, MVFR0, SIMDREG, 0); -- cpu->isar.mvfr0 = u; -+ cpu->isar.regs[MVFR0] = u; - } - - if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) { -@@ -1347,19 +1347,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - - unset_feature(env, ARM_FEATURE_THUMB_DSP); - -- u = cpu->isar.id_isar1; -+ u = cpu->isar.regs[ID_ISAR1]; - u = FIELD_DP32(u, ID_ISAR1, 
EXTEND, 1); -- cpu->isar.id_isar1 = u; -+ cpu->isar.regs[ID_ISAR1] = u; - -- u = cpu->isar.id_isar2; -+ u = cpu->isar.regs[ID_ISAR2]; - u = FIELD_DP32(u, ID_ISAR2, MULTU, 1); - u = FIELD_DP32(u, ID_ISAR2, MULTS, 1); -- cpu->isar.id_isar2 = u; -+ cpu->isar.regs[ID_ISAR2] = u; - -- u = cpu->isar.id_isar3; -+ u = cpu->isar.regs[ID_ISAR3]; - u = FIELD_DP32(u, ID_ISAR3, SIMD, 1); - u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0); -- cpu->isar.id_isar3 = u; -+ cpu->isar.regs[ID_ISAR3] = u; - } - - /* Some features automatically imply others: */ -@@ -1499,7 +1499,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12]. - */ - cpu->id_pfr1 &= ~0xf0; -- cpu->isar.id_aa64pfr0 &= ~0xf000; -+ cpu->isar.regs[ID_AA64PFR0] &= ~0xf000; - } - - if (!cpu->has_el2) { -@@ -1522,9 +1522,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - cpu); - #endif - } else { -- cpu->isar.id_aa64dfr0 = -- FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); -- cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); -+ cpu->isar.regs[ID_AA64DFR0] = -+ FIELD_DP64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, PMUVER, 0); -+ cpu->isar.regs[ID_DFR0] = FIELD_DP32(cpu->isar.regs[ID_DFR0], ID_DFR0, -+ PERFMON, 0); - cpu->pmceid0 = 0; - cpu->pmceid1 = 0; - } -@@ -1534,7 +1535,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) - * registers if we don't have EL2. These are id_pfr1[15:12] and - * id_aa64pfr0_el1[11:8]. - */ -- cpu->isar.id_aa64pfr0 &= ~0xf00; -+ cpu->isar.regs[ID_AA64PFR0] &= ~0xf00; - cpu->id_pfr1 &= ~0xf000; - } - -@@ -1675,13 +1676,15 @@ static void arm926_initfn(Object *obj) - * ARMv5 does not have the ID_ISAR registers, but we can still - * set the field to indicate Jazelle support within QEMU. 
- */ -- cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); -+ cpu->isar.regs[ID_ISAR1] = FIELD_DP32(cpu->isar.regs[ID_ISAR1], ID_ISAR1, -+ JAZELLE, 1); - /* - * Similarly, we need to set MVFR0 fields to enable double precision - * and short vector support even though ARMv5 doesn't have this register. - */ -- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); -- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); -+ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, -+ FPSHVEC, 1); -+ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, FPDP, 1); - } - - static void arm946_initfn(Object *obj) -@@ -1717,13 +1720,15 @@ static void arm1026_initfn(Object *obj) - * ARMv5 does not have the ID_ISAR registers, but we can still - * set the field to indicate Jazelle support within QEMU. - */ -- cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); -+ cpu->isar.regs[ID_ISAR1] = FIELD_DP32(cpu->isar.regs[ID_ISAR1], ID_ISAR1, -+ JAZELLE, 1); - /* - * Similarly, we need to set MVFR0 fields to enable double precision - * and short vector support even though ARMv5 doesn't have this register. 
- */ -- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); -- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); -+ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, -+ FPSHVEC, 1); -+ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, FPDP, 1); - - { - /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ -@@ -1756,22 +1761,22 @@ static void arm1136_r2_initfn(Object *obj) - set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); - cpu->midr = 0x4107b362; - cpu->reset_fpsid = 0x410120b4; -- cpu->isar.mvfr0 = 0x11111111; -- cpu->isar.mvfr1 = 0x00000000; -+ cpu->isar.regs[MVFR0] = 0x11111111; -+ cpu->isar.regs[MVFR1] = 0x00000000; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; -- cpu->isar.id_dfr0 = 0x2; -+ cpu->isar.regs[ID_DFR0] = 0x2; - cpu->id_afr0 = 0x3; -- cpu->isar.id_mmfr0 = 0x01130003; -- cpu->isar.id_mmfr1 = 0x10030302; -- cpu->isar.id_mmfr2 = 0x01222110; -- cpu->isar.id_isar0 = 0x00140011; -- cpu->isar.id_isar1 = 0x12002111; -- cpu->isar.id_isar2 = 0x11231111; -- cpu->isar.id_isar3 = 0x01102131; -- cpu->isar.id_isar4 = 0x141; -+ cpu->isar.regs[ID_MMFR0] = 0x01130003; -+ cpu->isar.regs[ID_MMFR1] = 0x10030302; -+ cpu->isar.regs[ID_MMFR2] = 0x01222110; -+ cpu->isar.regs[ID_ISAR0] = 0x00140011; -+ cpu->isar.regs[ID_ISAR1] = 0x12002111; -+ cpu->isar.regs[ID_ISAR2] = 0x11231111; -+ cpu->isar.regs[ID_ISAR3] = 0x01102131; -+ cpu->isar.regs[ID_ISAR4] = 0x141; - cpu->reset_auxcr = 7; - } - -@@ -1788,22 +1793,22 @@ static void arm1136_initfn(Object *obj) - set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); - cpu->midr = 0x4117b363; - cpu->reset_fpsid = 0x410120b4; -- cpu->isar.mvfr0 = 0x11111111; -- cpu->isar.mvfr1 = 0x00000000; -+ cpu->isar.regs[MVFR0] = 0x11111111; -+ cpu->isar.regs[MVFR1] = 0x00000000; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; -- cpu->isar.id_dfr0 = 0x2; -+ 
cpu->isar.regs[ID_DFR0] = 0x2; - cpu->id_afr0 = 0x3; -- cpu->isar.id_mmfr0 = 0x01130003; -- cpu->isar.id_mmfr1 = 0x10030302; -- cpu->isar.id_mmfr2 = 0x01222110; -- cpu->isar.id_isar0 = 0x00140011; -- cpu->isar.id_isar1 = 0x12002111; -- cpu->isar.id_isar2 = 0x11231111; -- cpu->isar.id_isar3 = 0x01102131; -- cpu->isar.id_isar4 = 0x141; -+ cpu->isar.regs[ID_MMFR0] = 0x01130003; -+ cpu->isar.regs[ID_MMFR1] = 0x10030302; -+ cpu->isar.regs[ID_MMFR2] = 0x01222110; -+ cpu->isar.regs[ID_ISAR0] = 0x00140011; -+ cpu->isar.regs[ID_ISAR1] = 0x12002111; -+ cpu->isar.regs[ID_ISAR2] = 0x11231111; -+ cpu->isar.regs[ID_ISAR3] = 0x01102131; -+ cpu->isar.regs[ID_ISAR4] = 0x141; - cpu->reset_auxcr = 7; - } - -@@ -1821,22 +1826,22 @@ static void arm1176_initfn(Object *obj) - set_feature(&cpu->env, ARM_FEATURE_EL3); - cpu->midr = 0x410fb767; - cpu->reset_fpsid = 0x410120b5; -- cpu->isar.mvfr0 = 0x11111111; -- cpu->isar.mvfr1 = 0x00000000; -+ cpu->isar.regs[MVFR0] = 0x11111111; -+ cpu->isar.regs[MVFR1] = 0x00000000; - cpu->ctr = 0x1dd20d2; - cpu->reset_sctlr = 0x00050078; - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x11; -- cpu->isar.id_dfr0 = 0x33; -+ cpu->isar.regs[ID_DFR0] = 0x33; - cpu->id_afr0 = 0; -- cpu->isar.id_mmfr0 = 0x01130003; -- cpu->isar.id_mmfr1 = 0x10030302; -- cpu->isar.id_mmfr2 = 0x01222100; -- cpu->isar.id_isar0 = 0x0140011; -- cpu->isar.id_isar1 = 0x12002111; -- cpu->isar.id_isar2 = 0x11231121; -- cpu->isar.id_isar3 = 0x01102131; -- cpu->isar.id_isar4 = 0x01141; -+ cpu->isar.regs[ID_MMFR0] = 0x01130003; -+ cpu->isar.regs[ID_MMFR1] = 0x10030302; -+ cpu->isar.regs[ID_MMFR2] = 0x01222100; -+ cpu->isar.regs[ID_ISAR0] = 0x0140011; -+ cpu->isar.regs[ID_ISAR1] = 0x12002111; -+ cpu->isar.regs[ID_ISAR2] = 0x11231121; -+ cpu->isar.regs[ID_ISAR3] = 0x01102131; -+ cpu->isar.regs[ID_ISAR4] = 0x01141; - cpu->reset_auxcr = 7; - } - -@@ -1852,21 +1857,21 @@ static void arm11mpcore_initfn(Object *obj) - set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); - cpu->midr = 0x410fb022; - 
cpu->reset_fpsid = 0x410120b4; -- cpu->isar.mvfr0 = 0x11111111; -- cpu->isar.mvfr1 = 0x00000000; -+ cpu->isar.regs[MVFR0] = 0x11111111; -+ cpu->isar.regs[MVFR1] = 0x00000000; - cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ - cpu->id_pfr0 = 0x111; - cpu->id_pfr1 = 0x1; -- cpu->isar.id_dfr0 = 0; -+ cpu->isar.regs[ID_DFR0] = 0; - cpu->id_afr0 = 0x2; -- cpu->isar.id_mmfr0 = 0x01100103; -- cpu->isar.id_mmfr1 = 0x10020302; -- cpu->isar.id_mmfr2 = 0x01222000; -- cpu->isar.id_isar0 = 0x00100011; -- cpu->isar.id_isar1 = 0x12002111; -- cpu->isar.id_isar2 = 0x11221011; -- cpu->isar.id_isar3 = 0x01102131; -- cpu->isar.id_isar4 = 0x141; -+ cpu->isar.regs[ID_MMFR0] = 0x01100103; -+ cpu->isar.regs[ID_MMFR1] = 0x10020302; -+ cpu->isar.regs[ID_MMFR2] = 0x01222000; -+ cpu->isar.regs[ID_ISAR0] = 0x00100011; -+ cpu->isar.regs[ID_ISAR1] = 0x12002111; -+ cpu->isar.regs[ID_ISAR2] = 0x11221011; -+ cpu->isar.regs[ID_ISAR3] = 0x01102131; -+ cpu->isar.regs[ID_ISAR4] = 0x141; - cpu->reset_auxcr = 1; - } - -@@ -1889,19 +1894,19 @@ static void cortex_m3_initfn(Object *obj) - cpu->pmsav7_dregion = 8; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000200; -- cpu->isar.id_dfr0 = 0x00100000; -+ cpu->isar.regs[ID_DFR0] = 0x00100000; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x00000030; -- cpu->isar.id_mmfr1 = 0x00000000; -- cpu->isar.id_mmfr2 = 0x00000000; -- cpu->isar.id_mmfr3 = 0x00000000; -- cpu->isar.id_isar0 = 0x01141110; -- cpu->isar.id_isar1 = 0x02111000; -- cpu->isar.id_isar2 = 0x21112231; -- cpu->isar.id_isar3 = 0x01111110; -- cpu->isar.id_isar4 = 0x01310102; -- cpu->isar.id_isar5 = 0x00000000; -- cpu->isar.id_isar6 = 0x00000000; -+ cpu->isar.regs[ID_MMFR0] = 0x00000030; -+ cpu->isar.regs[ID_MMFR1] = 0x00000000; -+ cpu->isar.regs[ID_MMFR2] = 0x00000000; -+ cpu->isar.regs[ID_MMFR3] = 0x00000000; -+ cpu->isar.regs[ID_ISAR0] = 0x01141110; -+ cpu->isar.regs[ID_ISAR1] = 0x02111000; -+ cpu->isar.regs[ID_ISAR2] = 0x21112231; -+ cpu->isar.regs[ID_ISAR3] = 0x01111110; -+ 
cpu->isar.regs[ID_ISAR4] = 0x01310102; -+ cpu->isar.regs[ID_ISAR5] = 0x00000000; -+ cpu->isar.regs[ID_ISAR6] = 0x00000000; - } - - static void cortex_m4_initfn(Object *obj) -@@ -1915,24 +1920,24 @@ static void cortex_m4_initfn(Object *obj) - set_feature(&cpu->env, ARM_FEATURE_VFP4); - cpu->midr = 0x410fc240; /* r0p0 */ - cpu->pmsav7_dregion = 8; -- cpu->isar.mvfr0 = 0x10110021; -- cpu->isar.mvfr1 = 0x11000011; -- cpu->isar.mvfr2 = 0x00000000; -+ cpu->isar.regs[MVFR0] = 0x10110021; -+ cpu->isar.regs[MVFR1] = 0x11000011; -+ cpu->isar.regs[MVFR2] = 0x00000000; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000200; -- cpu->isar.id_dfr0 = 0x00100000; -+ cpu->isar.regs[ID_DFR0] = 0x00100000; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x00000030; -- cpu->isar.id_mmfr1 = 0x00000000; -- cpu->isar.id_mmfr2 = 0x00000000; -- cpu->isar.id_mmfr3 = 0x00000000; -- cpu->isar.id_isar0 = 0x01141110; -- cpu->isar.id_isar1 = 0x02111000; -- cpu->isar.id_isar2 = 0x21112231; -- cpu->isar.id_isar3 = 0x01111110; -- cpu->isar.id_isar4 = 0x01310102; -- cpu->isar.id_isar5 = 0x00000000; -- cpu->isar.id_isar6 = 0x00000000; -+ cpu->isar.regs[ID_MMFR0] = 0x00000030; -+ cpu->isar.regs[ID_MMFR1] = 0x00000000; -+ cpu->isar.regs[ID_MMFR2] = 0x00000000; -+ cpu->isar.regs[ID_MMFR3] = 0x00000000; -+ cpu->isar.regs[ID_ISAR0] = 0x01141110; -+ cpu->isar.regs[ID_ISAR1] = 0x02111000; -+ cpu->isar.regs[ID_ISAR2] = 0x21112231; -+ cpu->isar.regs[ID_ISAR3] = 0x01111110; -+ cpu->isar.regs[ID_ISAR4] = 0x01310102; -+ cpu->isar.regs[ID_ISAR5] = 0x00000000; -+ cpu->isar.regs[ID_ISAR6] = 0x00000000; - } - - static void cortex_m33_initfn(Object *obj) -@@ -1948,24 +1953,24 @@ static void cortex_m33_initfn(Object *obj) - cpu->midr = 0x410fd213; /* r0p3 */ - cpu->pmsav7_dregion = 16; - cpu->sau_sregion = 8; -- cpu->isar.mvfr0 = 0x10110021; -- cpu->isar.mvfr1 = 0x11000011; -- cpu->isar.mvfr2 = 0x00000040; -+ cpu->isar.regs[MVFR0] = 0x10110021; -+ cpu->isar.regs[MVFR1] = 0x11000011; -+ cpu->isar.regs[MVFR2] = 
0x00000040; - cpu->id_pfr0 = 0x00000030; - cpu->id_pfr1 = 0x00000210; -- cpu->isar.id_dfr0 = 0x00200000; -+ cpu->isar.regs[ID_DFR0] = 0x00200000; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x00101F40; -- cpu->isar.id_mmfr1 = 0x00000000; -- cpu->isar.id_mmfr2 = 0x01000000; -- cpu->isar.id_mmfr3 = 0x00000000; -- cpu->isar.id_isar0 = 0x01101110; -- cpu->isar.id_isar1 = 0x02212000; -- cpu->isar.id_isar2 = 0x20232232; -- cpu->isar.id_isar3 = 0x01111131; -- cpu->isar.id_isar4 = 0x01310132; -- cpu->isar.id_isar5 = 0x00000000; -- cpu->isar.id_isar6 = 0x00000000; -+ cpu->isar.regs[ID_MMFR0] = 0x00101F40; -+ cpu->isar.regs[ID_MMFR1] = 0x00000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01000000; -+ cpu->isar.regs[ID_MMFR3] = 0x00000000; -+ cpu->isar.regs[ID_ISAR0] = 0x01101110; -+ cpu->isar.regs[ID_ISAR1] = 0x02212000; -+ cpu->isar.regs[ID_ISAR2] = 0x20232232; -+ cpu->isar.regs[ID_ISAR3] = 0x01111131; -+ cpu->isar.regs[ID_ISAR4] = 0x01310132; -+ cpu->isar.regs[ID_ISAR5] = 0x00000000; -+ cpu->isar.regs[ID_ISAR6] = 0x00000000; - cpu->clidr = 0x00000000; - cpu->ctr = 0x8000c000; - } -@@ -2004,19 +2009,19 @@ static void cortex_r5_initfn(Object *obj) - cpu->midr = 0x411fc153; /* r1p3 */ - cpu->id_pfr0 = 0x0131; - cpu->id_pfr1 = 0x001; -- cpu->isar.id_dfr0 = 0x010400; -+ cpu->isar.regs[ID_DFR0] = 0x010400; - cpu->id_afr0 = 0x0; -- cpu->isar.id_mmfr0 = 0x0210030; -- cpu->isar.id_mmfr1 = 0x00000000; -- cpu->isar.id_mmfr2 = 0x01200000; -- cpu->isar.id_mmfr3 = 0x0211; -- cpu->isar.id_isar0 = 0x02101111; -- cpu->isar.id_isar1 = 0x13112111; -- cpu->isar.id_isar2 = 0x21232141; -- cpu->isar.id_isar3 = 0x01112131; -- cpu->isar.id_isar4 = 0x0010142; -- cpu->isar.id_isar5 = 0x0; -- cpu->isar.id_isar6 = 0x0; -+ cpu->isar.regs[ID_MMFR0] = 0x0210030; -+ cpu->isar.regs[ID_MMFR1] = 0x00000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01200000; -+ cpu->isar.regs[ID_MMFR3] = 0x0211; -+ cpu->isar.regs[ID_ISAR0] = 0x02101111; -+ cpu->isar.regs[ID_ISAR1] = 0x13112111; -+ cpu->isar.regs[ID_ISAR2] = 
0x21232141; -+ cpu->isar.regs[ID_ISAR3] = 0x01112131; -+ cpu->isar.regs[ID_ISAR4] = 0x0010142; -+ cpu->isar.regs[ID_ISAR5] = 0x0; -+ cpu->isar.regs[ID_ISAR6] = 0x0; - cpu->mp_is_up = true; - cpu->pmsav7_dregion = 16; - define_arm_cp_regs(cpu, cortexr5_cp_reginfo); -@@ -2028,8 +2033,8 @@ static void cortex_r5f_initfn(Object *obj) - - cortex_r5_initfn(obj); - set_feature(&cpu->env, ARM_FEATURE_VFP3); -- cpu->isar.mvfr0 = 0x10110221; -- cpu->isar.mvfr1 = 0x00000011; -+ cpu->isar.regs[MVFR0] = 0x10110221; -+ cpu->isar.regs[MVFR1] = 0x00000011; - } - - static const ARMCPRegInfo cortexa8_cp_reginfo[] = { -@@ -2053,24 +2058,24 @@ static void cortex_a8_initfn(Object *obj) - set_feature(&cpu->env, ARM_FEATURE_EL3); - cpu->midr = 0x410fc080; - cpu->reset_fpsid = 0x410330c0; -- cpu->isar.mvfr0 = 0x11110222; -- cpu->isar.mvfr1 = 0x00011111; -+ cpu->isar.regs[MVFR0] = 0x11110222; -+ cpu->isar.regs[MVFR1] = 0x00011111; - cpu->ctr = 0x82048004; - cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x1031; - cpu->id_pfr1 = 0x11; -- cpu->isar.id_dfr0 = 0x400; -+ cpu->isar.regs[ID_DFR0] = 0x400; - cpu->id_afr0 = 0; -- cpu->isar.id_mmfr0 = 0x31100003; -- cpu->isar.id_mmfr1 = 0x20000000; -- cpu->isar.id_mmfr2 = 0x01202000; -- cpu->isar.id_mmfr3 = 0x11; -- cpu->isar.id_isar0 = 0x00101111; -- cpu->isar.id_isar1 = 0x12112111; -- cpu->isar.id_isar2 = 0x21232031; -- cpu->isar.id_isar3 = 0x11112131; -- cpu->isar.id_isar4 = 0x00111142; -- cpu->isar.dbgdidr = 0x15141000; -+ cpu->isar.regs[ID_MMFR0] = 0x31100003; -+ cpu->isar.regs[ID_MMFR1] = 0x20000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01202000; -+ cpu->isar.regs[ID_MMFR3] = 0x11; -+ cpu->isar.regs[ID_ISAR0] = 0x00101111; -+ cpu->isar.regs[ID_ISAR1] = 0x12112111; -+ cpu->isar.regs[ID_ISAR2] = 0x21232031; -+ cpu->isar.regs[ID_ISAR3] = 0x11112131; -+ cpu->isar.regs[ID_ISAR4] = 0x00111142; -+ cpu->isar.regs[DBGDIDR] = 0x15141000; - cpu->clidr = (1 << 27) | (2 << 24) | 3; - cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. 
*/ - cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ -@@ -2126,24 +2131,24 @@ static void cortex_a9_initfn(Object *obj) - set_feature(&cpu->env, ARM_FEATURE_CBAR); - cpu->midr = 0x410fc090; - cpu->reset_fpsid = 0x41033090; -- cpu->isar.mvfr0 = 0x11110222; -- cpu->isar.mvfr1 = 0x01111111; -+ cpu->isar.regs[MVFR0] = 0x11110222; -+ cpu->isar.regs[MVFR1] = 0x01111111; - cpu->ctr = 0x80038003; - cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x1031; - cpu->id_pfr1 = 0x11; -- cpu->isar.id_dfr0 = 0x000; -+ cpu->isar.regs[ID_DFR0] = 0x000; - cpu->id_afr0 = 0; -- cpu->isar.id_mmfr0 = 0x00100103; -- cpu->isar.id_mmfr1 = 0x20000000; -- cpu->isar.id_mmfr2 = 0x01230000; -- cpu->isar.id_mmfr3 = 0x00002111; -- cpu->isar.id_isar0 = 0x00101111; -- cpu->isar.id_isar1 = 0x13112111; -- cpu->isar.id_isar2 = 0x21232041; -- cpu->isar.id_isar3 = 0x11112131; -- cpu->isar.id_isar4 = 0x00111142; -- cpu->isar.dbgdidr = 0x35141000; -+ cpu->isar.regs[ID_MMFR0] = 0x00100103; -+ cpu->isar.regs[ID_MMFR1] = 0x20000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01230000; -+ cpu->isar.regs[ID_MMFR3] = 0x00002111; -+ cpu->isar.regs[ID_ISAR0] = 0x00101111; -+ cpu->isar.regs[ID_ISAR1] = 0x13112111; -+ cpu->isar.regs[ID_ISAR2] = 0x21232041; -+ cpu->isar.regs[ID_ISAR3] = 0x11112131; -+ cpu->isar.regs[ID_ISAR4] = 0x00111142; -+ cpu->isar.regs[DBGDIDR] = 0x35141000; - cpu->clidr = (1 << 27) | (1 << 24) | 3; - cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ - cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ -@@ -2191,27 +2196,27 @@ static void cortex_a7_initfn(Object *obj) - cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7; - cpu->midr = 0x410fc075; - cpu->reset_fpsid = 0x41023075; -- cpu->isar.mvfr0 = 0x10110222; -- cpu->isar.mvfr1 = 0x11111111; -+ cpu->isar.regs[MVFR0] = 0x10110222; -+ cpu->isar.regs[MVFR1] = 0x11111111; - cpu->ctr = 0x84448003; - cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x00001131; - cpu->id_pfr1 = 0x00011011; -- cpu->isar.id_dfr0 = 0x02010555; -+ cpu->isar.regs[ID_DFR0] = 0x02010555; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x10101105; -- cpu->isar.id_mmfr1 = 0x40000000; -- cpu->isar.id_mmfr2 = 0x01240000; -- cpu->isar.id_mmfr3 = 0x02102211; -+ cpu->isar.regs[ID_MMFR0] = 0x10101105; -+ cpu->isar.regs[ID_MMFR1] = 0x40000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01240000; -+ cpu->isar.regs[ID_MMFR3] = 0x02102211; - /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but - * table 4-41 gives 0x02101110, which includes the arm div insns. - */ -- cpu->isar.id_isar0 = 0x02101110; -- cpu->isar.id_isar1 = 0x13112111; -- cpu->isar.id_isar2 = 0x21232041; -- cpu->isar.id_isar3 = 0x11112131; -- cpu->isar.id_isar4 = 0x10011142; -- cpu->isar.dbgdidr = 0x3515f005; -+ cpu->isar.regs[ID_ISAR0] = 0x02101110; -+ cpu->isar.regs[ID_ISAR1] = 0x13112111; -+ cpu->isar.regs[ID_ISAR2] = 0x21232041; -+ cpu->isar.regs[ID_ISAR3] = 0x11112131; -+ cpu->isar.regs[ID_ISAR4] = 0x10011142; -+ cpu->isar.regs[DBGDIDR] = 0x3515f005; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ -@@ -2237,24 +2242,24 @@ static void cortex_a15_initfn(Object *obj) - cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15; - cpu->midr = 0x412fc0f1; - cpu->reset_fpsid = 0x410430f0; -- cpu->isar.mvfr0 = 0x10110222; -- cpu->isar.mvfr1 = 0x11111111; -+ cpu->isar.regs[MVFR0] = 0x10110222; -+ cpu->isar.regs[MVFR1] = 0x11111111; - cpu->ctr = 0x8444c004; - cpu->reset_sctlr = 0x00c50078; - cpu->id_pfr0 = 0x00001131; - 
cpu->id_pfr1 = 0x00011011; -- cpu->isar.id_dfr0 = 0x02010555; -+ cpu->isar.regs[ID_DFR0] = 0x02010555; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x10201105; -- cpu->isar.id_mmfr1 = 0x20000000; -- cpu->isar.id_mmfr2 = 0x01240000; -- cpu->isar.id_mmfr3 = 0x02102211; -- cpu->isar.id_isar0 = 0x02101110; -- cpu->isar.id_isar1 = 0x13112111; -- cpu->isar.id_isar2 = 0x21232041; -- cpu->isar.id_isar3 = 0x11112131; -- cpu->isar.id_isar4 = 0x10011142; -- cpu->isar.dbgdidr = 0x3515f021; -+ cpu->isar.regs[ID_MMFR0] = 0x10201105; -+ cpu->isar.regs[ID_MMFR1] = 0x20000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01240000; -+ cpu->isar.regs[ID_MMFR3] = 0x02102211; -+ cpu->isar.regs[ID_ISAR0] = 0x02101110; -+ cpu->isar.regs[ID_ISAR1] = 0x13112111; -+ cpu->isar.regs[ID_ISAR2] = 0x21232041; -+ cpu->isar.regs[ID_ISAR3] = 0x11112131; -+ cpu->isar.regs[ID_ISAR4] = 0x10011142; -+ cpu->isar.regs[DBGDIDR] = 0x3515f021; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ -@@ -2447,7 +2452,8 @@ static void arm_max_initfn(Object *obj) - cortex_a15_initfn(obj); - - /* old-style VFP short-vector support */ -- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); -+ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, -+ FPSHVEC, 1); - - #ifdef CONFIG_USER_ONLY - /* We don't set these in system emulation mode for the moment, -@@ -2458,39 +2464,39 @@ static void arm_max_initfn(Object *obj) - { - uint32_t t; - -- t = cpu->isar.id_isar5; -+ t = cpu->isar.regs[ID_ISAR5]; - t = FIELD_DP32(t, ID_ISAR5, AES, 2); - t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); - t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); - t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); - t = FIELD_DP32(t, ID_ISAR5, RDM, 1); - t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); -- cpu->isar.id_isar5 = t; -+ cpu->isar.regs[ID_ISAR5] = t; - -- t = cpu->isar.id_isar6; -+ t = cpu->isar.regs[ID_ISAR6]; - t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); - t = FIELD_DP32(t, ID_ISAR6, DP, 
1); - t = FIELD_DP32(t, ID_ISAR6, FHM, 1); - t = FIELD_DP32(t, ID_ISAR6, SB, 1); - t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); -- cpu->isar.id_isar6 = t; -+ cpu->isar.regs[ID_ISAR6] = t; - -- t = cpu->isar.mvfr1; -+ t = cpu->isar.regs[MVFR1]; - t = FIELD_DP32(t, MVFR1, FPHP, 2); /* v8.0 FP support */ -- cpu->isar.mvfr1 = t; -+ cpu->isar.regs[MVFR1] = t; - -- t = cpu->isar.mvfr2; -+ t = cpu->isar.regs[MVFR2]; - t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */ - t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ -- cpu->isar.mvfr2 = t; -+ cpu->isar.regs[MVFR2] = t; - -- t = cpu->isar.id_mmfr3; -+ t = cpu->isar.regs[ID_MMFR3]; - t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ -- cpu->isar.id_mmfr3 = t; -+ cpu->isar.regs[ID_MMFR3] = t; - -- t = cpu->isar.id_mmfr4; -+ t = cpu->isar.regs[ID_MMFR4]; - t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ -- cpu->isar.id_mmfr4 = t; -+ cpu->isar.regs[ID_MMFR4] = t; - } - #endif - } -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 56d8cd8c..7bb481fb 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -63,6 +63,37 @@ - #define ARMV7M_EXCP_PENDSV 14 - #define ARMV7M_EXCP_SYSTICK 15 - -+typedef enum CPUIDReg { -+ MIDR_EL1, -+ ID_ISAR0, -+ ID_ISAR1, -+ ID_ISAR2, -+ ID_ISAR3, -+ ID_ISAR4, -+ ID_ISAR5, -+ ID_ISAR6, -+ ID_MMFR0, -+ ID_MMFR1, -+ ID_MMFR2, -+ ID_MMFR3, -+ ID_MMFR4, -+ ID_AA64ISAR0, -+ ID_AA64ISAR1, -+ ID_AA64PFR0, -+ ID_AA64PFR1, -+ ID_AA64MMFR0, -+ ID_AA64MMFR1, -+ ID_AA64MMFR2, -+ ID_AA64DFR0, -+ ID_AA64DFR1, -+ ID_DFR0, -+ MVFR0, -+ MVFR1, -+ MVFR2, -+ DBGDIDR, -+ ID_MAX, -+} CPUIDReg; -+ - /* For M profile, some registers are banked secure vs non-secure; - * these are represented as a 2-element array where the first element - * is the non-secure copy and the second is the secure copy. -@@ -855,32 +886,7 @@ struct ARMCPU { - * field by reading the value from the KVM vCPU. 
- */ - struct ARMISARegisters { -- uint32_t id_isar0; -- uint32_t id_isar1; -- uint32_t id_isar2; -- uint32_t id_isar3; -- uint32_t id_isar4; -- uint32_t id_isar5; -- uint32_t id_isar6; -- uint32_t id_mmfr0; -- uint32_t id_mmfr1; -- uint32_t id_mmfr2; -- uint32_t id_mmfr3; -- uint32_t id_mmfr4; -- uint32_t mvfr0; -- uint32_t mvfr1; -- uint32_t mvfr2; -- uint32_t id_dfr0; -- uint32_t dbgdidr; -- uint64_t id_aa64isar0; -- uint64_t id_aa64isar1; -- uint64_t id_aa64pfr0; -- uint64_t id_aa64pfr1; -- uint64_t id_aa64mmfr0; -- uint64_t id_aa64mmfr1; -- uint64_t id_aa64mmfr2; -- uint64_t id_aa64dfr0; -- uint64_t id_aa64dfr1; -+ uint64_t regs[ID_MAX]; - } isar; - uint32_t midr; - uint32_t revidr; -@@ -3358,77 +3364,77 @@ extern const uint64_t pred_esz_masks[4]; - */ - static inline bool isar_feature_thumb_div(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR0], ID_ISAR0, DIVIDE) != 0; - } - - static inline bool isar_feature_arm_div(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; -+ return FIELD_EX32(id->regs[ID_ISAR0], ID_ISAR0, DIVIDE) > 1; - } - - static inline bool isar_feature_jazelle(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR1], ID_ISAR1, JAZELLE) != 0; - } - - static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, AES) != 0; - } - - static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; -+ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, AES) > 1; - } - - static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, SHA1) != 0; - } - - static inline bool 
isar_feature_aa32_sha2(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, SHA2) != 0; - } - - static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, CRC32) != 0; - } - - static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, RDM) != 0; - } - - static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, VCMA) != 0; - } - - static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, JSCVT) != 0; - } - - static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, DP) != 0; - } - - static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, FHM) != 0; - } - - static inline bool isar_feature_aa32_sb(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, SB) != 0; - } - - static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0; -+ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, SPECRES) != 0; - } - - static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) -@@ -3438,24 +3444,24 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) - * the ARMv8.2-FP16 extension is 
implemented for aa32 mode. - * At which point we can properly set and check MVFR1.FPHP. - */ -- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; -+ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, FP) == 1; - } - - static inline bool isar_feature_aa32_fp_d32(const ARMISARegisters *id) - { - /* Return true if D16-D31 are implemented */ -- return FIELD_EX64(id->mvfr0, MVFR0, SIMDREG) >= 2; -+ return FIELD_EX64(id->regs[MVFR0], MVFR0, SIMDREG) >= 2; - } - - static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0; -+ return FIELD_EX64(id->regs[MVFR0], MVFR0, FPSHVEC) > 0; - } - - static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id) - { - /* Return true if CPU supports double precision floating point */ -- return FIELD_EX64(id->mvfr0, MVFR0, FPDP) > 0; -+ return FIELD_EX64(id->regs[MVFR0], MVFR0, FPDP) > 0; - } - - /* -@@ -3465,49 +3471,49 @@ static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id) - */ - static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 0; -+ return FIELD_EX64(id->regs[MVFR1], MVFR1, FPHP) > 0; - } - - static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 1; -+ return FIELD_EX64(id->regs[MVFR1], MVFR1, FPHP) > 1; - } - - static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 1; -+ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 1; - } - - static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 2; -+ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 2; - } - - static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 3; -+ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 3; - } - - 
static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) - { -- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4; -+ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 4; - } - - static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; -+ return FIELD_EX32(id->regs[ID_MMFR3], ID_MMFR3, PAN) != 0; - } - - static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) - { -- return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; -+ return FIELD_EX32(id->regs[ID_MMFR3], ID_MMFR3, PAN) >= 2; - } - - static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) - { - /* 0xf means "non-standard IMPDEF PMU" */ -- return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && -- FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; -+ return FIELD_EX32(id->regs[ID_DFR0], ID_DFR0, PERFMON) >= 4 && -+ FIELD_EX32(id->regs[ID_DFR0], ID_DFR0, PERFMON) != 0xf; - } - - /* -@@ -3515,92 +3521,92 @@ static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) - */ - static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, AES) != 0; - } - - static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, AES) > 1; - } - - static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA1) != 0; - } - - static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA2) != 0; - } - - static inline bool isar_feature_aa64_sha512(const ARMISARegisters 
*id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA2) > 1; - } - - static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, CRC32) != 0; - } - - static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, ATOMIC) != 0; - } - - static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, RDM) != 0; - } - - static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA3) != 0; - } - - static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SM3) != 0; - } - - static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SM4) != 0; - } - - static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, DP) != 0; - } - - static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, FHM) != 0; - } - - static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, 
ID_AA64ISAR0, TS) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, TS) != 0; - } - - static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, TS) >= 2; - } - - static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, RNDR) != 0; - } - - static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, JSCVT) != 0; - } - - static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, FCMA) != 0; - } - - static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) -@@ -3611,7 +3617,7 @@ static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) - * defined algorithms, and thus API+GPI, and this predicate controls - * migration of the 128-bit keys. 
- */ -- return (id->id_aa64isar1 & -+ return (id->regs[ID_AA64ISAR1] & - (FIELD_DP64(0, ID_AA64ISAR1, APA, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, API, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, GPA, 0xf) | -@@ -3620,59 +3626,59 @@ static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) - - static inline bool isar_feature_aa64_sb(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, SB) != 0; - } - - static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, SPECRES) != 0; - } - - static inline bool isar_feature_aa64_frint(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0; -+ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, FRINTTS) != 0; - } - - static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) - { - /* We always set the AdvSIMD and FP fields identically wrt FP16. 
*/ -- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; -+ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, FP) == 1; - } - - static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2; -+ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, EL0) >= 2; - } - - static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; -+ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, SVE) != 0; - } - - static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; -+ return FIELD_EX64(id->regs[ID_AA64MMFR1], ID_AA64MMFR1, LO) != 0; - } - - static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; -+ return FIELD_EX64(id->regs[ID_AA64MMFR1], ID_AA64MMFR1, PAN) != 0; - } - - static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; -+ return FIELD_EX64(id->regs[ID_AA64MMFR1], ID_AA64MMFR1, PAN) >= 2; - } - - static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; -+ return FIELD_EX64(id->regs[ID_AA64PFR1], ID_AA64PFR1, BT) != 0; - } - - static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) - { -- return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && -- FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; -+ return FIELD_EX64(id->regs[ID_AA64DFR0], ID_AA64DFR0, PMUVER) >= 4 && -+ FIELD_EX64(id->regs[ID_AA64DFR0], ID_AA64DFR0, PMUVER) != 0xf; - } - - static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index d450b8c8..fe648752 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -116,31 +116,31 @@ 
static void aarch64_a57_initfn(Object *obj) - cpu->midr = 0x411fd070; - cpu->revidr = 0x00000000; - cpu->reset_fpsid = 0x41034070; -- cpu->isar.mvfr0 = 0x10110222; -- cpu->isar.mvfr1 = 0x12111111; -- cpu->isar.mvfr2 = 0x00000043; -+ cpu->isar.regs[MVFR0] = 0x10110222; -+ cpu->isar.regs[MVFR1] = 0x12111111; -+ cpu->isar.regs[MVFR2] = 0x00000043; - cpu->ctr = 0x8444c004; - cpu->reset_sctlr = 0x00c50838; - cpu->id_pfr0 = 0x00000131; - cpu->id_pfr1 = 0x00011011; -- cpu->isar.id_dfr0 = 0x03010066; -+ cpu->isar.regs[ID_DFR0] = 0x03010066; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x10101105; -- cpu->isar.id_mmfr1 = 0x40000000; -- cpu->isar.id_mmfr2 = 0x01260000; -- cpu->isar.id_mmfr3 = 0x02102211; -- cpu->isar.id_isar0 = 0x02101110; -- cpu->isar.id_isar1 = 0x13112111; -- cpu->isar.id_isar2 = 0x21232042; -- cpu->isar.id_isar3 = 0x01112131; -- cpu->isar.id_isar4 = 0x00011142; -- cpu->isar.id_isar5 = 0x00011121; -- cpu->isar.id_isar6 = 0; -- cpu->isar.id_aa64pfr0 = 0x00002222; -- cpu->isar.id_aa64dfr0 = 0x10305106; -- cpu->isar.id_aa64isar0 = 0x00011120; -- cpu->isar.id_aa64mmfr0 = 0x00001124; -- cpu->isar.dbgdidr = 0x3516d000; -+ cpu->isar.regs[ID_MMFR0] = 0x10101105; -+ cpu->isar.regs[ID_MMFR1] = 0x40000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01260000; -+ cpu->isar.regs[ID_MMFR3] = 0x02102211; -+ cpu->isar.regs[ID_ISAR0] = 0x02101110; -+ cpu->isar.regs[ID_ISAR1] = 0x13112111; -+ cpu->isar.regs[ID_ISAR2] = 0x21232042; -+ cpu->isar.regs[ID_ISAR3] = 0x01112131; -+ cpu->isar.regs[ID_ISAR4] = 0x00011142; -+ cpu->isar.regs[ID_ISAR5] = 0x00011121; -+ cpu->isar.regs[ID_ISAR6] = 0; -+ cpu->isar.regs[ID_AA64PFR0] = 0x00002222; -+ cpu->isar.regs[ID_AA64DFR0] = 0x10305106; -+ cpu->isar.regs[ID_AA64ISAR0] = 0x00011120; -+ cpu->isar.regs[ID_AA64MMFR0] = 0x00001124; -+ cpu->isar.regs[DBGDIDR] = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ -@@ -170,31 +170,31 @@ static void 
aarch64_a53_initfn(Object *obj) - cpu->midr = 0x410fd034; - cpu->revidr = 0x00000000; - cpu->reset_fpsid = 0x41034070; -- cpu->isar.mvfr0 = 0x10110222; -- cpu->isar.mvfr1 = 0x12111111; -- cpu->isar.mvfr2 = 0x00000043; -+ cpu->isar.regs[MVFR0] = 0x10110222; -+ cpu->isar.regs[MVFR1] = 0x12111111; -+ cpu->isar.regs[MVFR2] = 0x00000043; - cpu->ctr = 0x84448004; /* L1Ip = VIPT */ - cpu->reset_sctlr = 0x00c50838; - cpu->id_pfr0 = 0x00000131; - cpu->id_pfr1 = 0x00011011; -- cpu->isar.id_dfr0 = 0x03010066; -+ cpu->isar.regs[ID_DFR0] = 0x03010066; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x10101105; -- cpu->isar.id_mmfr1 = 0x40000000; -- cpu->isar.id_mmfr2 = 0x01260000; -- cpu->isar.id_mmfr3 = 0x02102211; -- cpu->isar.id_isar0 = 0x02101110; -- cpu->isar.id_isar1 = 0x13112111; -- cpu->isar.id_isar2 = 0x21232042; -- cpu->isar.id_isar3 = 0x01112131; -- cpu->isar.id_isar4 = 0x00011142; -- cpu->isar.id_isar5 = 0x00011121; -- cpu->isar.id_isar6 = 0; -- cpu->isar.id_aa64pfr0 = 0x00002222; -- cpu->isar.id_aa64dfr0 = 0x10305106; -- cpu->isar.id_aa64isar0 = 0x00011120; -- cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ -- cpu->isar.dbgdidr = 0x3516d000; -+ cpu->isar.regs[ID_MMFR0] = 0x10101105; -+ cpu->isar.regs[ID_MMFR1] = 0x40000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01260000; -+ cpu->isar.regs[ID_MMFR3] = 0x02102211; -+ cpu->isar.regs[ID_ISAR0] = 0x02101110; -+ cpu->isar.regs[ID_ISAR1] = 0x13112111; -+ cpu->isar.regs[ID_ISAR2] = 0x21232042; -+ cpu->isar.regs[ID_ISAR3] = 0x01112131; -+ cpu->isar.regs[ID_ISAR4] = 0x00011142; -+ cpu->isar.regs[ID_ISAR5] = 0x00011121; -+ cpu->isar.regs[ID_ISAR6] = 0; -+ cpu->isar.regs[ID_AA64PFR0] = 0x00002222; -+ cpu->isar.regs[ID_AA64DFR0] = 0x10305106; -+ cpu->isar.regs[ID_AA64ISAR0] = 0x00011120; -+ cpu->isar.regs[ID_AA64MMFR0] = 0x00001122; /* 40 bit physical addr */ -+ cpu->isar.regs[DBGDIDR] = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 
32KB L1 icache */ -@@ -224,30 +224,30 @@ static void aarch64_a72_initfn(Object *obj) - cpu->midr = 0x410fd083; - cpu->revidr = 0x00000000; - cpu->reset_fpsid = 0x41034080; -- cpu->isar.mvfr0 = 0x10110222; -- cpu->isar.mvfr1 = 0x12111111; -- cpu->isar.mvfr2 = 0x00000043; -+ cpu->isar.regs[MVFR0] = 0x10110222; -+ cpu->isar.regs[MVFR1] = 0x12111111; -+ cpu->isar.regs[MVFR2] = 0x00000043; - cpu->ctr = 0x8444c004; - cpu->reset_sctlr = 0x00c50838; - cpu->id_pfr0 = 0x00000131; - cpu->id_pfr1 = 0x00011011; -- cpu->isar.id_dfr0 = 0x03010066; -+ cpu->isar.regs[ID_DFR0] = 0x03010066; - cpu->id_afr0 = 0x00000000; -- cpu->isar.id_mmfr0 = 0x10201105; -- cpu->isar.id_mmfr1 = 0x40000000; -- cpu->isar.id_mmfr2 = 0x01260000; -- cpu->isar.id_mmfr3 = 0x02102211; -- cpu->isar.id_isar0 = 0x02101110; -- cpu->isar.id_isar1 = 0x13112111; -- cpu->isar.id_isar2 = 0x21232042; -- cpu->isar.id_isar3 = 0x01112131; -- cpu->isar.id_isar4 = 0x00011142; -- cpu->isar.id_isar5 = 0x00011121; -- cpu->isar.id_aa64pfr0 = 0x00002222; -- cpu->isar.id_aa64dfr0 = 0x10305106; -- cpu->isar.id_aa64isar0 = 0x00011120; -- cpu->isar.id_aa64mmfr0 = 0x00001124; -- cpu->isar.dbgdidr = 0x3516d000; -+ cpu->isar.regs[ID_MMFR0] = 0x10201105; -+ cpu->isar.regs[ID_MMFR1] = 0x40000000; -+ cpu->isar.regs[ID_MMFR2] = 0x01260000; -+ cpu->isar.regs[ID_MMFR3] = 0x02102211; -+ cpu->isar.regs[ID_ISAR0] = 0x02101110; -+ cpu->isar.regs[ID_ISAR1] = 0x13112111; -+ cpu->isar.regs[ID_ISAR2] = 0x21232042; -+ cpu->isar.regs[ID_ISAR3] = 0x01112131; -+ cpu->isar.regs[ID_ISAR4] = 0x00011142; -+ cpu->isar.regs[ID_ISAR5] = 0x00011121; -+ cpu->isar.regs[ID_AA64PFR0] = 0x00002222; -+ cpu->isar.regs[ID_AA64DFR0] = 0x10305106; -+ cpu->isar.regs[ID_AA64ISAR0] = 0x00011120; -+ cpu->isar.regs[ID_AA64MMFR0] = 0x00001124; -+ cpu->isar.regs[DBGDIDR] = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ -@@ -275,10 +275,10 @@ static void 
aarch64_kunpeng_920_initfn(Object *obj) - - cpu->midr = 0x480fd010; - cpu->ctr = 0x84448004; -- cpu->isar.id_aa64pfr0 = 0x11001111; -- cpu->isar.id_aa64dfr0 = 0x110305408; -- cpu->isar.id_aa64isar0 = 0x10211120; -- cpu->isar.id_aa64mmfr0 = 0x101125; -+ cpu->isar.regs[ID_AA64PFR0] = 0x11001111; -+ cpu->isar.regs[ID_AA64DFR0] = 0x110305408; -+ cpu->isar.regs[ID_AA64ISAR0] = 0x10211120; -+ cpu->isar.regs[ID_AA64MMFR0] = 0x101125; - } - - static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, -@@ -321,7 +321,7 @@ static void aarch64_max_initfn(Object *obj) - uint32_t u; - aarch64_a57_initfn(obj); - -- t = cpu->isar.id_aa64isar0; -+ t = cpu->isar.regs[ID_AA64ISAR0]; - t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */ - t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */ -@@ -335,9 +335,9 @@ static void aarch64_max_initfn(Object *obj) - t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* v8.5-CondM */ - t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); -- cpu->isar.id_aa64isar0 = t; -+ cpu->isar.regs[ID_AA64ISAR0] = t; - -- t = cpu->isar.id_aa64isar1; -+ t = cpu->isar.regs[ID_AA64ISAR1]; - t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */ -@@ -347,45 +347,45 @@ static void aarch64_max_initfn(Object *obj) - t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); -- cpu->isar.id_aa64isar1 = t; -+ cpu->isar.regs[ID_AA64ISAR1] = t; - -- t = cpu->isar.id_aa64pfr0; -+ t = cpu->isar.regs[ID_AA64PFR0]; - t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); - t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); - t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); -- cpu->isar.id_aa64pfr0 = t; -+ cpu->isar.regs[ID_AA64PFR0] = t; - -- t = cpu->isar.id_aa64pfr1; -+ t = cpu->isar.regs[ID_AA64PFR1]; - t = FIELD_DP64(t, ID_AA64PFR1, 
BT, 1); -- cpu->isar.id_aa64pfr1 = t; -+ cpu->isar.regs[ID_AA64PFR1] = t; - -- t = cpu->isar.id_aa64mmfr1; -+ t = cpu->isar.regs[ID_AA64MMFR1]; - t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* HPD */ - t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); - t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */ -- cpu->isar.id_aa64mmfr1 = t; -+ cpu->isar.regs[ID_AA64MMFR1] = t; - - /* Replicate the same data to the 32-bit id registers. */ -- u = cpu->isar.id_isar5; -+ u = cpu->isar.regs[ID_ISAR5]; - u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */ - u = FIELD_DP32(u, ID_ISAR5, SHA1, 1); - u = FIELD_DP32(u, ID_ISAR5, SHA2, 1); - u = FIELD_DP32(u, ID_ISAR5, CRC32, 1); - u = FIELD_DP32(u, ID_ISAR5, RDM, 1); - u = FIELD_DP32(u, ID_ISAR5, VCMA, 1); -- cpu->isar.id_isar5 = u; -+ cpu->isar.regs[ID_ISAR5] = u; - -- u = cpu->isar.id_isar6; -+ u = cpu->isar.regs[ID_ISAR6]; - u = FIELD_DP32(u, ID_ISAR6, JSCVT, 1); - u = FIELD_DP32(u, ID_ISAR6, DP, 1); - u = FIELD_DP32(u, ID_ISAR6, FHM, 1); - u = FIELD_DP32(u, ID_ISAR6, SB, 1); - u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); -- cpu->isar.id_isar6 = u; -+ cpu->isar.regs[ID_ISAR6] = u; - -- u = cpu->isar.id_mmfr3; -+ u = cpu->isar.regs[ID_MMFR3]; - u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ -- cpu->isar.id_mmfr3 = u; -+ cpu->isar.regs[ID_MMFR3] = u; - - /* - * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 49cd7a7e..459af431 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -5597,7 +5597,7 @@ static void define_debug_regs(ARMCPU *cpu) - ARMCPRegInfo dbgdidr = { - .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL0_R, .accessfn = access_tda, -- .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr, -+ .type = ARM_CP_CONST, .resetvalue = cpu->isar.regs[DBGDIDR], - }; - - /* Note that all these register fields hold "number of Xs minus 1". 
*/ -@@ -5672,7 +5672,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) - static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) - { - ARMCPU *cpu = env_archcpu(env); -- uint64_t pfr0 = cpu->isar.id_aa64pfr0; -+ uint64_t pfr0 = cpu->isar.regs[ID_AA64PFR0]; - - if (env->gicv3state) { - pfr0 |= 1 << 24; -@@ -5898,7 +5898,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_dfr0 }, -+ .resetvalue = cpu->isar.regs[ID_DFR0] }, - { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -5906,51 +5906,51 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_mmfr0 }, -+ .resetvalue = cpu->isar.regs[ID_MMFR0] }, - { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_mmfr1 }, -+ .resetvalue = cpu->isar.regs[ID_MMFR1] }, - { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_mmfr2 }, -+ .resetvalue = cpu->isar.regs[ID_MMFR2] }, - { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_mmfr3 }, -+ .resetvalue = cpu->isar.regs[ID_MMFR3] }, - { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_isar0 }, -+ .resetvalue = 
cpu->isar.regs[ID_ISAR0] }, - { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_isar1 }, -+ .resetvalue = cpu->isar.regs[ID_ISAR1] }, - { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_isar2 }, -+ .resetvalue = cpu->isar.regs[ID_ISAR2] }, - { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_isar3 }, -+ .resetvalue = cpu->isar.regs[ID_ISAR3] }, - { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_isar4 }, -+ .resetvalue = cpu->isar.regs[ID_ISAR4] }, - { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_isar5 }, -+ .resetvalue = cpu->isar.regs[ID_ISAR5] }, - { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_mmfr4 }, -+ .resetvalue = cpu->isar.regs[ID_MMFR4] }, - { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_isar6 }, -+ .resetvalue = cpu->isar.regs[ID_ISAR6] }, - REGINFO_SENTINEL - }; - define_arm_cp_regs(cpu, v6_idregs); -@@ -6074,7 +6074,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64pfr1}, -+ .resetvalue = 
cpu->isar.regs[ID_AA64PFR1]}, - { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -6103,11 +6103,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64dfr0 }, -+ .resetvalue = cpu->isar.regs[ID_AA64DFR0] }, - { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64dfr1 }, -+ .resetvalue = cpu->isar.regs[ID_AA64DFR1] }, - { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -6135,11 +6135,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64isar0 }, -+ .resetvalue = cpu->isar.regs[ID_AA64ISAR0] }, - { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64isar1 }, -+ .resetvalue = cpu->isar.regs[ID_AA64ISAR1] }, - { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -6167,15 +6167,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64mmfr0 }, -+ .resetvalue = cpu->isar.regs[ID_AA64MMFR0] }, - { .name = 
"ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64mmfr1 }, -+ .resetvalue = cpu->isar.regs[ID_AA64MMFR1] }, - { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.id_aa64mmfr2 }, -+ .resetvalue = cpu->isar.regs[ID_AA64MMFR2] }, - { .name = "ID_AA64MMFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -6199,15 +6199,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) - { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.mvfr0 }, -+ .resetvalue = cpu->isar.regs[MVFR0] }, - { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.mvfr1 }, -+ .resetvalue = cpu->isar.regs[MVFR1] }, - { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, - .access = PL1_R, .type = ARM_CP_CONST, -- .resetvalue = cpu->isar.mvfr2 }, -+ .resetvalue = cpu->isar.regs[MVFR2] }, - { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3, - .access = PL1_R, .type = ARM_CP_CONST, -@@ -6426,7 +6426,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) - define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo); - define_arm_cp_regs(cpu, vmsa_cp_reginfo); - /* TTCBR2 is introduced with ARMv8.2-A32HPD. 
*/ -- if (FIELD_EX32(cpu->isar.id_mmfr4, ID_MMFR4, HPDS) != 0) { -+ if (FIELD_EX32(cpu->isar.regs[ID_MMFR4], ID_MMFR4, HPDS) != 0) { - define_one_arm_cp_reg(cpu, &ttbcr2_reginfo); - } - } -diff --git a/target/arm/internals.h b/target/arm/internals.h -index 1d01ecc4..2da13ba8 100644 ---- a/target/arm/internals.h -+++ b/target/arm/internals.h -@@ -237,7 +237,7 @@ static inline unsigned int arm_pamax(ARMCPU *cpu) - [5] = 48, - }; - unsigned int parange = -- FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); -+ FIELD_EX64(cpu->isar.regs[ID_AA64MMFR0], ID_AA64MMFR0, PARANGE); - - /* id_aa64mmfr0 is a read-only register so values outside of the - * supported mappings can be considered an implementation error. */ -@@ -865,9 +865,9 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) - static inline int arm_num_brps(ARMCPU *cpu) - { - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; -+ return FIELD_EX64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, BRPS) + 1; - } else { -- return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; -+ return FIELD_EX32(cpu->isar.regs[DBGDIDR], DBGDIDR, BRPS) + 1; - } - } - -@@ -879,9 +879,9 @@ static inline int arm_num_brps(ARMCPU *cpu) - static inline int arm_num_wrps(ARMCPU *cpu) - { - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; -+ return FIELD_EX64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, WRPS) + 1; - } else { -- return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; -+ return FIELD_EX32(cpu->isar.regs[DBGDIDR], DBGDIDR, WRPS) + 1; - } - } - -@@ -893,9 +893,10 @@ static inline int arm_num_wrps(ARMCPU *cpu) - static inline int arm_num_ctx_cmps(ARMCPU *cpu) - { - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { -- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; -+ return FIELD_EX64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, -+ CTX_CMPS) + 1; - } else { -- 
return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1; -+ return FIELD_EX32(cpu->isar.regs[DBGDIDR], DBGDIDR, CTX_CMPS) + 1; - } - } - -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 2a88b8df..06cf31e8 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -455,7 +455,7 @@ static inline void unset_feature(uint64_t *features, int feature) - *features &= ~(1ULL << feature); - } - --static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) -+static int read_sys_reg32(int fd, uint64_t *pret, uint64_t id) - { - uint64_t ret; - struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret }; -@@ -509,7 +509,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - ahcf->target = init.target; - ahcf->dtb_compatible = "arm,arm-v8"; - -- err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, -+ err = read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64PFR0], - ARM64_SYS_REG(3, 0, 0, 4, 0)); - if (unlikely(err < 0)) { - /* -@@ -528,24 +528,24 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - * ??? Either of these sounds like too much effort just - * to work around running a modern host kernel. 
- */ -- ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ -+ ahcf->isar.regs[ID_AA64PFR0] = 0x00000011; /* EL1&0, AArch64 only */ - err = 0; - } else { -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64PFR1], - ARM64_SYS_REG(3, 0, 0, 4, 1)); -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64DFR0], - ARM64_SYS_REG(3, 0, 0, 5, 0)); -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64DFR1], - ARM64_SYS_REG(3, 0, 0, 5, 1)); -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64ISAR0], - ARM64_SYS_REG(3, 0, 0, 6, 0)); -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64ISAR1], - ARM64_SYS_REG(3, 0, 0, 6, 1)); -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64MMFR0], - ARM64_SYS_REG(3, 0, 0, 7, 0)); -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64MMFR1], - ARM64_SYS_REG(3, 0, 0, 7, 1)); -- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, -+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64MMFR2], - ARM64_SYS_REG(3, 0, 0, 7, 2)); - - /* -@@ -555,38 +555,38 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - * than skipping the reads and leaving 0, as we must avoid - * considering the values in every case. 
- */ -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_DFR0], - ARM64_SYS_REG(3, 0, 0, 1, 2)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR0], - ARM64_SYS_REG(3, 0, 0, 1, 4)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR1], - ARM64_SYS_REG(3, 0, 0, 1, 5)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR2], - ARM64_SYS_REG(3, 0, 0, 1, 6)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR3], - ARM64_SYS_REG(3, 0, 0, 1, 7)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR0], - ARM64_SYS_REG(3, 0, 0, 2, 0)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR1], - ARM64_SYS_REG(3, 0, 0, 2, 1)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR2], - ARM64_SYS_REG(3, 0, 0, 2, 2)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR3], - ARM64_SYS_REG(3, 0, 0, 2, 3)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR4], - ARM64_SYS_REG(3, 0, 0, 2, 4)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR5], - ARM64_SYS_REG(3, 0, 0, 2, 5)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR4], - ARM64_SYS_REG(3, 0, 0, 2, 6)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, -+ err |= read_sys_reg32(fdarray[2], 
&ahcf->isar.regs[ID_ISAR6], - ARM64_SYS_REG(3, 0, 0, 2, 7)); - -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[MVFR0], - ARM64_SYS_REG(3, 0, 0, 3, 0)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[MVFR1], - ARM64_SYS_REG(3, 0, 0, 3, 1)); -- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, -+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[MVFR2], - ARM64_SYS_REG(3, 0, 0, 3, 2)); - - /* -@@ -599,14 +599,16 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. - * We only do this if the CPU supports AArch32 at EL1. - */ -- if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { -- int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); -- int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); -+ if (FIELD_EX32(ahcf->isar.regs[ID_AA64PFR0], ID_AA64PFR0, EL1) >= 2) { -+ int wrps = FIELD_EX64(ahcf->isar.regs[ID_AA64DFR0], -+ ID_AA64DFR0, WRPS); -+ int brps = FIELD_EX64(ahcf->isar.regs[ID_AA64DFR0], -+ ID_AA64DFR0, BRPS); - int ctx_cmps = -- FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); -+ FIELD_EX64(ahcf->isar.regs[ID_AA64DFR0], ID_AA64DFR0, CTX_CMPS); - int version = 6; /* ARMv8 debug architecture */ - bool has_el3 = -- !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); -+ !!FIELD_EX32(ahcf->isar.regs[ID_AA64PFR0], ID_AA64PFR0, EL3); - uint32_t dbgdidr = 0; - - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); -@@ -616,7 +618,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); - dbgdidr |= (1 << 15); /* RES1 bit */ -- ahcf->isar.dbgdidr = dbgdidr; -+ ahcf->isar.regs[DBGDIDR] = dbgdidr; - } - } - --- -2.25.1 - diff --git a/target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch 
b/target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch deleted file mode 100644 index 41c67cf1b8024af9f48888ecd782e9927a2f166e..0000000000000000000000000000000000000000 --- a/target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 860035652c7866b033762f6d90f81d5ddedf855c Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 22 Apr 2020 17:08:43 +0800 -Subject: [PATCH] target/arm/cpu: Add the kvm-no-adjvtime CPU property - -kvm-no-adjvtime is a KVM specific CPU property and a first of its -kind. To accommodate it we also add kvm_arm_add_vcpu_properties() -and a KVM specific CPU properties description to the CPU features -document. - -Signed-off-by: Andrew Jones -Message-id: 20200120101023.16030-7-drjones@redhat.com -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e9a2a959..cfda6cc5 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1748,6 +1748,11 @@ static void machvirt_init(MachineState *machine) - } - } - -+ if (vmc->kvm_no_adjvtime && -+ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { -+ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); -+ } -+ - if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { - object_property_set_bool(cpuobj, false, "pmu", NULL); - } -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 43a6ce91..a9d6977a 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -107,6 +107,7 @@ typedef struct { - bool claim_edge_triggered_timers; - bool smbios_old_sys_ver; - bool no_highmem_ecam; -+ bool kvm_no_adjvtime; - } VirtMachineClass; - - typedef struct { -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index bc3da9a3..39bbe7e2 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2441,6 +2441,7 @@ static void arm_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - 
cortex_a15_initfn(obj); - -@@ -2629,6 +2630,7 @@ static void arm_host_initfn(Object *obj) - ARMCPU *cpu = ARM_CPU(obj); - - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - arm_cpu_post_init(obj); - } - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index dbf44b92..b30ca7c9 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -312,6 +312,7 @@ static void aarch64_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - uint64_t t; - uint32_t u; -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 21fb7ecd..327b3bc3 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -16,6 +16,8 @@ - #include "qemu-common.h" - #include "qemu/timer.h" - #include "qemu/error-report.h" -+#include "qom/object.h" -+#include "qapi/error.h" - #include "sysemu/sysemu.h" - #include "sysemu/kvm.h" - #include "sysemu/kvm_int.h" -@@ -162,6 +164,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - env->features = arm_host_cpu_features.features; - } - -+static bool kvm_no_adjvtime_get(Object *obj, Error **errp) -+{ -+ return !ARM_CPU(obj)->kvm_adjvtime; -+} -+ -+static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) -+{ -+ ARM_CPU(obj)->kvm_adjvtime = !value; -+} -+ -+/* KVM VCPU properties should be prefixed with "kvm-". */ -+void kvm_arm_add_vcpu_properties(Object *obj) -+{ -+ if (!kvm_enabled()) { -+ return; -+ } -+ -+ ARM_CPU(obj)->kvm_adjvtime = true; -+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, -+ kvm_no_adjvtime_set, &error_abort); -+ object_property_set_description(obj, "kvm-no-adjvtime", -+ "Set on to disable the adjustment of " -+ "the virtual counter. 
VM stopped time " -+ "will be counted.", &error_abort); -+} -+ - int kvm_arm_get_max_vm_ipa_size(MachineState *ms) - { - KVMState *s = KVM_STATE(ms->accelerator); -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 97560d4e..0de5f83e 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -230,6 +230,15 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); - */ - void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - -+/** -+ * kvm_arm_add_vcpu_properties: -+ * @obj: The CPU object to add the properties to -+ * -+ * Add all KVM specific CPU properties to the CPU object. These -+ * are the CPU properties with "kvm-" prefixed names. -+ */ -+void kvm_arm_add_vcpu_properties(Object *obj); -+ - /** - * kvm_arm_get_max_vm_ipa_size: - * @ms: Machine state handle -@@ -294,6 +303,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - cpu->host_cpu_probe_failed = true; - } - -+static inline void kvm_arm_add_vcpu_properties(Object *obj) {} -+ - static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms) - { - return -ENOENT; --- -2.23.0 diff --git a/target-arm-cpu-Inform-about-reading-confidential-CPU.patch b/target-arm-cpu-Inform-about-reading-confidential-CPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..772775ed6862c7cdec1de37d073c66eb11f2e699 --- /dev/null +++ b/target-arm-cpu-Inform-about-reading-confidential-CPU.patch @@ -0,0 +1,37 @@ +From 21bfc55d5d2580bcf61e174c95cd3fe27c608b27 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Tue, 7 Feb 2023 13:05:40 +0000 +Subject: [PATCH] target/arm/cpu: Inform about reading confidential CPU + registers + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/f7dbc9b0e0677feabac408bed8fb9fcbd9b946c3 + +The host cannot access registers of a Realm. Instead of showing all +registers as zero in "info registers", display a message about this +restriction. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + target/arm/cpu.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 09d391bd34..3de2e1a3c3 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1082,6 +1082,11 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) + const char *ns_status; + bool sve; + ++ if (cpu->kvm_rme) { ++ qemu_fprintf(f, "the CPU registers are confidential to the realm\n"); ++ return; ++ } ++ + qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc); + for (i = 0; i < 32; i++) { + if (i == 31) { +-- +2.33.0 + diff --git a/target-arm-cpu-Set-number-of-PMU-counters-in-KVM.patch b/target-arm-cpu-Set-number-of-PMU-counters-in-KVM.patch new file mode 100644 index 0000000000000000000000000000000000000000..8db2bbaffb192922c6dfec833bad88b39810be86 --- /dev/null +++ b/target-arm-cpu-Set-number-of-PMU-counters-in-KVM.patch @@ -0,0 +1,193 @@ +From 4febb6917e0e09279c86ce1679566bb9bc63b0df Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Thu, 7 Dec 2023 17:32:13 +0000 +Subject: [PATCH] target/arm/cpu: Set number of PMU counters in KVM + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/22f6eef79582fc88a779bc5baa502bcd6e592f8f + +Add a "num-pmu-counters" CPU parameter to configure the number of +counters that KVM presents to the guest. This is needed for Realm VMs, +whose parameters include the number of PMU counters and influence the +Realm Initial Measurement. 
+ +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/arm-qmp-cmds.c + target/arm/kvm.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + target/arm/arm-qmp-cmds.c | 2 +- + target/arm/cpu.h | 3 +++ + target/arm/cpu64.c | 41 +++++++++++++++++++++++++++++++++++++++ + target/arm/kvm.c | 32 ++++++++++++++++++++++++++++++ + target/arm/kvm64.c | 2 +- + target/arm/kvm_arm.h | 1 + + 6 files changed, 79 insertions(+), 2 deletions(-) + +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index 98b3498428..d201d319bd 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -96,7 +96,7 @@ static const char *cpu_model_advertised_features[] = { + "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", + "kvm-no-adjvtime", "kvm-steal-time", + "pauth", "pauth-impdef", "pauth-qarma3", +- "num-breakpoints", "num-watchpoints", ++ "num-breakpoints", "num-watchpoints", "num-pmu-counters", + NULL + }; + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 223d8abd8a..cb546a93e2 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1128,6 +1128,7 @@ struct ArchCPU { + /* Allows to override the default configuration */ + uint8_t num_bps; + uint8_t num_wps; ++ int8_t num_pmu_ctrs; + }; + + typedef struct ARMCPUInfo { +@@ -2477,6 +2478,8 @@ FIELD(MFAR, FPA, 12, 40) + FIELD(MFAR, NSE, 62, 1) + FIELD(MFAR, NS, 63, 1) + ++FIELD(PMCR, N, 11, 5) ++ + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); + + /* If adding a feature bit which corresponds to a Linux ELF +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index c0edffb679..4cf8446b6e 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -643,12 +643,53 @@ static void arm_cpu_set_num_bps(Object *obj, Visitor *v, const char *name, + cpu->num_bps = val; + } + ++static void arm_cpu_get_num_pmu_ctrs(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ uint8_t val; ++ ARMCPU *cpu = ARM_CPU(obj); 
++ ++ if (cpu->num_pmu_ctrs == -1) { ++ val = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); ++ } else { ++ val = cpu->num_pmu_ctrs; ++ } ++ ++ visit_type_uint8(v, name, &val, errp); ++} ++ ++static void arm_cpu_set_num_pmu_ctrs(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ uint8_t val; ++ ARMCPU *cpu = ARM_CPU(obj); ++ uint8_t max_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); ++ ++ if (!visit_type_uint8(v, name, &val, errp)) { ++ return; ++ } ++ ++ if (val > max_ctrs) { ++ error_setg(errp, "invalid number of PMU counters"); ++ return; ++ } ++ ++ cpu->num_pmu_ctrs = val; ++} ++ + static void aarch64_add_kvm_writable_properties(Object *obj) + { ++ ARMCPU *cpu = ARM_CPU(obj); ++ + object_property_add(obj, "num-breakpoints", "uint8", arm_cpu_get_num_bps, + arm_cpu_set_num_bps, NULL, NULL); + object_property_add(obj, "num-watchpoints", "uint8", arm_cpu_get_num_wps, + arm_cpu_set_num_wps, NULL, NULL); ++ ++ cpu->num_pmu_ctrs = -1; ++ object_property_add(obj, "num-pmu-counters", "uint8", ++ arm_cpu_get_num_pmu_ctrs, arm_cpu_set_num_pmu_ctrs, ++ NULL, NULL); + } + #endif /* CONFIG_KVM */ + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index bf17da37e5..f45783a9da 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -724,9 +724,41 @@ static void kvm_arm_configure_aa64dfr0(ARMCPU *cpu) + } + } + ++static void kvm_arm_configure_pmcr(ARMCPU *cpu) ++{ ++ int ret; ++ uint64_t val, newval; ++ CPUState *cs = CPU(cpu); ++ ++ if (cpu->num_pmu_ctrs == -1) { ++ return; ++ } ++ ++ newval = FIELD_DP64(cpu->isar.reset_pmcr_el0, PMCR, N, cpu->num_pmu_ctrs); ++ ret = kvm_set_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &newval); ++ if (ret) { ++ error_report("Failed to set KVM_REG_ARM_PMCR_EL0"); ++ return; ++ } ++ ++ /* ++ * Check if the write succeeded, since older versions of KVM ignore it. 
++ */ ++ ret = kvm_get_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &val); ++ if (ret) { ++ error_report("Failed to get KVM_REG_ARM_PMCR_EL0"); ++ return; ++ } ++ ++ if (val != newval) { ++ error_report("Failed to update KVM_REG_ARM_PMCR_EL0"); ++ } ++} ++ + static void kvm_arm_configure_vcpu_regs(ARMCPU *cpu) + { + kvm_arm_configure_aa64dfr0(cpu); ++ kvm_arm_configure_pmcr(cpu); + } + + void kvm_arm_reset_vcpu(ARMCPU *cpu) +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index e84bc9f94d..6a8aad0f06 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -438,7 +438,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + if (pmu_supported) { + /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ + err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, +- ARM64_SYS_REG(3, 3, 9, 12, 0)); ++ KVM_REG_ARM_PMCR_EL0); + } + + if (sve_supported) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 63b5d9affd..4a9707a435 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -19,6 +19,7 @@ + #define KVM_ARM_VGIC_V3 (1 << 1) + + #define KVM_REG_ARM_ID_AA64DFR0_EL1 ARM64_SYS_REG(3, 0, 0, 5, 0) ++#define KVM_REG_ARM_PMCR_EL0 ARM64_SYS_REG(3, 3, 9, 12, 0) + + /** + * kvm_arm_init_debug() - initialize guest debug capabilities +-- +2.33.0 + diff --git a/target-arm-cpu-Set-number-of-breakpoints-and-watchpo.patch b/target-arm-cpu-Set-number-of-breakpoints-and-watchpo.patch new file mode 100644 index 0000000000000000000000000000000000000000..60184a27ac70c0e6a3e1bd011d54078eae56ed5f --- /dev/null +++ b/target-arm-cpu-Set-number-of-breakpoints-and-watchpo.patch @@ -0,0 +1,253 @@ +From 3b881e82b73be727e783e1762084025233fba0cc Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 4 Dec 2023 18:48:19 +0000 +Subject: [PATCH] target/arm/cpu: Set number of breakpoints and watchpoints in + KVM + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/99082dee9c26b2b0f0f4d39bc9f6f99e73701e2f + +Add 
"num-breakpoints" and "num-watchpoints" CPU parameters to configure +the debug features that KVM presents to the guest. The KVM vCPU +configuration is modified by calling SET_ONE_REG on the ID register. + +This is needed for Realm VMs, whose parameters include breakpoints and +watchpoints, and influence the Realm Initial Measurement. + +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/arm-qmp-cmds.c + target/arm/kvm.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + target/arm/arm-qmp-cmds.c | 1 + + target/arm/cpu.h | 4 ++ + target/arm/cpu64.c | 77 +++++++++++++++++++++++++++++++++++++++ + target/arm/kvm.c | 54 +++++++++++++++++++++++++++ + target/arm/kvm64.c | 2 +- + target/arm/kvm_arm.h | 2 + + 6 files changed, 139 insertions(+), 1 deletion(-) + +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index b53d5efe13..98b3498428 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -96,6 +96,7 @@ static const char *cpu_model_advertised_features[] = { + "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", + "kvm-no-adjvtime", "kvm-steal-time", + "pauth", "pauth-impdef", "pauth-qarma3", ++ "num-breakpoints", "num-watchpoints", + NULL + }; + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 12305effd4..223d8abd8a 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1124,6 +1124,10 @@ struct ArchCPU { + + /* Generic timer counter frequency, in Hz */ + uint64_t gt_cntfrq_hz; ++ ++ /* Allows to override the default configuration */ ++ uint8_t num_bps; ++ uint8_t num_wps; + }; + + typedef struct ARMCPUInfo { +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 6eca55ac29..c0edffb679 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -576,6 +576,82 @@ void aarch64_add_pauth_properties(Object *obj) + } + } + ++#if defined(CONFIG_KVM) ++static void arm_cpu_get_num_wps(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ uint8_t val; ++ 
ARMCPU *cpu = ARM_CPU(obj); ++ ++ val = cpu->num_wps; ++ if (val == 0) { ++ val = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; ++ } ++ ++ visit_type_uint8(v, name, &val, errp); ++} ++ ++static void arm_cpu_set_num_wps(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ uint8_t val; ++ ARMCPU *cpu = ARM_CPU(obj); ++ uint8_t max_wps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; ++ ++ if (!visit_type_uint8(v, name, &val, errp)) { ++ return; ++ } ++ ++ if (val < 2 || val > max_wps) { ++ error_setg(errp, "invalid number of watchpoints"); ++ return; ++ } ++ ++ cpu->num_wps = val; ++} ++ ++static void arm_cpu_get_num_bps(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ uint8_t val; ++ ARMCPU *cpu = ARM_CPU(obj); ++ ++ val = cpu->num_bps; ++ if (val == 0) { ++ val = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; ++ } ++ ++ visit_type_uint8(v, name, &val, errp); ++} ++ ++static void arm_cpu_set_num_bps(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ uint8_t val; ++ ARMCPU *cpu = ARM_CPU(obj); ++ uint8_t max_bps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; ++ ++ if (!visit_type_uint8(v, name, &val, errp)) { ++ return; ++ } ++ ++ if (val < 2 || val > max_bps) { ++ error_setg(errp, "invalid number of breakpoints"); ++ return; ++ } ++ ++ cpu->num_bps = val; ++} ++ ++static void aarch64_add_kvm_writable_properties(Object *obj) ++{ ++ object_property_add(obj, "num-breakpoints", "uint8", arm_cpu_get_num_bps, ++ arm_cpu_set_num_bps, NULL, NULL); ++ object_property_add(obj, "num-watchpoints", "uint8", arm_cpu_get_num_wps, ++ arm_cpu_set_num_wps, NULL, NULL); ++} ++#endif /* CONFIG_KVM */ ++ + void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) + { + uint64_t t; +@@ -789,6 +865,7 @@ static void aarch64_host_initfn(Object *obj) + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + aarch64_add_sve_properties(obj); + 
aarch64_add_pauth_properties(obj); ++ aarch64_add_kvm_writable_properties(obj); + } + #elif defined(CONFIG_HVF) + ARMCPU *cpu = ARM_CPU(obj); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index cec95483f3..bf17da37e5 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -681,6 +681,54 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu) + } + } + ++static void kvm_arm_configure_aa64dfr0(ARMCPU *cpu) ++{ ++ int ret; ++ uint64_t val, newval; ++ CPUState *cs = CPU(cpu); ++ ++ if (!cpu->num_bps && !cpu->num_wps) { ++ return; ++ } ++ ++ newval = cpu->isar.id_aa64dfr0; ++ if (cpu->num_bps) { ++ uint64_t ctx_cmps = FIELD_EX64(newval, ID_AA64DFR0, CTX_CMPS); ++ ++ /* CTX_CMPs is never greater than BRPs */ ++ ctx_cmps = MIN(ctx_cmps, cpu->num_bps - 1); ++ newval = FIELD_DP64(newval, ID_AA64DFR0, BRPS, cpu->num_bps - 1); ++ newval = FIELD_DP64(newval, ID_AA64DFR0, CTX_CMPS, ctx_cmps); ++ } ++ if (cpu->num_wps) { ++ newval = FIELD_DP64(newval, ID_AA64DFR0, WRPS, cpu->num_wps - 1); ++ } ++ ret = kvm_set_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &newval); ++ if (ret) { ++ error_report("Failed to set KVM_REG_ARM_ID_AA64DFR0_EL1"); ++ return; ++ } ++ ++ /* ++ * Check if the write succeeded. KVM does offer the writable mask for this ++ * register, but this way we also check if the value we wrote was sane. 
++ */ ++ ret = kvm_get_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &val); ++ if (ret) { ++ error_report("Failed to get KVM_REG_ARM_ID_AA64DFR0_EL1"); ++ return; ++ } ++ ++ if (val != newval) { ++ error_report("Failed to update KVM_REG_ARM_ID_AA64DFR0_EL1"); ++ } ++} ++ ++static void kvm_arm_configure_vcpu_regs(ARMCPU *cpu) ++{ ++ kvm_arm_configure_aa64dfr0(cpu); ++} ++ + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; +@@ -694,6 +742,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); + abort(); + } ++ ++ /* ++ * Before loading the KVM values into CPUState, update the KVM configuration ++ */ ++ kvm_arm_configure_vcpu_regs(cpu); ++ + if (!write_kvmstate_to_list(cpu)) { + fprintf(stderr, "write_kvmstate_to_list failed\n"); + abort(); +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index d314927027..e84bc9f94d 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -338,7 +338,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, + ARM64_SYS_REG(3, 0, 0, 4, 5)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, +- ARM64_SYS_REG(3, 0, 0, 5, 0)); ++ KVM_REG_ARM_ID_AA64DFR0_EL1); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, + ARM64_SYS_REG(3, 0, 0, 5, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 78ff8b7375..63b5d9affd 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -18,6 +18,8 @@ + #define KVM_ARM_VGIC_V2 (1 << 0) + #define KVM_ARM_VGIC_V3 (1 << 1) + ++#define KVM_REG_ARM_ID_AA64DFR0_EL1 ARM64_SYS_REG(3, 0, 0, 5, 0) ++ + /** + * kvm_arm_init_debug() - initialize guest debug capabilities + * @s: KVMState +-- +2.33.0 + diff --git a/target-arm-fix-exception-syndrome-for-AArch32-bkpt-i.patch b/target-arm-fix-exception-syndrome-for-AArch32-bkpt-i.patch new file mode 100644 index 
0000000000000000000000000000000000000000..3380c331d9d108ab9d165cf8177309cdd1933584 --- /dev/null +++ b/target-arm-fix-exception-syndrome-for-AArch32-bkpt-i.patch @@ -0,0 +1,91 @@ +From 3031ddd4dd45a706def011a9d6afdacd2557d147 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 28 Nov 2024 14:26:43 +0800 +Subject: [PATCH] target/arm: fix exception syndrome for AArch32 bkpt insn +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from f670be1aad33e801779af580398895b9455747ee + +Debug exceptions that target AArch32 Hyp mode are reported differently +than on AAarch64. Internally, Qemu uses the AArch64 syndromes. Therefore +such exceptions need to be either converted to a prefetch abort +(breakpoints, vector catch) or a data abort (watchpoints). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Jan Klötzke +Reviewed-by: Richard Henderson +Message-id: 20240127202758.3326381-1-jan.kloetzke@kernkonzept.com +Signed-off-by: Peter Maydell +Signed-off-by: gubin +--- + target/arm/helper.c | 18 ++++++++++++++++++ + target/arm/syndrome.h | 8 ++++++++ + 2 files changed, 26 insertions(+) + +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 793aa89cc6..35b8eaf15a 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -10848,6 +10848,24 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) + } + + if (env->exception.target_el == 2) { ++ /* Debug exceptions are reported differently on AArch32 */ ++ switch (syn_get_ec(env->exception.syndrome)) { ++ case EC_BREAKPOINT: ++ case EC_BREAKPOINT_SAME_EL: ++ case EC_AA32_BKPT: ++ case EC_VECTORCATCH: ++ env->exception.syndrome = syn_insn_abort(arm_current_el(env) == 2, ++ 0, 0, 0x22); ++ break; ++ case EC_WATCHPOINT: ++ env->exception.syndrome = syn_set_ec(env->exception.syndrome, ++ EC_DATAABORT); ++ break; ++ case EC_WATCHPOINT_SAME_EL: ++ env->exception.syndrome = syn_set_ec(env->exception.syndrome, ++ EC_DATAABORT_SAME_EL); ++ break; ++ } + 
arm_cpu_do_interrupt_aarch32_hyp(cs); + return; + } +diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h +index 95454b5b3b..eccb759da6 100644 +--- a/target/arm/syndrome.h ++++ b/target/arm/syndrome.h +@@ -25,6 +25,8 @@ + #ifndef TARGET_ARM_SYNDROME_H + #define TARGET_ARM_SYNDROME_H + ++#include "qemu/bitops.h" ++ + /* Valid Syndrome Register EC field values */ + enum arm_exception_class { + EC_UNCATEGORIZED = 0x00, +@@ -80,6 +82,7 @@ typedef enum { + SME_ET_InactiveZA, + } SMEExceptionType; + ++#define ARM_EL_EC_LENGTH 6 + #define ARM_EL_EC_SHIFT 26 + #define ARM_EL_IL_SHIFT 25 + #define ARM_EL_ISV_SHIFT 24 +@@ -91,6 +94,11 @@ static inline uint32_t syn_get_ec(uint32_t syn) + return syn >> ARM_EL_EC_SHIFT; + } + ++static inline uint32_t syn_set_ec(uint32_t syn, uint32_t ec) ++{ ++ return deposit32(syn, ARM_EL_EC_SHIFT, ARM_EL_EC_LENGTH, ec); ++} ++ + /* + * Utility functions for constructing various kinds of syndrome value. + * Note that in general we follow the AArch64 syndrome values; in a +-- +2.41.0.windows.1 + diff --git a/target-arm-fix-qemu-arm-target-build-error.patch b/target-arm-fix-qemu-arm-target-build-error.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc848b701d4b8479db68a14700adfad076d9e4b6 --- /dev/null +++ b/target-arm-fix-qemu-arm-target-build-error.patch @@ -0,0 +1,41 @@ +From d9940c5d6b3b7ad1173a16c58246196a03b3d317 Mon Sep 17 00:00:00 2001 +From: huangyan +Date: Fri, 4 Jul 2025 00:31:45 +0800 +Subject: [PATCH] target-arm: fix qemu-arm target build error * handle PSCI + calls in qemu-arm + +this change the same as upstream: +98128601ac8ff23df8a4c48acff00f9614613463: +* target-arm: add emulation of PSCI calls for system emulation + +Ported-by: huangyan huangyan@cdjrlc.com +Original-author: wangziliang wangziliang@kylinos.cn +--- + target/arm/internals.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 20b9c1da38..a02a98d72a 100644 +--- 
a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -314,10 +314,17 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len); + /* Callback function for when a watchpoint or breakpoint triggers. */ + void arm_debug_excp_handler(CPUState *cs); + ++#ifdef CONFIG_USER_ONLY ++static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) ++{ ++ return false; ++} ++#else + /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */ + bool arm_is_psci_call(ARMCPU *cpu, int excp_type); + /* Actually handle a PSCI call */ + void arm_handle_psci_call(ARMCPU *cpu); ++#endif + + /** + * arm_clear_exclusive: clear the exclusive monitor +-- +2.33.0 + diff --git a/target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch b/target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch deleted file mode 100644 index cf9bb73b8f5bf63c5e073042ca137266fd28e894..0000000000000000000000000000000000000000 --- a/target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch +++ /dev/null @@ -1,66 +0,0 @@ -From dfedc889fafd35efd4f8382b7672bf0e556f9f45 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Mon, 7 Sep 2020 14:07:07 +0800 -Subject: [PATCH] target/arm: ignore evtstrm and cpuid CPU features - -evtstrm and cpuid cann't be controlled by VMM: -1. evtstrm: The generic timer is configured to generate events at a - frequency of approximately 100KHz. It's controlled by the linux - kernel config CONFIG_ARM_ARCH_TIMER_EVTSTREAM. -2. cpuid: EL0 access to certain ID registers is available. It's always - set by linux kernel after 77c97b4ee2129 ("arm64: cpufeature: Expose - CPUID registers by emulation"). -However, they are exposed by getauxval() and /proc/cpuinfo. Hence, -let's report and ignore the CPU features if someone set them. 
- -Signed-off-by: Peng Liang ---- - target/arm/cpu64.c | 29 ++++++++++++++++++++++++++++- - 1 file changed, 28 insertions(+), 1 deletion(-) - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 7de20848..726d123d 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -506,10 +506,37 @@ static void arm_cpu_parse_featurestr(const char *typename, char *features, - } - } - -+static const char *unconfigurable_feats[] = { -+ "evtstrm", -+ "cpuid", -+ NULL -+}; -+ -+static bool is_configurable_feat(const char *name) -+{ -+ int i; -+ -+ for (i = 0; unconfigurable_feats[i]; ++i) { -+ if (g_strcmp0(unconfigurable_feats[i], name) == 0) { -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static void - cpu_add_feat_as_prop(const char *typename, const char *name, const char *val) - { -- GlobalProperty *prop = g_new0(typeof(*prop), 1); -+ GlobalProperty *prop; -+ -+ if (!is_configurable_feat(name)) { -+ info_report("CPU feature '%s' is not configurable by QEMU. Ignore it.", -+ name); -+ return; -+ } -+ -+ prop = g_new0(typeof(*prop), 1); - prop->driver = typename; - prop->property = g_strdup(name); - prop->value = g_strdup(val); --- -2.28.0 - diff --git a/target-arm-introduce-CPU-feature-dependency-mechanis.patch b/target-arm-introduce-CPU-feature-dependency-mechanis.patch deleted file mode 100644 index 8c47cba243d4890cfc205c6c9b5b04b37705664f..0000000000000000000000000000000000000000 --- a/target-arm-introduce-CPU-feature-dependency-mechanis.patch +++ /dev/null @@ -1,184 +0,0 @@ -From da538bb9d1acc22543a2b7b07ae35a62386bf226 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Thu, 6 Aug 2020 16:14:46 +0800 -Subject: [PATCH 5/9] target/arm: introduce CPU feature dependency mechanism - -Some CPU features are dependent on other CPU features. For example, -ID_AA64PFR0_EL1.FP field and ID_AA64PFR0_EL1.AdvSIMD must have the same -value, which means FP and ADVSIMD are dependent on each other, FPHP and -ADVSIMDHP are dependent on each other. 
- -This commit introduces a mechanism for CPU feature dependency in -AArch64. We build a directed graph from the CPU feature dependency -relationship, each edge from->to means the `to` CPU feature is dependent -on the `from` CPU feature. And we will automatically enable/disable CPU -feature according to the directed graph. - -For example, a, b, and c CPU features are in relationship a->b->c, which -means c is dependent on b and b is dependent on a. If c is enabled by -user, then a and b is enabled automatically. And if a is disabled by -user, then b and c is disabled automatically. - -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/cpu.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 129 insertions(+) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 3f63312c..d5576538 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1306,6 +1306,103 @@ static struct CPUFeatureInfo cpu_features[] = { - }, - }; - -+typedef struct CPUFeatureDep { -+ CPUFeatureInfo from, to; -+} CPUFeatureDep; -+ -+static const CPUFeatureDep feature_dependencies[] = { -+ { -+ .from = FIELD_INFO("fp", ID_AA64PFR0, FP, true, 0, 0xf, false), -+ .to = FIELD_INFO("asimd", ID_AA64PFR0, ADVSIMD, true, 0, 0xf, false), -+ }, -+ { -+ .from = FIELD_INFO("asimd", ID_AA64PFR0, ADVSIMD, true, 0, 0xf, false), -+ .to = FIELD_INFO("fp", ID_AA64PFR0, FP, true, 0, 0xf, false), -+ }, -+ { -+ .from = { -+ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_FP_LENGTH, -+ .shift = R_ID_AA64PFR0_FP_SHIFT, .sign = true, .min_value = 1, -+ .ni_value = 0, .name = "fphp", .is_32bit = false, -+ }, -+ .to = { -+ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_ADVSIMD_LENGTH, -+ .shift = R_ID_AA64PFR0_ADVSIMD_SHIFT, .sign = true, .min_value = 1, -+ .ni_value = 0, .name = "asimdhp", .is_32bit = false, -+ }, -+ }, -+ { -+ .from = { -+ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_ADVSIMD_LENGTH, -+ .shift = R_ID_AA64PFR0_ADVSIMD_SHIFT, .sign = true, .min_value = 1, -+ .ni_value 
= 0, .name = "asimdhp", .is_32bit = false, -+ }, -+ .to = { -+ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_FP_LENGTH, -+ .shift = R_ID_AA64PFR0_FP_SHIFT, .sign = true, .min_value = 1, -+ .ni_value = 0, .name = "fphp", .is_32bit = false, -+ }, -+ }, -+ { -+ -+ .from = FIELD_INFO("aes", ID_AA64ISAR0, AES, false, 1, 0, false), -+ .to = { -+ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_AES_LENGTH, -+ .shift = R_ID_AA64ISAR0_AES_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "pmull", .is_32bit = false, -+ }, -+ }, -+ { -+ -+ .from = FIELD_INFO("sha2", ID_AA64ISAR0, SHA2, false, 1, 0, false), -+ .to = { -+ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, -+ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "sha512", .is_32bit = false, -+ }, -+ }, -+ { -+ .from = FIELD_INFO("lrcpc", ID_AA64ISAR1, LRCPC, false, 1, 0, false), -+ .to = { -+ .reg = ID_AA64ISAR1, .length = R_ID_AA64ISAR1_LRCPC_LENGTH, -+ .shift = R_ID_AA64ISAR1_LRCPC_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "ilrcpc", .is_32bit = false, -+ }, -+ }, -+ { -+ .from = FIELD_INFO("sm3", ID_AA64ISAR0, SM3, false, 1, 0, false), -+ .to = FIELD_INFO("sm4", ID_AA64ISAR0, SM4, false, 1, 0, false), -+ }, -+ { -+ .from = FIELD_INFO("sm4", ID_AA64ISAR0, SM4, false, 1, 0, false), -+ .to = FIELD_INFO("sm3", ID_AA64ISAR0, SM3, false, 1, 0, false), -+ }, -+ { -+ .from = FIELD_INFO("sha1", ID_AA64ISAR0, SHA1, false, 1, 0, false), -+ .to = FIELD_INFO("sha2", ID_AA64ISAR0, SHA2, false, 1, 0, false), -+ }, -+ { -+ .from = FIELD_INFO("sha1", ID_AA64ISAR0, SHA1, false, 1, 0, false), -+ .to = FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), -+ }, -+ { -+ .from = FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), -+ .to = { -+ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, -+ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "sha512", .is_32bit = false, -+ }, -+ }, -+ { 
-+ .from = { -+ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, -+ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "sha512", .is_32bit = false, -+ }, -+ .to = FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), -+ }, -+}; -+ - static void arm_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) - { -@@ -1342,13 +1439,45 @@ static void arm_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, - } - - if (value) { -+ if (object_property_get_bool(obj, feat->name, NULL)) { -+ return; -+ } - isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], - feat->shift, feat->length, - feat->min_value); -+ /* Auto enable the features which current feature is dependent on. */ -+ for (int i = 0; i < ARRAY_SIZE(feature_dependencies); ++i) { -+ const CPUFeatureDep *d = &feature_dependencies[i]; -+ if (strcmp(d->to.name, feat->name) != 0) { -+ continue; -+ } -+ -+ object_property_set_bool(obj, true, d->from.name, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ } - } else { -+ if (!object_property_get_bool(obj, feat->name, NULL)) { -+ return; -+ } - isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], - feat->shift, feat->length, - feat->ni_value); -+ /* Auto disable the features which are dependent on current feature. 
*/ -+ for (int i = 0; i < ARRAY_SIZE(feature_dependencies); ++i) { -+ const CPUFeatureDep *d = &feature_dependencies[i]; -+ if (strcmp(d->from.name, feat->name) != 0) { -+ continue; -+ } -+ -+ object_property_set_bool(obj, false, d->to.name, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ } - } - } - --- -2.25.1 - diff --git a/target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch b/target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch deleted file mode 100644 index 0477419196061a5e452363845ffd4591bfc5ef21..0000000000000000000000000000000000000000 --- a/target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 7ed595242f52d0654982d41a9c2a63be2bc3378e Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Thu, 6 Aug 2020 16:14:55 +0800 -Subject: [PATCH 6/9] target/arm: introduce KVM_CAP_ARM_CPU_FEATURE - -Introduce KVM_CAP_ARM_CPU_FEATURE to check whether KVM supports to set -CPU features in ARM. - -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - linux-headers/linux/kvm.h | 2 ++ - target/arm/cpu.c | 5 +++++ - target/arm/kvm64.c | 14 ++++++++++++++ - target/arm/kvm_arm.h | 7 +++++++ - 4 files changed, 28 insertions(+) - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index 744e888e..4844edc3 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -995,6 +995,8 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 - #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 - -+#define KVM_CAP_ARM_CPU_FEATURE 555 -+ - #ifdef KVM_CAP_IRQ_ROUTING - - struct kvm_irq_routing_irqchip { -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index d5576538..db46afba 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1427,6 +1427,11 @@ static void arm_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, - Error *local_err = NULL; - bool value; - -+ if (!kvm_arm_cpu_feature_supported()) { -+ warn_report("KVM doesn't support to set CPU feature in arm. 
" -+ "Setting to `%s` is ignored.", name); -+ return; -+ } - if (dev->realized) { - qdev_prop_set_after_realize(dev, name, errp); - return; -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 06cf31e8..05345556 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -644,6 +644,20 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - return true; - } - -+bool kvm_arm_cpu_feature_supported(void) -+{ -+ static bool cpu_feature_initialized; -+ static bool cpu_feature_supported; -+ -+ if (!cpu_feature_initialized) { -+ cpu_feature_supported = kvm_check_extension(kvm_state, -+ KVM_CAP_ARM_CPU_FEATURE); -+ cpu_feature_initialized = true; -+ } -+ -+ return cpu_feature_supported; -+} -+ - #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 - - int kvm_arch_init_vcpu(CPUState *cs) -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 9b7104d6..49e80878 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -239,6 +239,13 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - */ - void kvm_arm_add_vcpu_properties(Object *obj); - -+/** -+ * kvm_arm_cpu_feature_supported: -+ * -+ * Returns true if KVM can set CPU features and false otherwise. 
-+ */ -+bool kvm_arm_cpu_feature_supported(void); -+ - /** - * kvm_arm_get_max_vm_ipa_size: - * @ms: Machine state handle --- -2.25.1 - diff --git a/target-arm-kvm-Create-scratch-VM-as-Realm-if-necessa.patch b/target-arm-kvm-Create-scratch-VM-as-Realm-if-necessa.patch new file mode 100644 index 0000000000000000000000000000000000000000..077ffffeff3d912225250b5ccea9cb1bd242fd20 --- /dev/null +++ b/target-arm-kvm-Create-scratch-VM-as-Realm-if-necessa.patch @@ -0,0 +1,47 @@ +From 64f88add04d798c28bfa5e61a134ccde67fcada9 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 4 Dec 2023 18:48:36 +0000 +Subject: [PATCH] target/arm/kvm: Create scratch VM as Realm if necessary + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/bf7f456dfa60a022ac690004ddb08695b23ccde4 + +Some ID registers have a different value for a Realm VM, for example +ID_AA64DFR0_EL1 contains the number of breakpoints/watchpoints +implemented by RMM instead of the hardware. + +Even though RMM is in charge of setting up most Realm registers, KVM +still provides GET_ONE_REG interface on a Realm VM to probe the VM's +capabilities. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + target/arm/kvm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 83462f3f62..cec95483f3 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -73,6 +73,7 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, + { + int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1; + int max_vm_pa_size; ++ int vm_type; + + kvmfd = qemu_open_old("/dev/kvm", O_RDWR); + if (kvmfd < 0) { +@@ -82,8 +83,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, + if (max_vm_pa_size < 0) { + max_vm_pa_size = 0; + } ++ vm_type = kvm_arm_rme_vm_type(MACHINE(qdev_get_machine())); + do { +- vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); ++ vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size | vm_type); + } while (vmfd == -1 && errno == EINTR); + if (vmfd < 0) { + goto err; +-- +2.33.0 + diff --git a/target-arm-kvm-Implement-virtual-time-adjustment.patch b/target-arm-kvm-Implement-virtual-time-adjustment.patch deleted file mode 100644 index 86450c4d8f1739527a7065ec9242706605487b0e..0000000000000000000000000000000000000000 --- a/target-arm-kvm-Implement-virtual-time-adjustment.patch +++ /dev/null @@ -1,290 +0,0 @@ -From 77ee224418fac859acecd9aca4d18555ced42db6 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Tue, 21 Apr 2020 17:32:31 +0800 -Subject: [PATCH 3/4] target/arm/kvm: Implement virtual time adjustment - -When a VM is stopped (such as when it's paused) guest virtual time -should stop counting. Otherwise, when the VM is resumed it will -experience time jumps and its kernel may report soft lockups. Not -counting virtual time while the VM is stopped has the side effect -of making the guest's time appear to lag when compared with real -time, and even with time derived from the physical counter. 
For -this reason, this change, which is enabled by default, comes with -a KVM CPU feature allowing it to be disabled, restoring legacy -behavior. - -This patch only provides the implementation of the virtual time -adjustment. A subsequent patch will provide the CPU property -allowing the change to be enabled and disabled. - -Reported-by: Bijan Mottahedeh -Signed-off-by: Andrew Jones -Message-id: 20200120101023.16030-6-drjones@redhat.com -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell ---- - target/arm/cpu.h | 7 ++++ - target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++ - target/arm/kvm32.c | 2 + - target/arm/kvm64.c | 2 + - target/arm/kvm_arm.h | 37 ++++++++++++++++++ - target/arm/machine.c | 7 ++++ - 6 files changed, 147 insertions(+) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 94c990cd..e19531a7 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -816,6 +816,13 @@ struct ARMCPU { - /* KVM init features for this CPU */ - uint32_t kvm_init_features[7]; - -+ /* KVM CPU state */ -+ -+ /* KVM virtual time adjustment */ -+ bool kvm_adjvtime; -+ bool kvm_vtime_dirty; -+ uint64_t kvm_vtime; -+ - /* Uniprocessor system with MP extensions */ - bool mp_is_up; - -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index cc7a46df..21fb7ecd 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -336,6 +336,22 @@ static int compare_u64(const void *a, const void *b) - return 0; - } - -+/* -+ * cpreg_values are sorted in ascending order by KVM register ID -+ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find -+ * the storage for a KVM register by ID with a binary search. 
-+ */ -+static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) -+{ -+ uint64_t *res; -+ -+ res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len, -+ sizeof(uint64_t), compare_u64); -+ assert(res); -+ -+ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; -+} -+ - /* Initialize the ARMCPU cpreg list according to the kernel's - * definition of what CPU registers it knows about (and throw away - * the previous TCG-created cpreg list). -@@ -489,6 +505,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) - return ok; - } - -+void kvm_arm_cpu_pre_save(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_vtime_dirty) { -+ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; -+ } -+} -+ -+void kvm_arm_cpu_post_load(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_adjvtime) { -+ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); -+ cpu->kvm_vtime_dirty = true; -+ } -+} -+ - void kvm_arm_reset_vcpu(ARMCPU *cpu) - { - int ret; -@@ -556,6 +589,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) - return 0; - } - -+void kvm_arm_get_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); -+ if (ret) { -+ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = true; -+} -+ -+void kvm_arm_put_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (!cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); -+ if (ret) { -+ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = false; -+} -+ - int
kvm_put_vcpu_events(ARMCPU *cpu) - { - CPUARMState *env = &cpu->env; -@@ -667,6 +744,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) - return MEMTXATTRS_UNSPECIFIED; - } - -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state) -+{ -+ CPUState *cs = opaque; -+ ARMCPU *cpu = ARM_CPU(cs); -+ -+ if (running) { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_put_virtual_time(cs); -+ } -+ } else { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_get_virtual_time(cs); -+ } -+ } -+} - - int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) - { -diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c -index 51f78f72..ee158830 100644 ---- a/target/arm/kvm32.c -+++ b/target/arm/kvm32.c -@@ -195,6 +195,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index f2f0a92e..4f0bf000 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -609,6 +609,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 32d97ce5..97560d4e 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -113,6 +113,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); - */ - bool write_kvmstate_to_list(ARMCPU *cpu); - -+/** -+ * kvm_arm_cpu_pre_save: -+ * @cpu: ARMCPU -+ * -+ * Called after write_kvmstate_to_list() from cpu_pre_save() to update -+ * the cpreg list with KVM CPU state. 
-+ */ -+void kvm_arm_cpu_pre_save(ARMCPU *cpu); -+ -+/** -+ * kvm_arm_cpu_post_load: -+ * @cpu: ARMCPU -+ * -+ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. -+ */ -+void kvm_arm_cpu_post_load(ARMCPU *cpu); -+ - /** - * kvm_arm_reset_vcpu: - * @cpu: ARMCPU -@@ -241,6 +258,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -+/** -+ * kvm_arm_get_virtual_time: -+ * @cs: CPUState -+ * -+ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. -+ */ -+void kvm_arm_get_virtual_time(CPUState *cs); -+ -+/** -+ * kvm_arm_put_virtual_time: -+ * @cs: CPUState -+ * -+ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. -+ */ -+void kvm_arm_put_virtual_time(CPUState *cs); -+ -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state); -+ - int kvm_arm_vgic_probe(void); - - void kvm_arm_pmu_set_irq(CPUState *cs, int irq); -@@ -272,6 +307,8 @@ static inline int kvm_arm_vgic_probe(void) - static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} - static inline void kvm_arm_pmu_init(CPUState *cs) {} - -+static inline void kvm_arm_get_virtual_time(CPUState *cs) {} -+static inline void kvm_arm_put_virtual_time(CPUState *cs) {} - #endif - - static inline const char *gic_class_name(void) -diff --git a/target/arm/machine.c b/target/arm/machine.c -index 3fd319a3..ee3c59a6 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -644,6 +644,12 @@ static int cpu_pre_save(void *opaque) - /* This should never fail */ - abort(); - } -+ -+ /* -+ * kvm_arm_cpu_pre_save() must be called after -+ * write_kvmstate_to_list() -+ */ -+ kvm_arm_cpu_pre_save(cpu); - } else { - if (!write_cpustate_to_list(cpu, false)) { - /* This should never fail. */ -@@ -746,6 +752,7 @@ static int cpu_post_load(void *opaque, int version_id) - * we're using it. 
- */ - write_list_to_cpustate(cpu); -+ kvm_arm_cpu_post_load(cpu); - } else { - if (!write_list_to_cpustate(cpu)) { - return -1; --- -2.23.0 diff --git a/target-arm-kvm-Return-immediately-on-error-in-kvm_ar.patch b/target-arm-kvm-Return-immediately-on-error-in-kvm_ar.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c97a2f7a4e18b793c5cc5c84e7fb083572f6057 --- /dev/null +++ b/target-arm-kvm-Return-immediately-on-error-in-kvm_ar.patch @@ -0,0 +1,77 @@ +From 06d0249f7fc42d05b8461e6b2675f8d1fddb0707 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 21 Feb 2024 15:50:42 +0000 +Subject: [PATCH] target/arm/kvm: Return immediately on error in + kvm_arch_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/1385e5d0517c42a8a3d18c4eb36db48e86370aa3 + +Returning an error to kvm_init() is fatal anyway, no need to continue +the initialization. + +Leave the `ret` variable in the function scope because it will be reused +when adding RME support. + +Signed-off-by: Jean-Philippe Brucker +Reviewed-by: Philippe Mathieu-Daudé +Conflicts: + target/arm/kvm.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + target/arm/kvm.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index ab31515a2a..e32a064f94 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -276,7 +276,7 @@ static void kvm_update_ipiv_cap(KVMState *s) + int kvm_arch_init(MachineState *ms, KVMState *s) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +- int ret = 0; ++ int ret; + + /* For ARM interrupt delivery is always asynchronous, + * whether we are using an in-kernel VGIC or not. 
+@@ -295,7 +295,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { + error_report("Using more than 256 vcpus requires a host kernel " + "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); +- ret = -EINVAL; ++ return -EINVAL; + } + + if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) { +@@ -317,13 +317,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + warn_report("Eager Page Split support not available"); + } else if (!(s->kvm_eager_split_size & sizes)) { + error_report("Eager Page Split requested chunk size not valid"); +- ret = -EINVAL; ++ return -EINVAL; + } else { + ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0, + s->kvm_eager_split_size); + if (ret < 0) { + error_report("Enabling of Eager Page Split failed: %s", + strerror(-ret)); ++ return ret; + } + } + } +@@ -348,7 +349,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + kvm_arm_init_debug(s); + kvm_update_ipiv_cap(s); + +- return ret; ++ return 0; + } + + unsigned long kvm_arch_vcpu_id(CPUState *cpu) +-- +2.33.0 + diff --git a/target-arm-kvm-Split-kvm_arch_get-put_registers.patch b/target-arm-kvm-Split-kvm_arch_get-put_registers.patch new file mode 100644 index 0000000000000000000000000000000000000000..13f20f72b1e3840b283e8131cbea79ee93591d29 --- /dev/null +++ b/target-arm-kvm-Split-kvm_arch_get-put_registers.patch @@ -0,0 +1,85 @@ +From 4b69d18a5600e610d08584fafb87030e272ebb2b Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Tue, 21 Jun 2022 11:52:14 +0100 +Subject: [PATCH] target/arm/kvm: Split kvm_arch_get/put_registers + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/a66c2761d7d6ba0f1f0db383cbad158e4cced72f + +The confidential guest support in KVM limits the number of registers +that we can read and write. Split the get/put_registers function to +prepare for it. 
+ +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/kvm.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + target/arm/kvm64.c | 30 ++++++++++++++++++++++++++++-- + 1 file changed, 28 insertions(+), 2 deletions(-) + +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 651f603dd8..20a357061c 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -838,7 +838,7 @@ static int kvm_arch_put_sve(CPUState *cs) + return 0; + } + +-int kvm_arch_put_registers(CPUState *cs, int level) ++static int kvm_arm_put_core_regs(CPUState *cs, int level) + { + uint64_t val; + uint32_t fpr; +@@ -941,6 +941,19 @@ int kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + ++ return 0; ++} ++ ++int kvm_arch_put_registers(CPUState *cs, int level) ++{ ++ int ret; ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ ret = kvm_arm_put_core_regs(cs, level); ++ if (ret) { ++ return ret; ++ } ++ + write_cpustate_to_list(cpu, true); + + if (!write_list_to_kvmstate(cpu, level)) { +@@ -1024,7 +1037,7 @@ static int kvm_arch_get_sve(CPUState *cs) + return 0; + } + +-int kvm_arch_get_registers(CPUState *cs) ++static int kvm_arm_get_core_regs(CPUState *cs) + { + uint64_t val; + unsigned int el; +@@ -1127,6 +1140,19 @@ int kvm_arch_get_registers(CPUState *cs) + } + vfp_set_fpcr(env, fpr); + ++ return 0; ++} ++ ++int kvm_arch_get_registers(CPUState *cs) ++{ ++ int ret; ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ ret = kvm_arm_get_core_regs(cs); ++ if (ret) { ++ return ret; ++ } ++ + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; +-- +2.33.0 + diff --git a/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch b/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b4922cc702a47bb45f2fa001c86719a8bb9c0c9 --- /dev/null +++ b/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch @@ -0,0 +1,50 @@ +From a079801cd3ae6484cad6826f20bcf4ecc7e97ead Mon Sep 17 00:00:00 2001 +From: Jean-Philippe 
Brucker +Date: Wed, 5 May 2021 15:43:27 +0200 +Subject: [PATCH] target/arm/kvm: Write CPU state back to KVM on reset + +When a KVM vCPU is reset following a PSCI CPU_ON call, its power state +is not synchronized with KVM at the moment. Because the vCPU is not +marked dirty, we miss the call to kvm_arch_put_registers() that writes +to KVM's MP_STATE. Force mp_state synchronization. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Salil Mehta +--- + target/arm/kvm.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 70cf15b550..aca652621f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -636,11 +636,12 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu) + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; ++ CPUState *cs = CPU(cpu); + + /* Re-init VCPU so that all registers are set to + * their respective reset values. + */ +- ret = kvm_arm_vcpu_init(CPU(cpu)); ++ ret = kvm_arm_vcpu_init(cs); + if (ret < 0) { + fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); + abort(); +@@ -657,6 +658,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + * for the same reason we do so in kvm_arch_get_registers(). + */ + write_list_to_cpustate(cpu); ++ ++ /* ++ * Ensure we call kvm_arch_put_registers(). The vCPU isn't marked dirty if ++ * it was parked in KVM and is now booting from a PSCI CPU_ON call. 
++ */ ++ cs->vcpu_dirty = true; + } + + void kvm_arm_create_host_vcpu(ARMCPU *cpu) +-- +2.27.0 + diff --git a/target-arm-kvm-rme-Add-DMA-remapping-for-the-shared-.patch b/target-arm-kvm-rme-Add-DMA-remapping-for-the-shared-.patch new file mode 100644 index 0000000000000000000000000000000000000000..9c11a569b43777a4152023df01353a31f236a53b --- /dev/null +++ b/target-arm-kvm-rme-Add-DMA-remapping-for-the-shared-.patch @@ -0,0 +1,363 @@ +From 3b1146d0a9d5e7a31e84b1c26b7331c84d0b5b05 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 8 Jan 2025 17:34:11 +0000 +Subject: [PATCH] target/arm/kvm-rme: Add DMA remapping for the shared memory + region + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/1efc2744bf6ac5fc074baedd42d3d40ed73c6405 + +In Arm CCA, the guest-physical address space is split in half. The top +half represents memory shared between guest and host, and the bottom +half is private to the guest. From QEMU's point of view, the two halves +are merged into a single region, and pages within this region are either +shared or private. + +Addresses used by device DMA can potentially target both halves. +Physical devices assigned to the VM access the top half, until they are +authenticated using features like PCIe CMA-SPDM at which point they can +also access memory private to the guest. + +Virtual devices implemented by the host are only allowed to access the +top half. For emulated MMIO, KVM strips the GPA before returning to +QEMU, so the GPA already belongs to QEMU's merged view of guest memory. +However DMA addresses cannot be stripped this way and need special +handling by the VMM: + +* When emulating DMA the VMM needs to translate the addresses into its + merged view. Add an IOMMU memory region on the top half, that + retargets DMA accesses to the merged sysmem. + +* when creating IOMMU mappings for (unauthenticated) VFIO devices, the VMM + needs to map the top half of guest-physical addresses to the shared pages. 
+ Install RAM discard listeners that issue IOMMU map and unmap requests + to IOMMU listeners such as VFIO. + +The resulting mtree looks like this: + + address-space: vfio-pci + 0000000000000000-ffffffffffffffff (prio 0, i/o): bus master container + 0000000000000000-000001ffffffffff (prio 0, i/o): alias bus master @realm-dma-region 0000000000000000-000001ffffffffff + + memory-region: realm-dma-region + 0000000000000000-000001ffffffffff (prio 0, i/o): realm-dma-region + +There are at least two problems with this approach: given that we use +the PCI bus master address space, a vIOMMU cannot install its own +address space at the moment. And since sysbus devices can't have an +IOMMU at the moment, DMA from non-PCI devices isn't supported. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + hw/arm/virt.c | 2 + + target/arm/kvm-rme.c | 222 +++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm_arm.h | 15 +++ + 3 files changed, 239 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 51f7c940f4..95f6acf655 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2880,6 +2880,8 @@ static void machvirt_init(MachineState *machine) + vms->fw_cfg, OBJECT(vms)); + } + ++ kvm_arm_rme_init_gpa_space(vms->highest_gpa, vms->bus); ++ + vms->bootinfo.ram_size = machine->ram_size; + vms->bootinfo.board_id = -1; + vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +index 5e785fa3b6..299af009d9 100644 +--- a/target/arm/kvm-rme.c ++++ b/target/arm/kvm-rme.c +@@ -9,6 +9,7 @@ + #include "hw/boards.h" + #include "hw/core/cpu.h" + #include "hw/loader.h" ++#include "hw/pci/pci.h" + #include "kvm_arm.h" + #include "migration/blocker.h" + #include "qapi/error.h" +@@ -24,6 +25,35 @@ OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) + + #define RME_PAGE_SIZE qemu_real_host_page_size() + ++/* ++ * Realms have a split guest-physical address space: the bottom half is private ++ * to the realm, and 
the top half is shared with the host. Within QEMU, we use a ++ * merged view of both halves. Most of RAM is private to the guest and not ++ * accessible to us, but the guest shares some pages with us. ++ * ++ * For DMA, devices generally target the shared half (top) of the guest address ++ * space. Only the devices trusted by the guest (using mechanisms like TDISP for ++ * device authentication) can access the bottom half. ++ * ++ * RealmDmaRegion performs remapping of top-half accesses to system memory. ++ */ ++struct RealmDmaRegion { ++ IOMMUMemoryRegion parent_obj; ++}; ++ ++#define TYPE_REALM_DMA_REGION "realm-dma-region" ++OBJECT_DECLARE_SIMPLE_TYPE(RealmDmaRegion, REALM_DMA_REGION) ++OBJECT_DEFINE_SIMPLE_TYPE(RealmDmaRegion, realm_dma_region, ++ REALM_DMA_REGION, IOMMU_MEMORY_REGION); ++ ++typedef struct RealmPrivateSharedListener { ++ MemoryRegion *mr; ++ hwaddr offset_within_region; ++ uint64_t granularity; ++ PrivateSharedListener listener; ++ QLIST_ENTRY(RealmPrivateSharedListener) rpsl_next; ++} RealmPrivateSharedListener; ++ + typedef struct { + hwaddr base; + hwaddr size; +@@ -39,6 +69,12 @@ struct RmeGuest { + RmeGuestMeasurementAlgorithm measurement_algo; + + RmeRamRegion init_ram; ++ uint8_t ipa_bits; ++ ++ RealmDmaRegion *dma_region; ++ QLIST_HEAD(, RealmPrivateSharedListener) ram_discard_list; ++ MemoryListener memory_listener; ++ AddressSpace dma_as; + }; + + OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, +@@ -305,6 +341,7 @@ static void rme_guest_init(Object *obj) + + static void rme_guest_finalize(Object *obj) + { ++ memory_listener_unregister(&rme_guest->memory_listener); + } + + static gint rme_compare_ram_regions(gconstpointer a, gconstpointer b) +@@ -404,3 +441,188 @@ int kvm_arm_rme_vm_type(MachineState *ms) + } + return 0; + } ++ ++static int rme_ram_discard_notify(StateChangeListener *scl, ++ MemoryRegionSection *section, ++ bool populate) ++{ ++ hwaddr gpa, next; ++ IOMMUTLBEvent event; ++ const hwaddr end = 
section->offset_within_address_space + ++ int128_get64(section->size); ++ const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1); ++ PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl); ++ RealmPrivateSharedListener *rpsl = container_of(psl, RealmPrivateSharedListener, ++ listener); ++ ++ assert(rme_guest->dma_region != NULL); ++ ++ event.type = populate ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = &address_space_memory; ++ event.entry.perm = populate ? IOMMU_RW : IOMMU_NONE; ++ event.entry.addr_mask = rpsl->granularity - 1; ++ ++ assert(end <= address_mask); ++ ++ /* ++ * Create IOMMU mappings from the top half of the address space to the RAM ++ * region. ++ */ ++ for (gpa = section->offset_within_address_space; gpa < end; gpa = next) { ++ event.entry.iova = gpa + address_mask + 1; ++ event.entry.translated_addr = gpa; ++ memory_region_notify_iommu(IOMMU_MEMORY_REGION(rme_guest->dma_region), ++ 0, event); ++ ++ next = ROUND_UP(gpa + 1, rpsl->granularity); ++ next = MIN(next, end); ++ } ++ ++ return 0; ++} ++ ++static int rme_ram_discard_notify_populate(StateChangeListener *scl, ++ MemoryRegionSection *section) ++{ ++ return rme_ram_discard_notify(scl, section, /* populate */ true); ++} ++ ++static int rme_ram_discard_notify_discard(StateChangeListener *scl, ++ MemoryRegionSection *section) ++{ ++ return rme_ram_discard_notify(scl, section, /* populate */ false); ++} ++ ++/* Install a RAM discard listener */ ++static void rme_listener_region_add(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ RealmPrivateSharedListener *rpsl; ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); ++ ++ ++ if (!gsm) { ++ return; ++ } ++ ++ rpsl = g_new0(RealmPrivateSharedListener, 1); ++ rpsl->mr = section->mr; ++ rpsl->offset_within_region = section->offset_within_region; ++ rpsl->granularity = generic_state_manager_get_min_granularity(gsm, ++ section->mr); ++ 
QLIST_INSERT_HEAD(&rme_guest->ram_discard_list, rpsl, rpsl_next); ++ ++ private_shared_listener_init(&rpsl->listener, ++ rme_ram_discard_notify_populate, ++ rme_ram_discard_notify_discard, true); ++ generic_state_manager_register_listener(gsm, &rpsl->listener.scl, section); ++} ++ ++static void rme_listener_region_del(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ RealmPrivateSharedListener *rpsl; ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); ++ ++ if (!gsm) { ++ return; ++ } ++ ++ QLIST_FOREACH(rpsl, &rme_guest->ram_discard_list, rpsl_next) { ++ if (MEMORY_REGION(rpsl->mr) == section->mr && ++ rpsl->offset_within_region == section->offset_within_region) { ++ generic_state_manager_unregister_listener(gsm, &rpsl->listener.scl); ++ g_free(rpsl); ++ break; ++ } ++ } ++} ++ ++static AddressSpace *rme_dma_get_address_space(PCIBus *bus, void *opaque, ++ int devfn) ++{ ++ return &rme_guest->dma_as; ++} ++ ++static const PCIIOMMUOps rme_dma_ops = { ++ .get_address_space = rme_dma_get_address_space, ++}; ++ ++void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus) ++{ ++ RealmDmaRegion *dma_region; ++ const unsigned int ipa_bits = 64 - clz64(highest_gpa) + 1; ++ ++ if (!rme_guest) { ++ return; ++ } ++ ++ assert(ipa_bits < 64); ++ ++ /* ++ * Setup a DMA translation from the shared top half of the guest-physical ++ * address space to our merged view of RAM. ++ */ ++ dma_region = g_new0(RealmDmaRegion, 1); ++ ++ memory_region_init_iommu(dma_region, sizeof(*dma_region), ++ TYPE_REALM_DMA_REGION, OBJECT(rme_guest), ++ "realm-dma-region", 1ULL << ipa_bits); ++ address_space_init(&rme_guest->dma_as, MEMORY_REGION(dma_region), ++ TYPE_REALM_DMA_REGION); ++ rme_guest->dma_region = dma_region; ++ ++ pci_setup_iommu(pci_bus, &rme_dma_ops, NULL); ++ ++ /* ++ * Install notifiers to forward RAM discard changes to the IOMMU notifiers ++ * (ie. tell VFIO to map shared pages and unmap private ones). 
++ */ ++ rme_guest->memory_listener = (MemoryListener) { ++ .name = "rme", ++ .region_add = rme_listener_region_add, ++ .region_del = rme_listener_region_del, ++ }; ++ memory_listener_register(&rme_guest->memory_listener, ++ &address_space_memory); ++ ++ rme_guest->ipa_bits = ipa_bits; ++} ++ ++static void realm_dma_region_init(Object *obj) ++{ ++} ++ ++static IOMMUTLBEntry realm_dma_region_translate(IOMMUMemoryRegion *mr, ++ hwaddr addr, ++ IOMMUAccessFlags flag, ++ int iommu_idx) ++{ ++ const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1); ++ IOMMUTLBEntry entry = { ++ .target_as = &address_space_memory, ++ .iova = addr, ++ .translated_addr = addr & address_mask, ++ .addr_mask = address_mask, ++ .perm = IOMMU_RW, ++ }; ++ ++ return entry; ++} ++ ++static void realm_dma_region_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) ++{ ++ /* Nothing is shared at boot */ ++} ++ ++static void realm_dma_region_finalize(Object *obj) ++{ ++} ++ ++static void realm_dma_region_class_init(ObjectClass *oc, void *data) ++{ ++ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(oc); ++ ++ imrc->translate = realm_dma_region_translate; ++ imrc->replay = realm_dma_region_replay; ++} +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 4a9707a435..b4d54e816f 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -441,6 +441,16 @@ int kvm_arm_rme_vcpu_init(CPUState *cs); + */ + void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size); + ++/** ++ * kvm_arm_rme_init_gpa_space: ++ * @highest_gpa: highest address of the lower half of the guest address space ++ * @pci_bus: The main PCI bus, for which PCI queries DMA address spaces ++ * ++ * Setup the guest-physical address space for a Realm. Install a memory region ++ * and notifier to manage the shared upper half of the address space.
++ */ ++void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus); ++ + #else + + /* +@@ -471,6 +481,11 @@ static inline void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size) + { + } + ++static inline void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, ++ PCIBus *pci_bus) ++{ ++} ++ + /* + * These functions should never actually be called without KVM support. + */ +-- +2.33.0 + diff --git a/target-arm-kvm-rme-Add-Realm-Personalization-Value-p.patch b/target-arm-kvm-rme-Add-Realm-Personalization-Value-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..1408e429e61f10d5979f9140e566b049a6d8080a --- /dev/null +++ b/target-arm-kvm-rme-Add-Realm-Personalization-Value-p.patch @@ -0,0 +1,190 @@ +From 853f2c56d022c88aff929824ed5278c958a47a6d Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Tue, 7 Feb 2023 18:55:22 +0000 +Subject: [PATCH] target/arm/kvm-rme: Add Realm Personalization Value parameter +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/c2659aa7e7fde76a3bc9914f348ee5c2d7b4d15d + +The Realm Personalization Value (RPV) is provided by the user to +distinguish Realms that have the same initial measurement. + +The user provides a base64 string encoding 64 bytes. They are stored +into the RPV in the same order. + +Cc: Eric Blake +Cc: Markus Armbruster +Cc: Daniel P. 
Berrangé +Cc: Eduardo Habkost +Acked-by: Markus Armbruster +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + qapi/qom.json | 15 ++++++++ + target/arm/kvm-rme.c | 85 ++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 100 insertions(+) + +diff --git a/qapi/qom.json b/qapi/qom.json +index e405c51da3..0120369454 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -952,6 +952,20 @@ + '*kae': 'uint32', + '*measurement-algo': 'TmmGuestMeasurementAlgo' } } + ++## ++# @RmeGuestProperties: ++# ++# Properties for rme-guest objects. ++# ++# @personalization-value: a base64 string encoding a 64-byte (512-bit) value. ++# This optional parameter allows to uniquely identify the VM instance ++# during attestation. (default: all-zero) ++# ++# Since: 10.0 ++## ++{ 'struct': 'RmeGuestProperties', ++ 'data': { '*personalization-value': 'str' } } ++ + ## + # @ObjectType: + # +@@ -1070,6 +1084,7 @@ + 'pr-manager-helper': { 'type': 'PrManagerHelperProperties', + 'if': 'CONFIG_LINUX' }, + 'qtest': 'QtestProperties', ++ 'rme-guest': 'RmeGuestProperties', + 'rng-builtin': 'RngProperties', + 'rng-egd': 'RngEgdProperties', + 'rng-random': { 'type': 'RngRandomProperties', +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +index 1f42187699..e8976e4740 100644 +--- a/target/arm/kvm-rme.c ++++ b/target/arm/kvm-rme.c +@@ -12,6 +12,7 @@ + #include "kvm_arm.h" + #include "migration/blocker.h" + #include "qapi/error.h" ++#include "qemu/base64.h" + #include "qemu/error-report.h" + #include "qom/object_interfaces.h" + #include "exec/confidential-guest-support.h" +@@ -33,6 +34,9 @@ struct RmeGuest { + Notifier rom_load_notifier; + GSList *ram_regions; + ++ char *personalization_value_str; ++ uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; ++ + RmeRamRegion init_ram; + }; + +@@ -42,6 +46,48 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, + + static RmeGuest *rme_guest; + ++static int rme_configure_one(RmeGuest *guest, 
uint32_t cfg, Error **errp) ++{ ++ int ret; ++ const char *cfg_str; ++ struct arm_rme_config args = { ++ .cfg = cfg, ++ }; ++ ++ switch (cfg) { ++ case ARM_RME_CONFIG_RPV: ++ memcpy(args.rpv, guest->personalization_value, ARM_RME_CONFIG_RPV_SIZE); ++ cfg_str = "personalization value"; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, ++ KVM_CAP_ARM_RME_CONFIG_REALM, (intptr_t)&args); ++ if (ret) { ++ error_setg_errno(errp, -ret, "failed to configure %s", cfg_str); ++ } ++ return ret; ++} ++ ++static int rme_configure(Error **errp) ++{ ++ int ret; ++ size_t option; ++ const uint32_t config_options[] = { ++ ARM_RME_CONFIG_RPV, ++ }; ++ ++ for (option = 0; option < ARRAY_SIZE(config_options); option++) { ++ ret = rme_configure_one(rme_guest, config_options[option], errp); ++ if (ret) { ++ return ret; ++ } ++ } ++ return 0; ++} ++ + static int rme_init_ram(RmeRamRegion *ram, Error **errp) + { + int ret; +@@ -122,6 +168,10 @@ static int rme_create_realm(Error **errp) + { + int ret; + ++ if (rme_configure(errp)) { ++ return -1; ++ } ++ + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_CREATE_REALM); + if (ret) { +@@ -167,8 +217,43 @@ static void rme_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++static char *rme_get_rpv(Object *obj, Error **errp) ++{ ++ RmeGuest *guest = RME_GUEST(obj); ++ ++ return g_strdup(guest->personalization_value_str); ++} ++ ++static void rme_set_rpv(Object *obj, const char *value, Error **errp) ++{ ++ RmeGuest *guest = RME_GUEST(obj); ++ g_autofree uint8_t *rpv = NULL; ++ size_t len; ++ ++ rpv = qbase64_decode(value, -1, &len, errp); ++ if (!rpv) { ++ return; ++ } ++ ++ if (len != sizeof(guest->personalization_value)) { ++ error_setg(errp, ++ "expecting a Realm Personalization Value of size %zu, got %zu\n", ++ sizeof(guest->personalization_value), len); ++ return; ++ } ++ memcpy(guest->personalization_value, rpv, len); ++ ++ /* Save 
the value so we don't need to encode it in the getter */ ++ g_free(guest->personalization_value_str); ++ guest->personalization_value_str = g_strdup(value); ++} ++ + static void rme_guest_class_init(ObjectClass *oc, void *data) + { ++ object_class_property_add_str(oc, "personalization-value", rme_get_rpv, ++ rme_set_rpv); ++ object_class_property_set_description(oc, "personalization-value", ++ "Realm personalization value (64 bytes encodede in base64)"); + } + + static void rme_guest_init(Object *obj) +-- +2.33.0 + diff --git a/target-arm-kvm-rme-Add-measurement-algorithm-propert.patch b/target-arm-kvm-rme-Add-measurement-algorithm-propert.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7b88e944b09024bc74bc78f3ec706860fc63c66 --- /dev/null +++ b/target-arm-kvm-rme-Add-measurement-algorithm-propert.patch @@ -0,0 +1,158 @@ +From 82c8a1979a23a073c3ed8965de10f79e3a676b2c Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Thu, 27 Oct 2022 19:22:48 +0100 +Subject: [PATCH] target/arm/kvm-rme: Add measurement algorithm property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/26ed0bafc44a8d4d8fcc46a1ee7a2b8aa35b1c33 + +This option selects which measurement algorithm to use for attestation. +Supported values are SHA256 and SHA512. Default to SHA512 arbitrarily. + +SHA512 is generally faster on 64-bit architectures. On a few arm64 CPUs +I tested SHA256 is much faster, but that's most likely because they only +support acceleration via FEAT_SHA256 (Armv8.0) and not FEAT_SHA512 +(Armv8.2). Future CPUs supporting RME are likely to also support +FEAT_SHA512. + +Cc: Eric Blake +Cc: Markus Armbruster +Cc: Daniel P. 
Berrangé +Cc: Eduardo Habkost +Acked-by: Markus Armbruster +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: houmingyong +--- + qapi/qom.json | 20 +++++++++++++++++++- + target/arm/kvm-rme.c | 38 ++++++++++++++++++++++++++++++++++++++ + 2 files changed, 57 insertions(+), 1 deletion(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 0120369454..02b45e1068 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -952,6 +952,20 @@ + '*kae': 'uint32', + '*measurement-algo': 'TmmGuestMeasurementAlgo' } } + ++## ++# @RmeGuestMeasurementAlgorithm: ++# ++# @sha256: Use the SHA256 algorithm ++# ++# @sha512: Use the SHA512 algorithm ++# ++# Algorithm to use for realm measurements ++# ++# Since: 10.0 ++## ++{ 'enum': 'RmeGuestMeasurementAlgorithm', ++ 'data': ['sha256', 'sha512'] } ++ + ## + # @RmeGuestProperties: + # +@@ -961,10 +975,14 @@ + # This optional parameter allows to uniquely identify the VM instance + # during attestation. (default: all-zero) + # ++# @measurement-algorithm: Realm measurement algorithm ++# (default: sha512) ++# + # Since: 10.0 + ## + { 'struct': 'RmeGuestProperties', +- 'data': { '*personalization-value': 'str' } } ++ 'data': { '*personalization-value': 'str', ++ '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm' } } + + ## + # @ObjectType: +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +index e8976e4740..5e785fa3b6 100644 +--- a/target/arm/kvm-rme.c ++++ b/target/arm/kvm-rme.c +@@ -36,6 +36,7 @@ struct RmeGuest { + + char *personalization_value_str; + uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; ++ RmeGuestMeasurementAlgorithm measurement_algo; + + RmeRamRegion init_ram; + }; +@@ -59,6 +60,19 @@ static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp) + memcpy(args.rpv, guest->personalization_value, ARM_RME_CONFIG_RPV_SIZE); + cfg_str = "personalization value"; + break; ++ case ARM_RME_CONFIG_HASH_ALGO: ++ switch (guest->measurement_algo) { ++ case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: ++ 
args.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256; ++ break; ++ case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: ++ args.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ cfg_str = "hash algorithm"; ++ break; + default: + g_assert_not_reached(); + } +@@ -77,6 +91,7 @@ static int rme_configure(Error **errp) + size_t option; + const uint32_t config_options[] = { + ARM_RME_CONFIG_RPV, ++ ARM_RME_CONFIG_HASH_ALGO, + }; + + for (option = 0; option < ARRAY_SIZE(config_options); option++) { +@@ -248,12 +263,34 @@ static void rme_set_rpv(Object *obj, const char *value, Error **errp) + guest->personalization_value_str = g_strdup(value); + } + ++static int rme_get_measurement_algo(Object *obj, Error **errp) ++{ ++ RmeGuest *guest = RME_GUEST(obj); ++ ++ return guest->measurement_algo; ++} ++ ++static void rme_set_measurement_algo(Object *obj, int algo, Error **errp) ++{ ++ RmeGuest *guest = RME_GUEST(obj); ++ ++ guest->measurement_algo = algo; ++} ++ + static void rme_guest_class_init(ObjectClass *oc, void *data) + { + object_class_property_add_str(oc, "personalization-value", rme_get_rpv, + rme_set_rpv); + object_class_property_set_description(oc, "personalization-value", + "Realm personalization value (64 bytes encodede in base64)"); ++ ++ object_class_property_add_enum(oc, "measurement-algorithm", ++ "RmeGuestMeasurementAlgorithm", ++ &RmeGuestMeasurementAlgorithm_lookup, ++ rme_get_measurement_algo, ++ rme_set_measurement_algo); ++ object_class_property_set_description(oc, "measurement-algorithm", ++ "Realm measurement algorithm ('sha256', 'sha512')"); + } + + static void rme_guest_init(Object *obj) +@@ -263,6 +300,7 @@ static void rme_guest_init(Object *obj) + exit(1); + } + rme_guest = RME_GUEST(obj); ++ rme_guest->measurement_algo = RME_GUEST_MEASUREMENT_ALGORITHM_SHA512; + } + + static void rme_guest_finalize(Object *obj) +-- +2.33.0 + diff --git a/target-arm-kvm-rme-Add-measurement-log.patch 
b/target-arm-kvm-rme-Add-measurement-log.patch new file mode 100644 index 0000000000000000000000000000000000000000..cf5ac3188245f67464dc89028f9ad4d059330784 --- /dev/null +++ b/target-arm-kvm-rme-Add-measurement-log.patch @@ -0,0 +1,636 @@ +From 58bb383f608b5f4f58f9fac365efe742c1f0335c Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Thu, 7 Nov 2024 17:38:11 +0000 +Subject: [PATCH] target/arm/kvm-rme: Add measurement log + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/4a2fc9b28becfdae3d5662218b921f8970825bd6 + +Create an event log in the format defined by Trusted Computing Group +for TPM2. It contains information about the VMM, the Realm parameters, +any data loaded into guest memory before boot, and the initial vCPU +state. + +The guest can access this log from RAM and send it to a verifier, to +help the verifier independently compute the Realm Initial Measurement, +and check that the data we load into guest RAM is known-good images. +Without this log, in order to end up with the right Measurement, the +verifier needs to guess what is loaded, where and in what order. + +Cc: Stefan Berger +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/Kconfig + target/arm/kvm-rme.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + qapi/qom.json | 9 +- + target/arm/Kconfig | 1 + + target/arm/kvm-rme.c | 403 ++++++++++++++++++++++++++++++++++++++++++- + target/arm/kvm_arm.h | 15 ++ + 4 files changed, 426 insertions(+), 2 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 02b45e1068..e0590a6019 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -978,11 +978,18 @@ + # @measurement-algorithm: Realm measurement algorithm + # (default: sha512) + # ++# @measurement-log: Enable a measurement log for the Realm. 
All events ++# that contribute to the Realm Initial Measurement (RIM) are added ++# to a log in TCG TPM2 format, which is itself loaded into Realm ++# memory (unmeasured) and can then be read by a verifier to ++# reconstruct the RIM. ++# + # Since: 10.0 + ## + { 'struct': 'RmeGuestProperties', + 'data': { '*personalization-value': 'str', +- '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm' } } ++ '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm', ++ '*measurement-log': 'bool'} } + + ## + # @ObjectType: +diff --git a/target/arm/Kconfig b/target/arm/Kconfig +index bf57d739cd..14977f1d83 100644 +--- a/target/arm/Kconfig ++++ b/target/arm/Kconfig +@@ -9,3 +9,4 @@ config ARM + config AARCH64 + bool + select ARM ++ select TPM_LOG if KVM +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +index 299af009d9..26dda39df6 100644 +--- a/target/arm/kvm-rme.c ++++ b/target/arm/kvm-rme.c +@@ -10,11 +10,13 @@ + #include "hw/core/cpu.h" + #include "hw/loader.h" + #include "hw/pci/pci.h" ++#include "hw/tpm/tpm_log.h" + #include "kvm_arm.h" + #include "migration/blocker.h" + #include "qapi/error.h" + #include "qemu/base64.h" + #include "qemu/error-report.h" ++#include "qemu/units.h" + #include "qom/object_interfaces.h" + #include "exec/confidential-guest-support.h" + #include "sysemu/kvm.h" +@@ -25,6 +27,14 @@ OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) + + #define RME_PAGE_SIZE qemu_real_host_page_size() + ++#define RME_MEASUREMENT_LOG_SIZE (64 * KiB) ++ ++typedef struct RmeLogFiletype { ++ uint32_t event_type; ++ /* Description copied into the log event */ ++ const char *desc; ++} RmeLogFiletype; ++ + /* + * Realms have a split guest-physical address space: the bottom half is private + * to the realm, and the top half is shared with the host. 
Within QEMU, we use a +@@ -57,6 +67,8 @@ typedef struct RealmPrivateSharedListener { + typedef struct { + hwaddr base; + hwaddr size; ++ uint8_t *blob_ptr; ++ RmeLogFiletype *filetype; + } RmeRamRegion; + + struct RmeGuest { +@@ -67,22 +79,335 @@ struct RmeGuest { + char *personalization_value_str; + uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; + RmeGuestMeasurementAlgorithm measurement_algo; ++ bool use_measurement_log; + + RmeRamRegion init_ram; + uint8_t ipa_bits; ++ size_t num_cpus; + + RealmDmaRegion *dma_region; + QLIST_HEAD(, RealmPrivateSharedListener) ram_discard_list; + MemoryListener memory_listener; + AddressSpace dma_as; ++ ++ TpmLog *log; ++ GHashTable *images; + }; + + OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, + CONFIDENTIAL_GUEST_SUPPORT, + { TYPE_USER_CREATABLE }, { }) + ++typedef struct { ++ char signature[16]; ++ char name[32]; ++ char version[40]; ++ uint64_t ram_size; ++ uint32_t num_cpus; ++ uint64_t flags; ++} EventLogVmmVersion; ++ ++typedef struct { ++ uint32_t id; ++ uint32_t data_size; ++ uint8_t data[]; ++} EventLogTagged; ++ ++#define EVENT_LOG_TAG_REALM_CREATE 1 ++#define EVENT_LOG_TAG_INIT_RIPAS 2 ++#define EVENT_LOG_TAG_REC_CREATE 3 ++ ++#define REALM_PARAMS_FLAG_SVE (1 << 1) ++#define REALM_PARAMS_FLAG_PMU (1 << 2) ++ ++#define REC_CREATE_FLAG_RUNNABLE (1 << 0) ++ + static RmeGuest *rme_guest; + ++static int rme_init_measurement_log(MachineState *ms) ++{ ++ Object *log; ++ gpointer filename; ++ TpmLogDigestAlgo algo; ++ RmeLogFiletype *filetype; ++ ++ if (!rme_guest->use_measurement_log) { ++ return 0; ++ } ++ ++ switch (rme_guest->measurement_algo) { ++ case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: ++ algo = TPM_LOG_DIGEST_ALGO_SHA256; ++ break; ++ case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: ++ algo = TPM_LOG_DIGEST_ALGO_SHA512; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ log = object_new_with_props(TYPE_TPM_LOG, OBJECT(rme_guest), ++ "log", &error_fatal, ++ "digest-algo", 
TpmLogDigestAlgo_str(algo), ++ NULL); ++ ++ tpm_log_create(TPM_LOG(log), RME_MEASUREMENT_LOG_SIZE, &error_fatal); ++ rme_guest->log = TPM_LOG(log); ++ ++ /* ++ * Write down the image names we're expecting to encounter when handling the ++ * ROM load notifications, so we can record the type of image being loaded ++ * to help the verifier. ++ */ ++ rme_guest->images = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, ++ g_free); ++ ++ filename = g_strdup(ms->kernel_filename); ++ if (filename) { ++ filetype = g_new0(RmeLogFiletype, 1); ++ filetype->event_type = TCG_EV_POST_CODE2; ++ filetype->desc = "KERNEL"; ++ g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype); ++ } ++ ++ filename = g_strdup(ms->initrd_filename); ++ if (filename) { ++ filetype = g_new0(RmeLogFiletype, 1); ++ filetype->event_type = TCG_EV_POST_CODE2; ++ filetype->desc = "INITRD"; ++ g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype); ++ } ++ ++ filename = g_strdup(ms->firmware); ++ if (filename) { ++ filetype = g_new0(RmeLogFiletype, 1); ++ filetype->event_type = TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2; ++ filetype->desc = "FIRMWARE"; ++ g_hash_table_insert(rme_guest->images, filename, filetype); ++ } ++ ++ filename = g_strdup(ms->dtb); ++ if (!filename) { ++ filename = g_strdup("dtb"); ++ } ++ filetype = g_new0(RmeLogFiletype, 1); ++ filetype->event_type = TCG_EV_POST_CODE2; ++ filetype->desc = "DTB"; ++ g_hash_table_insert(rme_guest->images, filename, filetype); ++ ++ return 0; ++} ++ ++static int rme_log_event_tag(uint32_t id, uint8_t *data, size_t size, ++ Error **errp) ++{ ++ int ret; ++ EventLogTagged event = { ++ .id = id, ++ .data_size = size, ++ }; ++ GByteArray *bytes = g_byte_array_new(); ++ ++ if (!rme_guest->log) { ++ return 0; ++ } ++ ++ g_byte_array_append(bytes, (uint8_t *)&event, sizeof(event)); ++ g_byte_array_append(bytes, data, size); ++ ret = tpm_log_add_event(rme_guest->log, TCG_EV_EVENT_TAG, bytes->data, ++ bytes->len, NULL, 0, errp); ++ 
g_byte_array_free(bytes, true); ++ return ret; ++} ++ ++/* Log VM type and Realm Descriptor create */ ++static int rme_log_realm_create(Error **errp) ++{ ++ int ret; ++ ARMCPU *cpu; ++ EventLogVmmVersion vmm_version = { ++ .signature = "VM VERSION", ++ .name = "QEMU", ++ .version = QEMU_VERSION, ++ .ram_size = cpu_to_le64(rme_guest->init_ram.size), ++ .num_cpus = cpu_to_le32(rme_guest->num_cpus), ++ .flags = 0, ++ }; ++ struct { ++ uint64_t flags; ++ uint8_t s2sz; ++ uint8_t sve_vl; ++ uint8_t num_bps; ++ uint8_t num_wps; ++ uint8_t pmu_num_ctrs; ++ uint8_t hash_algo; ++ } params = { ++ .s2sz = rme_guest->ipa_bits, ++ }; ++ ++ if (!rme_guest->log) { ++ return 0; ++ } ++ ++ ret = tpm_log_add_event(rme_guest->log, TCG_EV_NO_ACTION, ++ (uint8_t *)&vmm_version, sizeof(vmm_version), ++ NULL, 0, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ /* With KVM all CPUs have the same capability */ ++ cpu = ARM_CPU(first_cpu); ++ if (cpu->has_pmu) { ++ params.flags |= REALM_PARAMS_FLAG_PMU; ++ params.pmu_num_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); ++ } ++ ++ if (cpu->sve_max_vq) { ++ params.flags |= REALM_PARAMS_FLAG_SVE; ++ params.sve_vl = cpu->sve_max_vq - 1; ++ } ++ params.num_bps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); ++ params.num_wps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); ++ ++ switch (rme_guest->measurement_algo) { ++ case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: ++ params.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256; ++ break; ++ case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: ++ params.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return rme_log_event_tag(EVENT_LOG_TAG_REALM_CREATE, (uint8_t *)¶ms, ++ sizeof(params), errp); ++} ++ ++/* unmeasured images are logged with @data == NULL */ ++static int rme_log_image(RmeLogFiletype *filetype, uint8_t *data, hwaddr base, ++ size_t size, Error **errp) ++{ ++ int ret; ++ size_t desc_size; ++ GByteArray *event = 
g_byte_array_new(); ++ struct UefiPlatformFirmwareBlob2Head head = {0}; ++ struct UefiPlatformFirmwareBlob2Tail tail = {0}; ++ ++ if (!rme_guest->log) { ++ return 0; ++ } ++ ++ if (!filetype) { ++ error_setg(errp, "cannot log image without a filetype"); ++ return -1; ++ } ++ ++ /* EV_POST_CODE2 strings are not NUL-terminated */ ++ desc_size = strlen(filetype->desc); ++ head.blob_description_size = desc_size; ++ tail.blob_base = cpu_to_le64(base); ++ tail.blob_size = cpu_to_le64(size); ++ ++ g_byte_array_append(event, (guint8 *)&head, sizeof(head)); ++ g_byte_array_append(event, (guint8 *)filetype->desc, desc_size); ++ g_byte_array_append(event, (guint8 *)&tail, sizeof(tail)); ++ ++ ret = tpm_log_add_event(rme_guest->log, filetype->event_type, event->data, ++ event->len, data, size, errp); ++ g_byte_array_free(event, true); ++ return ret; ++} ++ ++static int rme_log_ripas(hwaddr base, size_t size, Error **errp) ++{ ++ struct { ++ uint64_t base; ++ uint64_t size; ++ } init_ripas = { ++ .base = cpu_to_le64(base), ++ .size = cpu_to_le64(size), ++ }; ++ ++ return rme_log_event_tag(EVENT_LOG_TAG_INIT_RIPAS, (uint8_t *)&init_ripas, ++ sizeof(init_ripas), errp); ++} ++ ++static int rme_log_rec(uint64_t flags, uint64_t pc, uint64_t gprs[8], Error **errp) ++{ ++ struct { ++ uint64_t flags; ++ uint64_t pc; ++ uint64_t gprs[8]; ++ } rec_create = { ++ .flags = cpu_to_le64(flags), ++ .pc = cpu_to_le64(pc), ++ .gprs[0] = cpu_to_le64(gprs[0]), ++ .gprs[1] = cpu_to_le64(gprs[1]), ++ .gprs[2] = cpu_to_le64(gprs[2]), ++ .gprs[3] = cpu_to_le64(gprs[3]), ++ .gprs[4] = cpu_to_le64(gprs[4]), ++ .gprs[5] = cpu_to_le64(gprs[5]), ++ .gprs[6] = cpu_to_le64(gprs[6]), ++ .gprs[7] = cpu_to_le64(gprs[7]), ++ }; ++ ++ return rme_log_event_tag(EVENT_LOG_TAG_REC_CREATE, (uint8_t *)&rec_create, ++ sizeof(rec_create), errp); ++} ++ ++static int rme_populate_range(hwaddr base, size_t size, bool measure, ++ Error **errp); ++ ++static int rme_close_measurement_log(Error **errp) ++{ ++ int ret; ++ hwaddr 
base; ++ size_t size; ++ RmeLogFiletype filetype = { ++ .event_type = TCG_EV_POST_CODE2, ++ .desc = "LOG", ++ }; ++ ++ if (!rme_guest->log) { ++ return 0; ++ } ++ ++ base = object_property_get_uint(OBJECT(rme_guest->log), "load-addr", errp); ++ if (*errp) { ++ return -1; ++ } ++ ++ size = object_property_get_uint(OBJECT(rme_guest->log), "max-size", errp); ++ if (*errp) { ++ return -1; ++ } ++ ++ /* Log the log itself */ ++ ret = rme_log_image(&filetype, NULL, base, size, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = tpm_log_write_and_close(rme_guest->log, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = rme_populate_range(base, size, /* measure */ false, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ g_hash_table_destroy(rme_guest->images); ++ ++ /* The log is now in the guest. Free this object */ ++ object_unparent(OBJECT(rme_guest->log)); ++ rme_guest->log = NULL; ++ return 0; ++} ++ + static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp) + { + int ret; +@@ -156,9 +481,10 @@ static int rme_init_ram(RmeRamRegion *ram, Error **errp) + error_setg_errno(errp, -ret, + "failed to init RAM [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", + start, end); ++ return ret; + } + +- return ret; ++ return rme_log_ripas(ram->base, ram->size, errp); + } + + static int rme_populate_range(hwaddr base, size_t size, bool measure, +@@ -194,23 +520,42 @@ static void rme_populate_ram_region(gpointer data, gpointer err) + } + + rme_populate_range(region->base, region->size, /* measure */ true, errp); ++ if (*errp) { ++ return; ++ } ++ ++ rme_log_image(region->filetype, region->blob_ptr, region->base, ++ region->size, errp); + } + + static int rme_init_cpus(Error **errp) + { + int ret; + CPUState *cs; ++ bool logged_primary_cpu = false; + + /* + * Now that do_cpu_reset() initialized the boot PC and + * kvm_cpu_synchronize_post_reset() registered it, we can finalize the REC. 
+ */ + CPU_FOREACH(cs) { ++ ARMCPU *cpu = ARM_CPU(cs); ++ + ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC); + if (ret) { + error_setg_errno(errp, -ret, "failed to finalize vCPU"); + return ret; + } ++ ++ if (!logged_primary_cpu) { ++ ret = rme_log_rec(REC_CREATE_FLAG_RUNNABLE, cpu->env.pc, ++ cpu->env.xregs, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ logged_primary_cpu = true; ++ } + } + return 0; + } +@@ -230,6 +575,10 @@ static int rme_create_realm(Error **errp) + return -1; + } + ++ if (rme_log_realm_create(errp)) { ++ return -1; ++ } ++ + if (rme_init_ram(&rme_guest->init_ram, errp)) { + return -1; + } +@@ -244,6 +593,10 @@ static int rme_create_realm(Error **errp) + return -1; + } + ++ if (rme_close_measurement_log(errp)) { ++ return -1; ++ } ++ + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_ACTIVATE_REALM); + if (ret) { +@@ -313,6 +666,20 @@ static void rme_set_measurement_algo(Object *obj, int algo, Error **errp) + guest->measurement_algo = algo; + } + ++static bool rme_get_measurement_log(Object *obj, Error **errp) ++{ ++ RmeGuest *guest = RME_GUEST(obj); ++ ++ return guest->use_measurement_log; ++} ++ ++static void rme_set_measurement_log(Object *obj, bool value, Error **errp) ++{ ++ RmeGuest *guest = RME_GUEST(obj); ++ ++ guest->use_measurement_log = value; ++} ++ + static void rme_guest_class_init(ObjectClass *oc, void *data) + { + object_class_property_add_str(oc, "personalization-value", rme_get_rpv, +@@ -327,6 +694,12 @@ static void rme_guest_class_init(ObjectClass *oc, void *data) + rme_set_measurement_algo); + object_class_property_set_description(oc, "measurement-algorithm", + "Realm measurement algorithm ('sha256', 'sha512')"); ++ ++ object_class_property_add_bool(oc, "measurement-log", ++ rme_get_measurement_log, ++ rme_set_measurement_log); ++ object_class_property_set_description(oc, "measurement-log", ++ "Enable/disable Realm measurement log"); + } + + static void rme_guest_init(Object *obj) +@@ -370,6 
+743,20 @@ static void rme_rom_load_notify(Notifier *notifier, void *data) + region = g_new0(RmeRamRegion, 1); + region->base = rom->addr; + region->size = rom->len; ++ /* ++ * TODO: double-check lifetime. Is data is still available when we measure ++ * it, while writing the log. Should be fine since data is kept for the next ++ * reset. ++ */ ++ region->blob_ptr = rom->blob_ptr; ++ ++ /* ++ * rme_guest->images is destroyed after ram_regions, so we can store ++ * filetype even if we don't own the struct. ++ */ ++ if (rme_guest->images) { ++ region->filetype = g_hash_table_lookup(rme_guest->images, rom->name); ++ } + + /* + * The Realm Initial Measurement (RIM) depends on the order in which we +@@ -399,6 +786,12 @@ int kvm_arm_rme_init(MachineState *ms) + return -ENODEV; + } + ++ if (rme_init_measurement_log(ms)) { ++ return -ENODEV; ++ } ++ ++ rme_guest->num_cpus = ms->smp.max_cpus; ++ + error_setg(&rme_mig_blocker, "RME: migration is not implemented"); + migrate_add_blocker(&rme_mig_blocker, &error_fatal); + +@@ -626,3 +1019,11 @@ static void realm_dma_region_class_init(ObjectClass *oc, void *data) + imrc->translate = realm_dma_region_translate; + imrc->replay = realm_dma_region_replay; + } ++ ++Object *kvm_arm_rme_get_measurement_log(void) ++{ ++ if (rme_guest && rme_guest->log) { ++ return OBJECT(rme_guest->log); ++ } ++ return NULL; ++} +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index b4d54e816f..8e9b2039c4 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -451,6 +451,16 @@ void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size); + */ + void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus); + ++/** ++ * kvm_arm_rme_get_measurement_log ++ * ++ * Obtain the measurement log object if enabled, in order to get its size and ++ * set its base address. ++ * ++ * Returns NULL if measurement log is disabled. 
++ */ ++Object *kvm_arm_rme_get_measurement_log(void); ++ + #else + + /* +@@ -486,6 +496,11 @@ static inline void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, + { + } + ++static inline Object *kvm_arm_rme_get_measurement_log(void) ++{ ++ return NULL; ++} ++ + /* + * These functions should never actually be called without KVM support. + */ +-- +2.33.0 + diff --git a/target-arm-kvm-rme-Initialize-Realm-memory.patch b/target-arm-kvm-rme-Initialize-Realm-memory.patch new file mode 100644 index 0000000000000000000000000000000000000000..865e640edd2bddb6400f2e781e1fd5ee252927e5 --- /dev/null +++ b/target-arm-kvm-rme-Initialize-Realm-memory.patch @@ -0,0 +1,236 @@ +From 113dda44a4857134af03ea8001a656dfea730f0e Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 14 Jun 2023 16:54:00 +0100 +Subject: [PATCH] target/arm/kvm-rme: Initialize Realm memory + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/7f3408b58fee5e7aaf7cda65bd506f7b7ce4b789 + +Initialize the IPA state of RAM. Collect the images copied into guest +RAM into a sorted list, and issue POPULATE_REALM KVM ioctls once we've +created the Realm Descriptor. The images are part of the Realm Initial +Measurement. 
+ +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/kvm-rme.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + target/arm/kvm-rme.c | 127 +++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm_arm.h | 14 +++++ + 2 files changed, 141 insertions(+) + +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +index b080552076..1f42187699 100644 +--- a/target/arm/kvm-rme.c ++++ b/target/arm/kvm-rme.c +@@ -8,6 +8,7 @@ + + #include "hw/boards.h" + #include "hw/core/cpu.h" ++#include "hw/loader.h" + #include "kvm_arm.h" + #include "migration/blocker.h" + #include "qapi/error.h" +@@ -20,8 +21,19 @@ + #define TYPE_RME_GUEST "rme-guest" + OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) + ++#define RME_PAGE_SIZE qemu_real_host_page_size() ++ ++typedef struct { ++ hwaddr base; ++ hwaddr size; ++} RmeRamRegion; ++ + struct RmeGuest { + ConfidentialGuestSupport parent_obj; ++ Notifier rom_load_notifier; ++ GSList *ram_regions; ++ ++ RmeRamRegion init_ram; + }; + + OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, +@@ -30,6 +42,63 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, + + static RmeGuest *rme_guest; + ++static int rme_init_ram(RmeRamRegion *ram, Error **errp) ++{ ++ int ret; ++ hwaddr start = QEMU_ALIGN_DOWN(ram->base, RME_PAGE_SIZE); ++ hwaddr end = QEMU_ALIGN_UP(ram->base + ram->size, RME_PAGE_SIZE); ++ struct arm_rme_init_ripas init_args = { ++ .base = start, ++ .size = end - start, ++ }; ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, ++ KVM_CAP_ARM_RME_INIT_RIPAS_REALM, ++ (intptr_t)&init_args); ++ if (ret) { ++ error_setg_errno(errp, -ret, ++ "failed to init RAM [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", ++ start, end); ++ } ++ ++ return ret; ++} ++ ++static int rme_populate_range(hwaddr base, size_t size, bool measure, ++ Error **errp) ++{ ++ int ret; ++ hwaddr start = QEMU_ALIGN_DOWN(base, RME_PAGE_SIZE); ++ hwaddr end = QEMU_ALIGN_UP(base + size, RME_PAGE_SIZE); ++ 
struct arm_rme_populate_realm populate_args = { ++ .base = start, ++ .size = end - start, ++ .flags = measure ? KVM_ARM_RME_POPULATE_FLAGS_MEASURE : 0, ++ }; ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, ++ KVM_CAP_ARM_RME_POPULATE_REALM, ++ (intptr_t)&populate_args); ++ if (ret) { ++ error_setg_errno(errp, -ret, ++ "failed to populate realm [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", ++ start, end); ++ } ++ return ret; ++} ++ ++static void rme_populate_ram_region(gpointer data, gpointer err) ++{ ++ Error **errp = err; ++ const RmeRamRegion *region = data; ++ ++ if (*errp) { ++ return; ++ } ++ ++ rme_populate_range(region->base, region->size, /* measure */ true, errp); ++} ++ + static int rme_init_cpus(Error **errp) + { + int ret; +@@ -60,6 +129,16 @@ static int rme_create_realm(Error **errp) + return -1; + } + ++ if (rme_init_ram(&rme_guest->init_ram, errp)) { ++ return -1; ++ } ++ ++ g_slist_foreach(rme_guest->ram_regions, rme_populate_ram_region, errp); ++ g_slist_free_full(g_steal_pointer(&rme_guest->ram_regions), g_free); ++ if (*errp) { ++ return -1; ++ } ++ + if (rme_init_cpus(errp)) { + return -1; + } +@@ -105,6 +184,43 @@ static void rme_guest_finalize(Object *obj) + { + } + ++static gint rme_compare_ram_regions(gconstpointer a, gconstpointer b) ++{ ++ const RmeRamRegion *ra = a; ++ const RmeRamRegion *rb = b; ++ ++ g_assert(ra->base != rb->base); ++ return ra->base < rb->base ? -1 : 1; ++} ++ ++static void rme_rom_load_notify(Notifier *notifier, void *data) ++{ ++ RmeRamRegion *region; ++ RomLoaderNotifyData *rom = data; ++ ++ if (rom->addr == -1) { ++ /* ++ * These blobs (ACPI tables) are not loaded into guest RAM at reset. ++ * Instead the firmware will load them via fw_cfg and measure them ++ * itself. 
++ */ ++ return; ++ } ++ ++ region = g_new0(RmeRamRegion, 1); ++ region->base = rom->addr; ++ region->size = rom->len; ++ ++ /* ++ * The Realm Initial Measurement (RIM) depends on the order in which we ++ * initialize and populate the RAM regions. To help a verifier ++ * independently calculate the RIM, sort regions by GPA. ++ */ ++ rme_guest->ram_regions = g_slist_insert_sorted(rme_guest->ram_regions, ++ region, ++ rme_compare_ram_regions); ++} ++ + int kvm_arm_rme_init(MachineState *ms) + { + static Error *rme_mig_blocker; +@@ -132,10 +248,21 @@ int kvm_arm_rme_init(MachineState *ms) + */ + qemu_add_vm_change_state_handler(rme_vm_state_change, NULL); + ++ rme_guest->rom_load_notifier.notify = rme_rom_load_notify; ++ rom_add_load_notifier(&rme_guest->rom_load_notifier); ++ + cgs->ready = true; + return 0; + } + ++void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size) ++{ ++ if (rme_guest) { ++ rme_guest->init_ram.base = base; ++ rme_guest->init_ram.size = size; ++ } ++} ++ + int kvm_arm_rme_vcpu_init(CPUState *cs) + { + ARMCPU *cpu = ARM_CPU(cs); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index b6a07eb80f..78ff8b7375 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -428,6 +428,16 @@ int kvm_arm_rme_vm_type(MachineState *ms); + */ + int kvm_arm_rme_vcpu_init(CPUState *cs); + ++/* ++ * kvm_arm_rme_init_guest_ram ++ * @base: base address of RAM ++ * @size: size of RAM ++ * ++ * If the user requested a Realm, set the base and size of guest RAM, in order ++ * to initialize the Realm IPA space. ++ */ ++void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size); ++ + #else + + /* +@@ -454,6 +464,10 @@ static inline bool kvm_arm_steal_time_supported(void) + return false; + } + ++static inline void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size) ++{ ++} ++ + /* + * These functions should never actually be called without KVM support. 
+ */ +-- +2.33.0 + diff --git a/target-arm-kvm-rme-Initialize-realm.patch b/target-arm-kvm-rme-Initialize-realm.patch new file mode 100644 index 0000000000000000000000000000000000000000..703996e0dad07f68e4e9677b15b46b5644f99797 --- /dev/null +++ b/target-arm-kvm-rme-Initialize-realm.patch @@ -0,0 +1,272 @@ +From fa74508ed08091c350f431438f42a78b54896e3e Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 9 Jan 2023 10:45:27 +0000 +Subject: [PATCH] target/arm/kvm-rme: Initialize realm + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/017b4eea65b93578831312e9548f8b3c6479fc08 + +The machine code calls kvm_arm_rme_vm_type() to get the VM flag and KVM +calls kvm_arm_rme_init() to prepare for launching a Realm. Once VM +creation is complete, create the Realm: + +* Create the realm descriptor, +* load images into Realm RAM (in another patch), +* finalize the REC (vCPU) after the registers are reset, +* activate the realm, at which point the realm is sealed. + +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/kvm.c + target/arm/kvm_arm.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + target/arm/kvm-rme.c | 105 +++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm.c | 7 ++- + target/arm/kvm_arm.h | 53 ++++++++++++++++------ + 3 files changed, 150 insertions(+), 15 deletions(-) + +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +index 1de65f2b1d..3c6fecc741 100644 +--- a/target/arm/kvm-rme.c ++++ b/target/arm/kvm-rme.c +@@ -11,6 +11,7 @@ + #include "kvm_arm.h" + #include "migration/blocker.h" + #include "qapi/error.h" ++#include "qemu/error-report.h" + #include "qom/object_interfaces.h" + #include "exec/confidential-guest-support.h" + #include "sysemu/kvm.h" +@@ -27,14 +28,118 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, + CONFIDENTIAL_GUEST_SUPPORT, + { TYPE_USER_CREATABLE }, { }) + ++static RmeGuest *rme_guest; ++ ++static int rme_init_cpus(Error **errp) ++{ ++ int ret; 
++ CPUState *cs; ++ ++ /* ++ * Now that do_cpu_reset() initialized the boot PC and ++ * kvm_cpu_synchronize_post_reset() registered it, we can finalize the REC. ++ */ ++ CPU_FOREACH(cs) { ++ ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC); ++ if (ret) { ++ error_setg_errno(errp, -ret, "failed to finalize vCPU"); ++ return ret; ++ } ++ } ++ return 0; ++} ++ ++static int rme_create_realm(Error **errp) ++{ ++ int ret; ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, ++ KVM_CAP_ARM_RME_CREATE_REALM); ++ if (ret) { ++ error_setg_errno(errp, -ret, "failed to create Realm Descriptor"); ++ return -1; ++ } ++ ++ if (rme_init_cpus(errp)) { ++ return -1; ++ } ++ ++ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, ++ KVM_CAP_ARM_RME_ACTIVATE_REALM); ++ if (ret) { ++ error_setg_errno(errp, -ret, "failed to activate realm"); ++ return -1; ++ } ++ ++ kvm_mark_guest_state_protected(); ++ return 0; ++} ++ ++static void rme_vm_state_change(void *opaque, bool running, RunState state) ++{ ++ Error *err = NULL; ++ ++ if (!running) { ++ return; ++ } ++ ++ if (rme_create_realm(&err)) { ++ error_propagate_prepend(&error_fatal, err, "RME: "); ++ } ++} ++ + static void rme_guest_class_init(ObjectClass *oc, void *data) + { + } + + static void rme_guest_init(Object *obj) + { ++ if (rme_guest) { ++ error_report("a single instance of RmeGuest is supported"); ++ exit(1); ++ } ++ rme_guest = RME_GUEST(obj); + } + + static void rme_guest_finalize(Object *obj) + { + } ++ ++int kvm_arm_rme_init(MachineState *ms) ++{ ++ static Error *rme_mig_blocker; ++ ConfidentialGuestSupport *cgs = ms->cgs; ++ ++ if (!rme_guest) { ++ return 0; ++ } ++ ++ if (!cgs) { ++ error_report("missing -machine confidential-guest-support parameter"); ++ return -EINVAL; ++ } ++ ++ if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) { ++ return -ENODEV; ++ } ++ ++ error_setg(&rme_mig_blocker, "RME: migration is not implemented"); ++ migrate_add_blocker(&rme_mig_blocker, &error_fatal); ++ ++ /* ++ * The 
realm activation is done last, when the VM starts, after all images ++ * have been loaded and all vcpus finalized. ++ */ ++ qemu_add_vm_change_state_handler(rme_vm_state_change, NULL); ++ ++ cgs->ready = true; ++ return 0; ++} ++ ++int kvm_arm_rme_vm_type(MachineState *ms) ++{ ++ if (rme_guest) { ++ return KVM_VM_TYPE_ARM_REALM; ++ } ++ return 0; ++} +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index e32a064f94..83462f3f62 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -349,7 +349,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + kvm_arm_init_debug(s); + kvm_update_ipiv_cap(s); + +- return 0; ++ ret = kvm_arm_rme_init(ms); ++ if (ret) { ++ error_report("Failed to enable RME: %s", strerror(-ret)); ++ } ++ ++ return ret; + } + + unsigned long kvm_arch_vcpu_id(CPUState *cpu) +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index a29d4548f4..f17de8855a 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -38,20 +38,6 @@ void kvm_arm_init_debug(KVMState *s); + */ + int kvm_arm_vcpu_init(CPUState *cs); + +-/** +- * kvm_arm_vcpu_finalize: +- * @cs: CPUState +- * @feature: feature to finalize +- * +- * Finalizes the configuration of the specified VCPU feature by +- * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring +- * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of +- * KVM's API documentation. +- * +- * Returns: 0 if success else < 0 error code +- */ +-int kvm_arm_vcpu_finalize(CPUState *cs, int feature); +- + /** + * kvm_arm_register_device: + * @mr: memory region for this device +@@ -285,6 +271,14 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + */ + void kvm_arm_add_vcpu_properties(Object *obj); + ++/** ++ * @cs: CPUState ++ * @feature: a KVM_ARM_VCPU_* feature ++ * ++ * Finalize the configuration of the given vcpu feature. 
++ */ ++int kvm_arm_vcpu_finalize(CPUState *cs, int feature); ++ + /** + * kvm_arm_steal_time_finalize: + * @cpu: ARMCPU for which to finalize kvm-steal-time +@@ -408,6 +402,22 @@ bool kvm_arm_tmm_enabled(void); + */ + int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction); + ++/** ++ * kvm_arm_rme_init ++ * @ms: the machine state ++ * ++ * Prepare the machine to be a Realm, if the user enabled it. ++ */ ++int kvm_arm_rme_init(MachineState *ms); ++ ++/** ++ * kvm_arm_rme_vm_type ++ * @ms: the machine state ++ * ++ * Returns the Realm KVM VM type if the user requested a Realm, 0 otherwise. ++ */ ++int kvm_arm_rme_vm_type(MachineState *ms); ++ + #else + + /* +@@ -447,6 +457,11 @@ static inline void kvm_arm_add_vcpu_properties(Object *obj) + g_assert_not_reached(); + } + ++static inline int kvm_arm_vcpu_finalize(CPUState *cs, int feature) ++{ ++ g_assert_not_reached(); ++} ++ + static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) + { + g_assert_not_reached(); +@@ -512,6 +527,16 @@ static inline int tmm_get_kae_num(void) + { + g_assert_not_reached(); + } ++ ++static inline int kvm_arm_rme_init(MachineState *ms) ++{ ++ g_assert_not_reached(); ++} ++ ++static inline int kvm_arm_rme_vm_type(MachineState *ms) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +-- +2.33.0 + diff --git a/target-arm-kvm-rme-Initialize-vCPU.patch b/target-arm-kvm-rme-Initialize-vCPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..44f493f3da6fe192bbb7eb732a6b68cb503aeeb3 --- /dev/null +++ b/target-arm-kvm-rme-Initialize-vCPU.patch @@ -0,0 +1,191 @@ +From 7f5d4809907044fd11fa040210f62b520f16ba02 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 9 Jan 2023 10:55:32 +0000 +Subject: [PATCH] target/arm/kvm-rme: Initialize vCPU + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/0808c64a827150c4a8576e52101386df9c08c136 + +The target code calls kvm_arm_vcpu_init() to mark the vCPU as part of a +Realm. 
For a Realm vCPU, only x0-x7 can be set at runtime. Before boot, +the PC can also be set, and is ignored at runtime. KVM also accepts a +few system register changes during initial configuration, as returned by +KVM_GET_REG_LIST. + +Signed-off-by: Jean-Philippe Brucker +Conflicts: + target/arm/kvm.c + target/arm/kvm_arm.h +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + target/arm/cpu.h | 3 +++ + target/arm/kvm-rme.c | 11 +++++++++ + target/arm/kvm64.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm_arm.h | 16 +++++++++++++ + 4 files changed, 83 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a5ba7f2a26..12305effd4 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -976,6 +976,9 @@ struct ArchCPU { + bool kvm_sve_finalized; + #endif /* CONFIG_KVM */ + ++ /* Realm Management Extension */ ++ bool kvm_rme; ++ + /* Uniprocessor system with MP extensions */ + bool mp_is_up; + +diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c +index 3c6fecc741..b080552076 100644 +--- a/target/arm/kvm-rme.c ++++ b/target/arm/kvm-rme.c +@@ -136,6 +136,17 @@ int kvm_arm_rme_init(MachineState *ms) + return 0; + } + ++int kvm_arm_rme_vcpu_init(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ if (rme_guest) { ++ cpu->kvm_rme = true; ++ cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_REC); ++ } ++ return 0; ++} ++ + int kvm_arm_rme_vm_type(MachineState *ms) + { + if (rme_guest) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 20a357061c..d314927027 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -646,6 +646,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); + } + ++ ret = kvm_arm_rme_vcpu_init(cs); ++ if (ret) { ++ return ret; ++ } ++ + /* Do KVM_ARM_VCPU_INIT ioctl */ + ret = kvm_arm_vcpu_init(cs); + if (ret) { +@@ -838,6 +843,29 @@ static int kvm_arch_put_sve(CPUState *cs) + return 0; + } + ++static int kvm_arm_rme_put_core_regs(CPUState *cs) ++{ ++ int i, 
ret; ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; ++ ++ /* The RME ABI only allows us to set 8 GPRs and the PC */ ++ for (i = 0; i < 8; i++) { ++ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), ++ &env->xregs[i]); ++ if (ret) { ++ return ret; ++ } ++ } ++ ++ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); ++ if (ret) { ++ return ret; ++ } ++ ++ return 0; ++} ++ + static int kvm_arm_put_core_regs(CPUState *cs, int level) + { + uint64_t val; +@@ -848,6 +876,10 @@ static int kvm_arm_put_core_regs(CPUState *cs, int level) + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + ++ if (cpu->kvm_rme) { ++ return kvm_arm_rme_put_core_regs(cs); ++ } ++ + /* If we are in AArch32 mode then we need to copy the AArch32 regs to the + * AArch64 registers before pushing them out to 64-bit KVM. + */ +@@ -1037,6 +1069,23 @@ static int kvm_arch_get_sve(CPUState *cs) + return 0; + } + ++static int kvm_arm_rme_get_core_regs(CPUState *cs) ++{ ++ int i, ret; ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; ++ ++ for (i = 0; i < 8; i++) { ++ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), ++ &env->xregs[i]); ++ if (ret) { ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ + static int kvm_arm_get_core_regs(CPUState *cs) + { + uint64_t val; +@@ -1047,6 +1096,10 @@ static int kvm_arm_get_core_regs(CPUState *cs) + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + ++ if (cpu->kvm_rme) { ++ return kvm_arm_rme_get_core_regs(cs); ++ } ++ + for (i = 0; i < 31; i++) { + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index f17de8855a..b6a07eb80f 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -418,6 +418,16 @@ int kvm_arm_rme_init(MachineState *ms); + */ + int kvm_arm_rme_vm_type(MachineState *ms); + ++/** ++ * kvm_arm_rme_vcpu_init ++ * @cs: the CPU ++ * ++ * If the user requested a Realm, setup the given vCPU 
accordingly. Realm vCPUs ++ * behave a little differently, for example most of their register state is ++ * hidden from the host. ++ */ ++int kvm_arm_rme_vcpu_init(CPUState *cs); ++ + #else + + /* +@@ -537,6 +547,12 @@ static inline int kvm_arm_rme_vm_type(MachineState *ms) + { + g_assert_not_reached(); + } ++ ++static inline int kvm_arm_rme_vcpu_init(CPUState *cs) ++{ ++ g_assert_not_reached(); ++} ++ + #endif + + /** +-- +2.33.0 + diff --git a/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch b/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b4bfbb2d6aa800e60bee7e96d429be813c6271c --- /dev/null +++ b/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch @@ -0,0 +1,407 @@ +From 9c4a7c44c3c9e89c6aeab85b00c72a09a0c13940 Mon Sep 17 00:00:00 2001 +From: Author Salil Mehta +Date: Sat, 27 May 2023 22:13:13 +0200 +Subject: [PATCH] target/arm/kvm,tcg: Register/Handle SMCCC hypercall exits to + VMM/Qemu + +Add registration and Handling of HVC/SMC hypercall exits to VMM + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Jean-Philippe Brucker +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Salil Mehta +--- + target/arm/arm-powerctl.c | 51 +++++++++++++++++++++++++++++------- + target/arm/helper.c | 2 +- + target/arm/internals.h | 11 -------- + target/arm/kvm.c | 52 +++++++++++++++++++++++++++++++++++++ + target/arm/kvm64.c | 46 +++++++++++++++++++++++++++++--- + target/arm/kvm_arm.h | 13 ++++++++++ + target/arm/meson.build | 1 + + target/arm/{tcg => }/psci.c | 8 ++++++ + target/arm/tcg/meson.build | 4 --- + 9 files changed, 160 insertions(+), 28 deletions(-) + rename target/arm/{tcg => }/psci.c (97%) + +diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c +index c078849403..fb19b04189 100644 +--- a/target/arm/arm-powerctl.c ++++ b/target/arm/arm-powerctl.c +@@ -16,6 +16,7 @@ + #include "qemu/log.h" + #include "qemu/main-loop.h" + 
#include "sysemu/tcg.h" ++#include "hw/boards.h" + + #ifndef DEBUG_ARM_POWERCTL + #define DEBUG_ARM_POWERCTL 0 +@@ -28,18 +29,37 @@ + } \ + } while (0) + ++static CPUArchId *arm_get_archid_by_id(uint64_t id) ++{ ++ int n; ++ CPUArchId *arch_id; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ++ /* ++ * At this point disabled CPUs don't have a CPUState, but their CPUArchId ++ * exists. ++ * ++ * TODO: Is arch_id == mp_affinity? This needs work. ++ */ ++ for (n = 0; n < ms->possible_cpus->len; n++) { ++ arch_id = &ms->possible_cpus->cpus[n]; ++ ++ if (arch_id->arch_id == id) { ++ return arch_id; ++ } ++ } ++ return NULL; ++} ++ + CPUState *arm_get_cpu_by_id(uint64_t id) + { +- CPUState *cpu; ++ CPUArchId *arch_id; + + DPRINTF("cpu %" PRId64 "\n", id); + +- CPU_FOREACH(cpu) { +- ARMCPU *armcpu = ARM_CPU(cpu); +- +- if (armcpu->mp_affinity == id) { +- return cpu; +- } ++ arch_id = arm_get_archid_by_id(id); ++ if (arch_id && arch_id->cpu) { ++ return CPU(arch_id->cpu); + } + + qemu_log_mask(LOG_GUEST_ERROR, +@@ -97,6 +117,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + { + CPUState *target_cpu_state; + ARMCPU *target_cpu; ++ CPUArchId *arch_id; + struct CpuOnInfo *info; + + assert(qemu_mutex_iothread_locked()); +@@ -117,12 +138,24 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + } + + /* Retrieve the cpu we are powering up */ +- target_cpu_state = arm_get_cpu_by_id(cpuid); +- if (!target_cpu_state) { ++ arch_id = arm_get_archid_by_id(cpuid); ++ if (!arch_id) { + /* The cpu was not found */ + return QEMU_ARM_POWERCTL_INVALID_PARAM; + } + ++ target_cpu_state = CPU(arch_id->cpu); ++ if (!qemu_enabled_cpu(target_cpu_state)) { ++ /* ++ * The cpu is not plugged in or disabled. 
We should return appropriate ++ * value as introduced in DEN0022E PSCI 1.2 issue E ++ */ ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "[ARM]%s: Denying attempt to online removed/disabled " ++ "CPU%" PRId64"\n", __func__, cpuid); ++ return QEMU_ARM_POWERCTL_IS_OFF; ++ } ++ + target_cpu = ARM_CPU(target_cpu_state); + if (target_cpu->power_state == PSCI_ON) { + qemu_log_mask(LOG_GUEST_ERROR, +diff --git a/target/arm/helper.c b/target/arm/helper.c +index e47498828c..793aa89cc6 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -11346,7 +11346,7 @@ void arm_cpu_do_interrupt(CPUState *cs) + env->exception.syndrome); + } + +- if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) { ++ if (arm_is_psci_call(cpu, cs->exception_index)) { + arm_handle_psci_call(cpu); + qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n"); + return; +diff --git a/target/arm/internals.h b/target/arm/internals.h +index c3a7682f05..20b9c1da38 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -314,21 +314,10 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len); + /* Callback function for when a watchpoint or breakpoint triggers. */ + void arm_debug_excp_handler(CPUState *cs); + +-#if defined(CONFIG_USER_ONLY) || !defined(CONFIG_TCG) +-static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) +-{ +- return false; +-} +-static inline void arm_handle_psci_call(ARMCPU *cpu) +-{ +- g_assert_not_reached(); +-} +-#else + /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. 
*/ + bool arm_is_psci_call(ARMCPU *cpu, int excp_type); + /* Actually handle a PSCI call */ + void arm_handle_psci_call(ARMCPU *cpu); +-#endif + + /** + * arm_clear_exclusive: clear the exclusive monitor +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index aca652621f..66caf9e5e7 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -260,6 +260,7 @@ int kvm_arch_get_default_type(MachineState *ms) + int kvm_arch_init(MachineState *ms, KVMState *s) + { + int ret = 0; ++ + /* For ARM interrupt delivery is always asynchronous, + * whether we are using an in-kernel VGIC or not. + */ +@@ -310,6 +311,22 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + } + } + ++ /* ++ * To be able to handle PSCI CPU ON calls in QEMU, we need to install SMCCC ++ * filter in the Host KVM. This is required to support features like ++ * virtual CPU Hotplug on ARM platforms. ++ */ ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU On PSCI-to-user-space fwd filter install failed"); ++ abort(); ++ } ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU Off PSCI-to-user-space fwd filter install failed"); ++ abort(); ++ } ++ + kvm_arm_init_debug(s); + + return ret; +@@ -966,6 +983,38 @@ static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, + return -1; + } + ++static int kvm_arm_handle_hypercall(CPUState *cs, struct kvm_run *run) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; ++ ++ kvm_cpu_synchronize_state(cs); ++ ++ /* ++ * hard coding immediate to 0 as we dont expect non-zero value as of now ++ * This might change in future versions. Hence, KVM_GET_ONE_REG could be ++ * used in such cases but it must be enhanced then only synchronize will ++ * also fetch ESR_EL2 value. 
++ */ ++ if (run->hypercall.flags == KVM_HYPERCALL_EXIT_SMC) { ++ cs->exception_index = EXCP_SMC; ++ env->exception.syndrome = syn_aa64_smc(0); ++ } else { ++ cs->exception_index = EXCP_HVC; ++ env->exception.syndrome = syn_aa64_hvc(0); ++ } ++ env->exception.target_el = 1; ++ qemu_mutex_lock_iothread(); ++ arm_cpu_do_interrupt(cs); ++ qemu_mutex_unlock_iothread(); ++ ++ /* ++ * For PSCI, exit the kvm_run loop and process the work. Especially ++ * important if this was a CPU_OFF command and we can't return to the guest. ++ */ ++ return EXCP_INTERRUPT; ++} ++ + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +@@ -981,6 +1030,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_arm_handle_dabt_nisv(cs, run->arm_nisv.esr_iss, + run->arm_nisv.fault_ipa); + break; ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_arm_handle_hypercall(cs, run); ++ break; + default: + qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", + __func__, run->exit_reason); +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 9c3a35d63a..00b257bb4b 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -111,6 +111,25 @@ bool kvm_arm_hw_debug_active(CPUState *cs) + return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); + } + ++static bool kvm_arm_set_vm_attr(struct kvm_device_attr *attr, const char *name) ++{ ++ int err; ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ return true; ++} ++ + static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr, + const char *name) + { +@@ -181,6 +200,28 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa) + } + } + ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t 
faction) ++{ ++ struct kvm_smccc_filter filter = { ++ .base = func, ++ .nr_functions = 1, ++ .action = faction, ++ }; ++ struct kvm_device_attr attr = { ++ .group = KVM_ARM_VM_SMCCC_CTRL, ++ .attr = KVM_ARM_VM_SMCCC_FILTER, ++ .flags = 0, ++ .addr = (uintptr_t)&filter, ++ }; ++ ++ if (!kvm_arm_set_vm_attr(&attr, "SMCCC Filter")) { ++ error_report("failed to set SMCCC filter in KVM Host"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) + { + uint64_t ret; +@@ -629,9 +670,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + /* +- * When KVM is in use, PSCI is emulated in-kernel and not by qemu. +- * Currently KVM has its own idea about MPIDR assignment, so we +- * override our defaults with what we get from KVM. ++ * KVM may emulate PSCI in-kernel. Currently KVM has its own idea about ++ * MPIDR assignment, so we override our defaults with what we get from KVM. + */ + ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); + if (ret) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 31408499b3..bf4df54c96 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -388,6 +388,15 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa); + + int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + ++/** ++ * kvm_arm_set_smccc_filter ++ * @func: funcion ++ * @faction: SMCCC filter action(handle, deny, fwd-to-user) to be deployed ++ * ++ * Sets the ARMs SMC-CC filter in KVM Host for selective hypercall exits ++ */ ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction); ++ + #else + + /* +@@ -462,6 +471,10 @@ static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs) + g_assert_not_reached(); + } + ++static inline int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +diff --git a/target/arm/meson.build b/target/arm/meson.build +index 5d04a8e94f..d1dd4932ed 100644 +--- a/target/arm/meson.build ++++ 
b/target/arm/meson.build +@@ -23,6 +23,7 @@ arm_system_ss.add(files( + 'arm-qmp-cmds.c', + 'cortex-regs.c', + 'machine.c', ++ 'psci.c', + 'ptw.c', + )) + +diff --git a/target/arm/tcg/psci.c b/target/arm/psci.c +similarity index 97% +rename from target/arm/tcg/psci.c +rename to target/arm/psci.c +index 6c1239bb96..a8690a16af 100644 +--- a/target/arm/tcg/psci.c ++++ b/target/arm/psci.c +@@ -21,7 +21,9 @@ + #include "exec/helper-proto.h" + #include "kvm-consts.h" + #include "qemu/main-loop.h" ++#include "qemu/error-report.h" + #include "sysemu/runstate.h" ++#include "sysemu/tcg.h" + #include "internals.h" + #include "arm-powerctl.h" + +@@ -157,6 +159,11 @@ void arm_handle_psci_call(ARMCPU *cpu) + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: ++ if (!tcg_enabled()) { ++ warn_report("CPU suspend not supported in non-tcg mode"); ++ break; ++ } ++#ifdef CONFIG_TCG + /* Affinity levels are not supported in QEMU */ + if (param[1] & 0xfffe0000) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; +@@ -169,6 +176,7 @@ void arm_handle_psci_call(ARMCPU *cpu) + env->regs[0] = 0; + } + helper_wfi(env, 4); ++#endif + break; + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + switch (param[1]) { +diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build +index 6fca38f2cc..ad3cfcb3bd 100644 +--- a/target/arm/tcg/meson.build ++++ b/target/arm/tcg/meson.build +@@ -51,7 +51,3 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files( + 'sme_helper.c', + 'sve_helper.c', + )) +- +-arm_system_ss.add(files( +- 'psci.c', +-)) +-- +2.27.0 + diff --git a/target-arm-kvm-trivial-Clean-up-header-documentation.patch b/target-arm-kvm-trivial-Clean-up-header-documentation.patch deleted file mode 100644 index 8c28c63b1e9fa89ace5860f635f07e2d9b221bbe..0000000000000000000000000000000000000000 --- a/target-arm-kvm-trivial-Clean-up-header-documentation.patch +++ /dev/null @@ -1,144 +0,0 @@ -From c057499f90af4be8b26f57f8755aca0ddfcf9467 Mon Sep 17 00:00:00 
2001 -From: Ying Fang -Date: Tue, 21 Apr 2020 16:52:07 +0800 -Subject: [PATCH 1/4] target/arm/kvm: trivial: Clean up header documentation - -Signed-off-by: Andrew Jones -Message-id: 20200120101023.16030-2-drjones@redhat.com -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell ---- - target/arm/kvm_arm.h | 38 +++++++++++++++++++++++--------------- - 1 file changed, 23 insertions(+), 15 deletions(-) - -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index a9f3ccab..32d97ce5 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -61,8 +61,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, - int kvm_arm_init_cpreg_list(ARMCPU *cpu); - - /** -- * kvm_arm_reg_syncs_via_cpreg_list -- * regidx: KVM register index -+ * kvm_arm_reg_syncs_via_cpreg_list: -+ * @regidx: KVM register index - * - * Return true if this KVM register should be synchronized via the - * cpreg list of arbitrary system registers, false if it is synchronized -@@ -71,8 +71,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); - bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); - - /** -- * kvm_arm_cpreg_level -- * regidx: KVM register index -+ * kvm_arm_cpreg_level: -+ * @regidx: KVM register index - * - * Return the level of this coprocessor/system register. Return value is - * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. -@@ -134,6 +134,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); - * @cpu: ARMCPU - * - * Get VCPU related state from kvm. -+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_get_vcpu_events(ARMCPU *cpu); - -@@ -142,6 +144,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); - * @cpu: ARMCPU - * - * Put VCPU related state to kvm. 
-+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_put_vcpu_events(ARMCPU *cpu); - -@@ -191,10 +195,12 @@ typedef struct ARMHostCPUFeatures { - - /** - * kvm_arm_get_host_cpu_features: -- * @ahcc: ARMHostCPUClass to fill in -+ * @ahcf: ARMHostCPUClass to fill in - * - * Probe the capabilities of the host kernel's preferred CPU and fill - * in the ARMHostCPUClass struct accordingly. -+ * -+ * Returns true on success and false otherwise. - */ - bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); - -@@ -208,26 +214,30 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); - void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - - /** -- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the -- * IPA address space supported by KVM -- * -+ * kvm_arm_get_max_vm_ipa_size: - * @ms: Machine state handle -+ * -+ * Returns the number of bits in the IPA address space supported by KVM - */ - int kvm_arm_get_max_vm_ipa_size(MachineState *ms); - - /** -- * kvm_arm_sync_mpstate_to_kvm -+ * kvm_arm_sync_mpstate_to_kvm: - * @cpu: ARMCPU - * - * If supported set the KVM MP_STATE based on QEMU's model. -+ * -+ * Returns 0 on success and -1 on failure. - */ - int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - - /** -- * kvm_arm_sync_mpstate_to_qemu -+ * kvm_arm_sync_mpstate_to_qemu: - * @cpu: ARMCPU - * - * If supported get the MP_STATE from KVM and store in QEMU's model. -+ * -+ * Returns 0 on success and aborts on failure. - */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -@@ -241,7 +251,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); - - static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - { -- /* This should never actually be called in the "not KVM" case, -+ /* -+ * This should never actually be called in the "not KVM" case, - * but set up the fields to indicate an error anyway. 
- */ - cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; -@@ -310,23 +321,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); - * - * Return: TRUE if any hardware breakpoints in use. - */ -- - bool kvm_arm_hw_debug_active(CPUState *cs); - - /** - * kvm_arm_copy_hw_debug_data: -- * - * @ptr: kvm_guest_debug_arch structure - * - * Copy the architecture specific debug registers into the - * kvm_guest_debug ioctl structure. - */ - struct kvm_guest_debug_arch; -- - void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); - - /** -- * its_class_name -+ * its_class_name: - * - * Return the ITS class name to use depending on whether KVM acceleration - * and KVM CAP_SIGNAL_MSI are supported --- -2.23.0 diff --git a/target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/target-arm-kvm64-kvm64-cpus-have-timer-registers.patch deleted file mode 100644 index b8cec1bd36e2da9526a643229252ac6760eebecf..0000000000000000000000000000000000000000 --- a/target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 07bd62920f968da7d1d8962cc7fd3d29652d25f4 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Tue, 21 Apr 2020 17:04:13 +0800 -Subject: [PATCH 2/4] target/arm/kvm64: kvm64 cpus have timer registers - -Add the missing GENERIC_TIMER feature to kvm64 cpus. - -We don't currently use these registers when KVM is enabled, but it's -probably best we add the feature flag for consistency and potential -future use. There's also precedent, as we add the PMU feature flag to -KVM enabled guests, even though we don't use those registers either. - -This change was originally posted as a hunk of a different, never -merged patch from Bijan Mottahedeh. 
- -Signed-off-by: Andrew Jones -Reviewed-by: Richard Henderson -Message-id: 20200120101023.16030-4-drjones@redhat.com -Signed-off-by: Peter Maydell ---- - target/arm/kvm64.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 22d19c9a..f2f0a92e 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -587,6 +587,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - set_feature(&features, ARM_FEATURE_NEON); - set_feature(&features, ARM_FEATURE_AARCH64); - set_feature(&features, ARM_FEATURE_PMU); -+ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); - - ahcf->features = features; - --- -2.23.0 diff --git a/target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch b/target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch deleted file mode 100644 index 30f14bafcc5b70310e462e9b4f5ca5cb91708cef..0000000000000000000000000000000000000000 --- a/target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch +++ /dev/null @@ -1,373 +0,0 @@ -From c527fa45dd0bb03c7f35b79ff53f127297f96314 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Thu, 31 Oct 2019 15:27:26 +0100 -Subject: [PATCH 12/13] target/arm/monitor: Introduce - qmp_query_cpu_model_expansion - -Add support for the query-cpu-model-expansion QMP command to Arm. We -do this selectively, only exposing CPU properties which represent -optional CPU features which the user may want to enable/disable. -Additionally we restrict the list of queryable cpu models to 'max', -'host', or the current type when KVM is in use. And, finally, we only -implement expansion type 'full', as Arm does not yet have a "base" -CPU type. More details and example queries are described in a new -document (docs/arm-cpu-features.rst). - -Note, certainly more features may be added to the list of advertised -features, e.g. 'vfp' and 'neon'. The only requirement is that we can -detect invalid configurations and emit failures at QMP query time. 
-For 'vfp' and 'neon' this will require some refactoring to share a -validation function between the QMP query and the CPU realize -functions. - -Signed-off-by: Andrew Jones -Reviewed-by: Richard Henderson -Reviewed-by: Eric Auger -Reviewed-by: Beata Michalska -Message-id: 20191031142734.8590-2-drjones@redhat.com -Signed-off-by: Peter Maydell ---- - docs/arm-cpu-features.rst | 137 +++++++++++++++++++++++++++++++++++ - qapi/machine-target.json | 6 +- - target/arm/monitor.c | 145 ++++++++++++++++++++++++++++++++++++++ - 3 files changed, 285 insertions(+), 3 deletions(-) - create mode 100644 docs/arm-cpu-features.rst - -diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst -new file mode 100644 -index 00000000..c79dcffb ---- /dev/null -+++ b/docs/arm-cpu-features.rst -@@ -0,0 +1,137 @@ -+================ -+ARM CPU Features -+================ -+ -+Examples of probing and using ARM CPU features -+ -+Introduction -+============ -+ -+CPU features are optional features that a CPU of supporting type may -+choose to implement or not. In QEMU, optional CPU features have -+corresponding boolean CPU proprieties that, when enabled, indicate -+that the feature is implemented, and, conversely, when disabled, -+indicate that it is not implemented. An example of an ARM CPU feature -+is the Performance Monitoring Unit (PMU). CPU types such as the -+Cortex-A15 and the Cortex-A57, which respectively implement ARM -+architecture reference manuals ARMv7-A and ARMv8-A, may both optionally -+implement PMUs. For example, if a user wants to use a Cortex-A15 without -+a PMU, then the `-cpu` parameter should contain `pmu=off` on the QEMU -+command line, i.e. `-cpu cortex-a15,pmu=off`. -+ -+As not all CPU types support all optional CPU features, then whether or -+not a CPU property exists depends on the CPU type. 
For example, CPUs -+that implement the ARMv8-A architecture reference manual may optionally -+support the AArch32 CPU feature, which may be enabled by disabling the -+`aarch64` CPU property. A CPU type such as the Cortex-A15, which does -+not implement ARMv8-A, will not have the `aarch64` CPU property. -+ -+QEMU's support may be limited for some CPU features, only partially -+supporting the feature or only supporting the feature under certain -+configurations. For example, the `aarch64` CPU feature, which, when -+disabled, enables the optional AArch32 CPU feature, is only supported -+when using the KVM accelerator and when running on a host CPU type that -+supports the feature. -+ -+CPU Feature Probing -+=================== -+ -+Determining which CPU features are available and functional for a given -+CPU type is possible with the `query-cpu-model-expansion` QMP command. -+Below are some examples where `scripts/qmp/qmp-shell` (see the top comment -+block in the script for usage) is used to issue the QMP commands. -+ -+(1) Determine which CPU features are available for the `max` CPU type -+ (Note, we started QEMU with qemu-system-aarch64, so `max` is -+ implementing the ARMv8-A reference manual in this case):: -+ -+ (QEMU) query-cpu-model-expansion type=full model={"name":"max"} -+ { "return": { -+ "model": { "name": "max", "props": { -+ "pmu": true, "aarch64": true -+ }}}} -+ -+We see that the `max` CPU type has the `pmu` and `aarch64` CPU features. -+We also see that the CPU features are enabled, as they are all `true`. -+ -+(2) Let's try to disable the PMU:: -+ -+ (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"pmu":false}} -+ { "return": { -+ "model": { "name": "max", "props": { -+ "pmu": false, "aarch64": true -+ }}}} -+ -+We see it worked, as `pmu` is now `false`. 
-+ -+(3) Let's try to disable `aarch64`, which enables the AArch32 CPU feature:: -+ -+ (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"aarch64":false}} -+ {"error": { -+ "class": "GenericError", "desc": -+ "'aarch64' feature cannot be disabled unless KVM is enabled and 32-bit EL1 is supported" -+ }} -+ -+It looks like this feature is limited to a configuration we do not -+currently have. -+ -+(4) Let's try probing CPU features for the Cortex-A15 CPU type:: -+ -+ (QEMU) query-cpu-model-expansion type=full model={"name":"cortex-a15"} -+ {"return": {"model": {"name": "cortex-a15", "props": {"pmu": true}}}} -+ -+Only the `pmu` CPU feature is available. -+ -+A note about CPU feature dependencies -+------------------------------------- -+ -+It's possible for features to have dependencies on other features. I.e. -+it may be possible to change one feature at a time without error, but -+when attempting to change all features at once an error could occur -+depending on the order they are processed. It's also possible changing -+all at once doesn't generate an error, because a feature's dependencies -+are satisfied with other features, but the same feature cannot be changed -+independently without error. For these reasons callers should always -+attempt to make their desired changes all at once in order to ensure the -+collection is valid. -+ -+A note about CPU models and KVM -+------------------------------- -+ -+Named CPU models generally do not work with KVM. There are a few cases -+that do work, e.g. using the named CPU model `cortex-a57` with KVM on a -+seattle host, but mostly if KVM is enabled the `host` CPU type must be -+used. This means the guest is provided all the same CPU features as the -+host CPU type has. And, for this reason, the `host` CPU type should -+enable all CPU features that the host has by default. 
Indeed it's even -+a bit strange to allow disabling CPU features that the host has when using -+the `host` CPU type, but in the absence of CPU models it's the best we can -+do if we want to launch guests without all the host's CPU features enabled. -+ -+Enabling KVM also affects the `query-cpu-model-expansion` QMP command. The -+affect is not only limited to specific features, as pointed out in example -+(3) of "CPU Feature Probing", but also to which CPU types may be expanded. -+When KVM is enabled, only the `max`, `host`, and current CPU type may be -+expanded. This restriction is necessary as it's not possible to know all -+CPU types that may work with KVM, but it does impose a small risk of users -+experiencing unexpected errors. For example on a seattle, as mentioned -+above, the `cortex-a57` CPU type is also valid when KVM is enabled. -+Therefore a user could use the `host` CPU type for the current type, but -+then attempt to query `cortex-a57`, however that query will fail with our -+restrictions. This shouldn't be an issue though as management layers and -+users have been preferring the `host` CPU type for use with KVM for quite -+some time. Additionally, if the KVM-enabled QEMU instance running on a -+seattle host is using the `cortex-a57` CPU type, then querying `cortex-a57` -+will work. -+ -+Using CPU Features -+================== -+ -+After determining which CPU features are available and supported for a -+given CPU type, then they may be selectively enabled or disabled on the -+QEMU command line with that CPU type:: -+ -+ $ qemu-system-aarch64 -M virt -cpu max,pmu=off -+ -+The example above disables the PMU for the `max` CPU type. 
-+ -diff --git a/qapi/machine-target.json b/qapi/machine-target.json -index 55310a6a..04623224 100644 ---- a/qapi/machine-target.json -+++ b/qapi/machine-target.json -@@ -212,7 +212,7 @@ - ## - { 'struct': 'CpuModelExpansionInfo', - 'data': { 'model': 'CpuModelInfo' }, -- 'if': 'defined(TARGET_S390X) || defined(TARGET_I386)' } -+ 'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' } - - ## - # @query-cpu-model-expansion: -@@ -237,7 +237,7 @@ - # query-cpu-model-expansion while using these is not advised. - # - # Some architectures may not support all expansion types. s390x supports --# "full" and "static". -+# "full" and "static". Arm only supports "full". - # - # Returns: a CpuModelExpansionInfo. Returns an error if expanding CPU models is - # not supported, if the model cannot be expanded, if the model contains -@@ -251,7 +251,7 @@ - 'data': { 'type': 'CpuModelExpansionType', - 'model': 'CpuModelInfo' }, - 'returns': 'CpuModelExpansionInfo', -- 'if': 'defined(TARGET_S390X) || defined(TARGET_I386)' } -+ 'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' } - - ## - # @CpuDefinitionInfo: -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index 6ec6dd04..560970de 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -23,7 +23,14 @@ - #include "qemu/osdep.h" - #include "hw/boards.h" - #include "kvm_arm.h" -+#include "qapi/error.h" -+#include "qapi/visitor.h" -+#include "qapi/qobject-input-visitor.h" -+#include "qapi/qapi-commands-machine-target.h" - #include "qapi/qapi-commands-misc-target.h" -+#include "qapi/qmp/qerror.h" -+#include "qapi/qmp/qdict.h" -+#include "qom/qom-qobject.h" - - static GICCapability *gic_cap_new(int version) - { -@@ -82,3 +89,141 @@ GICCapabilityList *qmp_query_gic_capabilities(Error **errp) - - return head; - } -+ -+/* -+ * These are cpu model features we want to advertise. 
The order here -+ * matters as this is the order in which qmp_query_cpu_model_expansion -+ * will attempt to set them. If there are dependencies between features, -+ * then the order that considers those dependencies must be used. -+ */ -+static const char *cpu_model_advertised_features[] = { -+ "aarch64", "pmu", -+ NULL -+}; -+ -+CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, -+ CpuModelInfo *model, -+ Error **errp) -+{ -+ CpuModelExpansionInfo *expansion_info; -+ const QDict *qdict_in = NULL; -+ QDict *qdict_out; -+ ObjectClass *oc; -+ Object *obj; -+ const char *name; -+ int i; -+ -+ if (type != CPU_MODEL_EXPANSION_TYPE_FULL) { -+ error_setg(errp, "The requested expansion type is not supported"); -+ return NULL; -+ } -+ -+ if (!kvm_enabled() && !strcmp(model->name, "host")) { -+ error_setg(errp, "The CPU type '%s' requires KVM", model->name); -+ return NULL; -+ } -+ -+ oc = cpu_class_by_name(TYPE_ARM_CPU, model->name); -+ if (!oc) { -+ error_setg(errp, "The CPU type '%s' is not a recognized ARM CPU type", -+ model->name); -+ return NULL; -+ } -+ -+ if (kvm_enabled()) { -+ const char *cpu_type = current_machine->cpu_type; -+ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); -+ bool supported = false; -+ -+ if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { -+ /* These are kvmarm's recommended cpu types */ -+ supported = true; -+ } else if (strlen(model->name) == len && -+ !strncmp(model->name, cpu_type, len)) { -+ /* KVM is enabled and we're using this type, so it works. 
*/ -+ supported = true; -+ } -+ if (!supported) { -+ error_setg(errp, "We cannot guarantee the CPU type '%s' works " -+ "with KVM on this host", model->name); -+ return NULL; -+ } -+ } -+ -+ if (model->props) { -+ qdict_in = qobject_to(QDict, model->props); -+ if (!qdict_in) { -+ error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict"); -+ return NULL; -+ } -+ } -+ -+ obj = object_new(object_class_get_name(oc)); -+ -+ if (qdict_in) { -+ Visitor *visitor; -+ Error *err = NULL; -+ -+ visitor = qobject_input_visitor_new(model->props); -+ visit_start_struct(visitor, NULL, NULL, 0, &err); -+ if (err) { -+ visit_free(visitor); -+ object_unref(obj); -+ error_propagate(errp, err); -+ return NULL; -+ } -+ -+ i = 0; -+ while ((name = cpu_model_advertised_features[i++]) != NULL) { -+ if (qdict_get(qdict_in, name)) { -+ object_property_set(obj, visitor, name, &err); -+ if (err) { -+ break; -+ } -+ } -+ } -+ -+ if (!err) { -+ visit_check_struct(visitor, &err); -+ } -+ visit_end_struct(visitor, NULL); -+ visit_free(visitor); -+ if (err) { -+ object_unref(obj); -+ error_propagate(errp, err); -+ return NULL; -+ } -+ } -+ -+ expansion_info = g_new0(CpuModelExpansionInfo, 1); -+ expansion_info->model = g_malloc0(sizeof(*expansion_info->model)); -+ expansion_info->model->name = g_strdup(model->name); -+ -+ qdict_out = qdict_new(); -+ -+ i = 0; -+ while ((name = cpu_model_advertised_features[i++]) != NULL) { -+ ObjectProperty *prop = object_property_find(obj, name, NULL); -+ if (prop) { -+ Error *err = NULL; -+ QObject *value; -+ -+ assert(prop->get); -+ value = object_property_get_qobject(obj, name, &err); -+ assert(!err); -+ -+ qdict_put_obj(qdict_out, name, value); -+ } -+ } -+ -+ if (!qdict_size(qdict_out)) { -+ qobject_unref(qdict_out); -+ } else { -+ expansion_info->model->props = QOBJECT(qdict_out); -+ expansion_info->model->has_props = true; -+ } -+ -+ object_unref(obj); -+ -+ return expansion_info; -+} --- -2.25.1 - diff --git 
a/target-arm-monitor-query-cpu-model-expansion-crashed.patch b/target-arm-monitor-query-cpu-model-expansion-crashed.patch deleted file mode 100644 index 60973a7c2233a8e57f23d89c69a0c3a972835e8b..0000000000000000000000000000000000000000 --- a/target-arm-monitor-query-cpu-model-expansion-crashed.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 5d75b922480f3fbefe83b5bb5e241e56a16e1e3e Mon Sep 17 00:00:00 2001 -From: Liang Yan -Date: Fri, 7 Feb 2020 14:04:21 +0000 -Subject: [PATCH 13/13] target/arm/monitor: query-cpu-model-expansion crashed - qemu when using machine type none - -Commit e19afd566781 mentioned that target-arm only supports queryable -cpu models 'max', 'host', and the current type when KVM is in use. -The logic works well until using machine type none. - -For machine type none, cpu_type will be null if cpu option is not -set by command line, strlen(cpu_type) will terminate process. -So We add a check above it. - -This won't affect i386 and s390x since they do not use current_cpu. - -Signed-off-by: Liang Yan -Message-id: 20200203134251.12986-1-lyan@suse.com -Reviewed-by: Andrew Jones -Tested-by: Andrew Jones -Signed-off-by: Peter Maydell ---- - target/arm/monitor.c | 15 +++++++++------ - 1 file changed, 9 insertions(+), 6 deletions(-) - -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index 560970de..e2b1d117 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -131,17 +131,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - } - - if (kvm_enabled()) { -- const char *cpu_type = current_machine->cpu_type; -- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); - bool supported = false; - - if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { - /* These are kvmarm's recommended cpu types */ - supported = true; -- } else if (strlen(model->name) == len && -- !strncmp(model->name, cpu_type, len)) { -- /* KVM is enabled and we're using this type, so it works. 
*/ -- supported = true; -+ } else if (current_machine->cpu_type) { -+ const char *cpu_type = current_machine->cpu_type; -+ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); -+ -+ if (strlen(model->name) == len && -+ !strncmp(model->name, cpu_type, len)) { -+ /* KVM is enabled and we're using this type, so it works. */ -+ supported = true; -+ } - } - if (!supported) { - error_setg(errp, "We cannot guarantee the CPU type '%s' works " --- -2.25.1 - diff --git a/target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch b/target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch deleted file mode 100644 index 91702dbe0cbcc5fff7a113ff2b75a90026521500..0000000000000000000000000000000000000000 --- a/target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 88e3146118230de8b99280db219a6a6c47bebce1 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Wed, 16 Sep 2020 19:40:28 +0800 -Subject: [PATCH] target/arm: only set ID_PFR1_EL1.GIC for AArch32 guest - -Some AArch64 CPU doesn't support AArch32 mode, and the values of AArch32 -registers are all 0. Hence, We'd better not to modify AArch32 registers -in AArch64 mode. 
- -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/helper.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 97b6b86197..b262f5d6c5 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -5672,7 +5672,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) - ARMCPU *cpu = env_archcpu(env); - uint64_t pfr1 = cpu->id_pfr1; - -- if (env->gicv3state) { -+ if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && env->gicv3state) { - pfr1 |= 1 << 28; - } - return pfr1; --- -2.23.0 - diff --git a/target-arm-parse-cpu-feature-related-options.patch b/target-arm-parse-cpu-feature-related-options.patch deleted file mode 100644 index 066e231af6266d21eecaf5d0c519b8a6aa4069d6..0000000000000000000000000000000000000000 --- a/target-arm-parse-cpu-feature-related-options.patch +++ /dev/null @@ -1,124 +0,0 @@ -From dca1df05ce3d6b17d03203fc6fd94e23548216c7 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Thu, 6 Aug 2020 16:14:35 +0800 -Subject: [PATCH 2/9] target/arm: parse cpu feature related options - -The implementation of CPUClass::parse_features only supports CPU -features in "feature=value" format. However, libvirt maybe send us a -CPU feature string in "+feature/-feature" format. Hence, we need to -override CPUClass::parse_features to support CPU feature string in both -"feature=value" and "+feature/-feature" format. - -The logic of AArch64CPUClass::parse_features is similar to that of -X86CPUClass::parse_features. 
- -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/cpu64.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 83 insertions(+) - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index fe648752..7de20848 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -506,6 +506,88 @@ static void arm_cpu_parse_featurestr(const char *typename, char *features, - } - } - -+static void -+cpu_add_feat_as_prop(const char *typename, const char *name, const char *val) -+{ -+ GlobalProperty *prop = g_new0(typeof(*prop), 1); -+ prop->driver = typename; -+ prop->property = g_strdup(name); -+ prop->value = g_strdup(val); -+ qdev_prop_register_global(prop); -+} -+ -+static gint compare_string(gconstpointer a, gconstpointer b) -+{ -+ return g_strcmp0(a, b); -+} -+ -+static GList *plus_features, *minus_features; -+ -+static void aarch64_cpu_parse_features(const char *typename, char *features, -+ Error **errp) -+{ -+ GList *l; -+ char *featurestr; /* Single 'key=value" string being parsed */ -+ static bool cpu_globals_initialized; -+ -+ if (cpu_globals_initialized) { -+ return; -+ } -+ cpu_globals_initialized = true; -+ -+ if (!features) { -+ return; -+ } -+ for (featurestr = strtok(features, ","); -+ featurestr; -+ featurestr = strtok(NULL, ",")) { -+ const char *name; -+ const char *val = NULL; -+ char *eq = NULL; -+ -+ /* Compatibility syntax: */ -+ if (featurestr[0] == '+') { -+ plus_features = g_list_append(plus_features, -+ g_strdup(featurestr + 1)); -+ continue; -+ } else if (featurestr[0] == '-') { -+ minus_features = g_list_append(minus_features, -+ g_strdup(featurestr + 1)); -+ continue; -+ } -+ -+ eq = strchr(featurestr, '='); -+ name = featurestr; -+ if (eq) { -+ *eq++ = 0; -+ val = eq; -+ } else { -+ error_setg(errp, "Unsupported property format: %s", name); -+ return; -+ } -+ -+ if (g_list_find_custom(plus_features, name, compare_string)) { -+ warn_report("Ambiguous CPU model string. 
" -+ "Don't mix both \"+%s\" and \"%s=%s\"", -+ name, name, val); -+ } -+ if (g_list_find_custom(minus_features, name, compare_string)) { -+ warn_report("Ambiguous CPU model string. " -+ "Don't mix both \"-%s\" and \"%s=%s\"", -+ name, name, val); -+ } -+ cpu_add_feat_as_prop(typename, name, val); -+ } -+ -+ for (l = plus_features; l; l = l->next) { -+ cpu_add_feat_as_prop(typename, l->data, "on"); -+ } -+ -+ for (l = minus_features; l; l = l->next) { -+ cpu_add_feat_as_prop(typename, l->data, "off"); -+ } -+} -+ - static void aarch64_cpu_class_init(ObjectClass *oc, void *data) - { - CPUClass *cc = CPU_CLASS(oc); -@@ -517,6 +599,7 @@ static void aarch64_cpu_class_init(ObjectClass *oc, void *data) - cc->gdb_num_core_regs = 34; - cc->gdb_core_xml_file = "aarch64-core.xml"; - cc->gdb_arch_name = aarch64_gdb_arch_name; -+ cc->parse_features = aarch64_cpu_parse_features; - } - - static void aarch64_cpu_instance_init(Object *obj) --- -2.25.1 - diff --git a/target-arm-register-CPU-features-for-property.patch b/target-arm-register-CPU-features-for-property.patch deleted file mode 100644 index ea42a63ea7620fc790d01ae94590fb4336e12c32..0000000000000000000000000000000000000000 --- a/target-arm-register-CPU-features-for-property.patch +++ /dev/null @@ -1,398 +0,0 @@ -From f169b1f76cad9f727c701df853b05ad5e8d7f927 Mon Sep 17 00:00:00 2001 -From: Peng Liang -Date: Thu, 6 Aug 2020 16:14:37 +0800 -Subject: [PATCH 3/9] target/arm: register CPU features for property - -The Arm architecture specifies a number of ID registers that are -characterized as comprising a set of 4-bit ID fields. Each ID field -identifies the presence, and possibly the level of support for, a -particular feature in an implementation of the architecture. [1] - -For most of the ID fields, there is a minimum presence value, equal to -or higher than which means the corresponding CPU feature is implemented. 
-Hence, we can use the minimum presence value to determine whether a CPU -feature is enabled and enable a CPU feature. - -To disable a CPU feature, setting the corresponding ID field to 0x0/0xf -(for unsigned/signed field) seems as a good idea. However, it maybe -lead to some problems. For example, ID_AA64PFR0_EL1.FP is a signed ID -field. ID_AA64PFR0_EL1.FP == 0x0 represents the implementation of FP -(floating-point) and ID_AA64PFR0_EL1.FP == 0x1 represents the -implementation of FPHP (half-precision floating-point). If -ID_AA64PFR0_EL1.FP is set to 0xf when FPHP is disabled (which is also -disable FP), guest kernel maybe stuck. Hence, we add a ni_value (means -not-implemented value) to disable a CPU feature safely. - -[1] D13.1.3 Principles of the ID scheme for fields in ID registers in - DDI.0487 - -Signed-off-by: zhanghailiang -Signed-off-by: Peng Liang ---- - target/arm/cpu.c | 343 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 343 insertions(+) - -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5bcdad0c..3f63312c 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -1034,6 +1034,347 @@ static void arm_set_init_svtor(Object *obj, Visitor *v, const char *name, - visit_type_uint32(v, name, &cpu->init_svtor, errp); - } - -+/** -+ * CPUFeatureInfo: -+ * @reg: The ID register where the ID field is in. -+ * @name: The name of the CPU feature. -+ * @length: The bit length of the ID field. -+ * @shift: The bit shift of the ID field in the ID register. -+ * @min_value: The minimum value equal to or larger than which means the CPU -+ * feature is implemented. -+ * @ni_value: Not-implemented value. It will be set to the ID field when -+ * disabling the CPU feature. Usually, it's min_value - 1. -+ * @sign: Whether the ID field is signed. -+ * @is_32bit: Whether the CPU feature is for 32-bit. -+ * -+ * In ARM, a CPU feature is described by an ID field, which is a 4-bit field in -+ * an ID register. 
-+ */ -+typedef struct CPUFeatureInfo { -+ CPUIDReg reg; -+ const char *name; -+ int length; -+ int shift; -+ int min_value; -+ int ni_value; -+ bool sign; -+ bool is_32bit; -+} CPUFeatureInfo; -+ -+#define FIELD_INFO(feature_name, id_reg, field, s, min_val, ni_val, is32bit) { \ -+ .reg = id_reg, \ -+ .length = R_ ## id_reg ## _ ## field ## _LENGTH, \ -+ .shift = R_ ## id_reg ## _ ## field ## _SHIFT, \ -+ .sign = s, \ -+ .min_value = min_val, \ -+ .ni_value = ni_val, \ -+ .name = feature_name, \ -+ .is_32bit = is32bit, \ -+} -+ -+static struct CPUFeatureInfo cpu_features[] = { -+ FIELD_INFO("swap", ID_ISAR0, SWAP, false, 1, 0, true), -+ FIELD_INFO("bitcount", ID_ISAR0, BITCOUNT, false, 1, 0, true), -+ FIELD_INFO("bitfield", ID_ISAR0, BITFIELD, false, 1, 0, true), -+ FIELD_INFO("cmpbranch", ID_ISAR0, CMPBRANCH, false, 1, 0, true), -+ FIELD_INFO("coproc", ID_ISAR0, COPROC, false, 1, 0, true), -+ FIELD_INFO("debug", ID_ISAR0, DEBUG, false, 1, 0, true), -+ FIELD_INFO("device", ID_ISAR0, DIVIDE, false, 1, 0, true), -+ -+ FIELD_INFO("endian", ID_ISAR1, ENDIAN, false, 1, 0, true), -+ FIELD_INFO("except", ID_ISAR1, EXCEPT, false, 1, 0, true), -+ FIELD_INFO("except_ar", ID_ISAR1, EXCEPT_AR, false, 1, 0, true), -+ FIELD_INFO("extend", ID_ISAR1, EXTEND, false, 1, 0, true), -+ FIELD_INFO("ifthen", ID_ISAR1, IFTHEN, false, 1, 0, true), -+ FIELD_INFO("immediate", ID_ISAR1, IMMEDIATE, false, 1, 0, true), -+ FIELD_INFO("interwork", ID_ISAR1, INTERWORK, false, 1, 0, true), -+ FIELD_INFO("jazelle", ID_ISAR1, JAZELLE, false, 1, 0, true), -+ -+ FIELD_INFO("loadstore", ID_ISAR2, LOADSTORE, false, 1, 0, true), -+ FIELD_INFO("memhint", ID_ISAR2, MEMHINT, false, 1, 0, true), -+ FIELD_INFO("multiaccessint", ID_ISAR2, MULTIACCESSINT, false, 1, 0, true), -+ FIELD_INFO("mult", ID_ISAR2, MULT, false, 1, 0, true), -+ FIELD_INFO("mults", ID_ISAR2, MULTS, false, 1, 0, true), -+ FIELD_INFO("multu", ID_ISAR2, MULTU, false, 1, 0, true), -+ FIELD_INFO("psr_ar", ID_ISAR2, PSR_AR, false, 1, 0, true), 
-+ FIELD_INFO("reversal", ID_ISAR2, REVERSAL, false, 1, 0, true), -+ -+ FIELD_INFO("saturate", ID_ISAR3, SATURATE, false, 1, 0, true), -+ FIELD_INFO("simd", ID_ISAR3, SIMD, false, 1, 0, true), -+ FIELD_INFO("svc", ID_ISAR3, SVC, false, 1, 0, true), -+ FIELD_INFO("synchprim", ID_ISAR3, SYNCHPRIM, false, 1, 0, true), -+ FIELD_INFO("tabbranch", ID_ISAR3, TABBRANCH, false, 1, 0, true), -+ FIELD_INFO("t32copy", ID_ISAR3, T32COPY, false, 1, 0, true), -+ FIELD_INFO("truenop", ID_ISAR3, TRUENOP, false, 1, 0, true), -+ FIELD_INFO("t32ee", ID_ISAR3, T32EE, false, 1, 0, true), -+ -+ FIELD_INFO("unpriv", ID_ISAR4, UNPRIV, false, 1, 0, true), -+ FIELD_INFO("withshifts", ID_ISAR4, WITHSHIFTS, false, 1, 0, true), -+ FIELD_INFO("writeback", ID_ISAR4, WRITEBACK, false, 1, 0, true), -+ FIELD_INFO("smc", ID_ISAR4, SMC, false, 1, 0, true), -+ FIELD_INFO("barrier", ID_ISAR4, BARRIER, false, 1, 0, true), -+ FIELD_INFO("synchprim_frac", ID_ISAR4, SYNCHPRIM_FRAC, false, 1, 0, true), -+ FIELD_INFO("psr_m", ID_ISAR4, PSR_M, false, 1, 0, true), -+ FIELD_INFO("swp_frac", ID_ISAR4, SWP_FRAC, false, 1, 0, true), -+ -+ FIELD_INFO("sevl", ID_ISAR5, SEVL, false, 1, 0, true), -+ FIELD_INFO("aes", ID_ISAR5, AES, false, 1, 0, true), -+ FIELD_INFO("sha1", ID_ISAR5, SHA1, false, 1, 0, true), -+ FIELD_INFO("sha2", ID_ISAR5, SHA2, false, 1, 0, true), -+ FIELD_INFO("crc32", ID_ISAR5, CRC32, false, 1, 0, true), -+ FIELD_INFO("rdm", ID_ISAR5, RDM, false, 1, 0, true), -+ FIELD_INFO("vcma", ID_ISAR5, VCMA, false, 1, 0, true), -+ -+ FIELD_INFO("jscvt", ID_ISAR6, JSCVT, false, 1, 0, true), -+ FIELD_INFO("dp", ID_ISAR6, DP, false, 1, 0, true), -+ FIELD_INFO("fhm", ID_ISAR6, FHM, false, 1, 0, true), -+ FIELD_INFO("sb", ID_ISAR6, SB, false, 1, 0, true), -+ FIELD_INFO("specres", ID_ISAR6, SPECRES, false, 1, 0, true), -+ -+ FIELD_INFO("cmaintva", ID_MMFR3, CMAINTVA, false, 1, 0, true), -+ FIELD_INFO("cmaintsw", ID_MMFR3, CMAINTSW, false, 1, 0, true), -+ FIELD_INFO("bpmaint", ID_MMFR3, BPMAINT, false, 1, 0, true), -+ 
FIELD_INFO("maintbcst", ID_MMFR3, MAINTBCST, false, 1, 0, true), -+ FIELD_INFO("pan", ID_MMFR3, PAN, false, 1, 0, true), -+ FIELD_INFO("cohwalk", ID_MMFR3, COHWALK, false, 1, 0, true), -+ FIELD_INFO("cmemsz", ID_MMFR3, CMEMSZ, false, 1, 0, true), -+ FIELD_INFO("supersec", ID_MMFR3, SUPERSEC, false, 1, 0, true), -+ -+ FIELD_INFO("specsei", ID_MMFR4, SPECSEI, false, 1, 0, true), -+ FIELD_INFO("ac2", ID_MMFR4, AC2, false, 1, 0, true), -+ FIELD_INFO("xnx", ID_MMFR4, XNX, false, 1, 0, true), -+ FIELD_INFO("cnp", ID_MMFR4, CNP, false, 1, 0, true), -+ FIELD_INFO("hpds", ID_MMFR4, HPDS, false, 1, 0, true), -+ FIELD_INFO("lsm", ID_MMFR4, LSM, false, 1, 0, true), -+ FIELD_INFO("ccidx", ID_MMFR4, CCIDX, false, 1, 0, true), -+ FIELD_INFO("evt", ID_MMFR4, EVT, false, 1, 0, true), -+ -+ FIELD_INFO("simdreg", MVFR0, SIMDREG, false, 1, 0, true), -+ FIELD_INFO("fpsp", MVFR0, FPSP, false, 1, 0, true), -+ FIELD_INFO("fpdp", MVFR0, FPDP, false, 1, 0, true), -+ FIELD_INFO("fptrap", MVFR0, FPTRAP, false, 1, 0, true), -+ FIELD_INFO("fpdivide", MVFR0, FPDIVIDE, false, 1, 0, true), -+ FIELD_INFO("fpsqrt", MVFR0, FPSQRT, false, 1, 0, true), -+ FIELD_INFO("fpshvec", MVFR0, FPSHVEC, false, 1, 0, true), -+ FIELD_INFO("fpround", MVFR0, FPROUND, false, 1, 0, true), -+ -+ FIELD_INFO("fpftz", MVFR1, FPFTZ, false, 1, 0, true), -+ FIELD_INFO("fpdnan", MVFR1, FPDNAN, false, 1, 0, true), -+ FIELD_INFO("simdls", MVFR1, SIMDLS, false, 1, 0, true), -+ FIELD_INFO("simdint", MVFR1, SIMDINT, false, 1, 0, true), -+ FIELD_INFO("simdsp", MVFR1, SIMDSP, false, 1, 0, true), -+ FIELD_INFO("simdhp", MVFR1, SIMDHP, false, 1, 0, true), -+ FIELD_INFO("fphp", MVFR1, FPHP, false, 1, 0, true), -+ FIELD_INFO("simdfmac", MVFR1, SIMDFMAC, false, 1, 0, true), -+ -+ FIELD_INFO("simdmisc", MVFR2, SIMDMISC, false, 1, 0, true), -+ FIELD_INFO("fpmisc", MVFR2, FPMISC, false, 1, 0, true), -+ -+ FIELD_INFO("debugver", ID_AA64DFR0, DEBUGVER, false, 1, 0, false), -+ FIELD_INFO("tracever", ID_AA64DFR0, TRACEVER, false, 1, 0, false), 
-+ FIELD_INFO("pmuver", ID_AA64DFR0, PMUVER, false, 1, 0, false), -+ FIELD_INFO("brps", ID_AA64DFR0, BRPS, false, 1, 0, false), -+ FIELD_INFO("wrps", ID_AA64DFR0, WRPS, false, 1, 0, false), -+ FIELD_INFO("ctx_cmps", ID_AA64DFR0, CTX_CMPS, false, 1, 0, false), -+ FIELD_INFO("pmsver", ID_AA64DFR0, PMSVER, false, 1, 0, false), -+ FIELD_INFO("doublelock", ID_AA64DFR0, DOUBLELOCK, false, 1, 0, false), -+ FIELD_INFO("tracefilt", ID_AA64DFR0, TRACEFILT, false, 1, 0, false), -+ -+ FIELD_INFO("aes", ID_AA64ISAR0, AES, false, 1, 0, false), -+ FIELD_INFO("sha1", ID_AA64ISAR0, SHA1, false, 1, 0, false), -+ FIELD_INFO("sha2", ID_AA64ISAR0, SHA2, false, 1, 0, false), -+ FIELD_INFO("crc32", ID_AA64ISAR0, CRC32, false, 1, 0, false), -+ FIELD_INFO("atomics", ID_AA64ISAR0, ATOMIC, false, 1, 0, false), -+ FIELD_INFO("asimdrdm", ID_AA64ISAR0, RDM, false, 1, 0, false), -+ FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), -+ FIELD_INFO("sm3", ID_AA64ISAR0, SM3, false, 1, 0, false), -+ FIELD_INFO("sm4", ID_AA64ISAR0, SM4, false, 1, 0, false), -+ FIELD_INFO("asimddp", ID_AA64ISAR0, DP, false, 1, 0, false), -+ FIELD_INFO("asimdfhm", ID_AA64ISAR0, FHM, false, 1, 0, false), -+ FIELD_INFO("flagm", ID_AA64ISAR0, TS, false, 1, 0, false), -+ FIELD_INFO("tlb", ID_AA64ISAR0, TLB, false, 1, 0, false), -+ FIELD_INFO("rng", ID_AA64ISAR0, RNDR, false, 1, 0, false), -+ -+ FIELD_INFO("dcpop", ID_AA64ISAR1, DPB, false, 1, 0, false), -+ FIELD_INFO("papa", ID_AA64ISAR1, APA, false, 1, 0, false), -+ FIELD_INFO("api", ID_AA64ISAR1, API, false, 1, 0, false), -+ FIELD_INFO("jscvt", ID_AA64ISAR1, JSCVT, false, 1, 0, false), -+ FIELD_INFO("fcma", ID_AA64ISAR1, FCMA, false, 1, 0, false), -+ FIELD_INFO("lrcpc", ID_AA64ISAR1, LRCPC, false, 1, 0, false), -+ FIELD_INFO("pacg", ID_AA64ISAR1, GPA, false, 1, 0, false), -+ FIELD_INFO("gpi", ID_AA64ISAR1, GPI, false, 1, 0, false), -+ FIELD_INFO("frint", ID_AA64ISAR1, FRINTTS, false, 1, 0, false), -+ FIELD_INFO("sb", ID_AA64ISAR1, SB, false, 1, 0, false), -+ 
FIELD_INFO("specres", ID_AA64ISAR1, SPECRES, false, 1, 0, false), -+ -+ FIELD_INFO("el0", ID_AA64PFR0, EL0, false, 1, 0, false), -+ FIELD_INFO("el1", ID_AA64PFR0, EL1, false, 1, 0, false), -+ FIELD_INFO("el2", ID_AA64PFR0, EL2, false, 1, 0, false), -+ FIELD_INFO("el3", ID_AA64PFR0, EL3, false, 1, 0, false), -+ FIELD_INFO("fp", ID_AA64PFR0, FP, true, 0, 0xf, false), -+ FIELD_INFO("asimd", ID_AA64PFR0, ADVSIMD, true, 0, 0xf, false), -+ FIELD_INFO("gic", ID_AA64PFR0, GIC, false, 1, 0, false), -+ FIELD_INFO("ras", ID_AA64PFR0, RAS, false, 1, 0, false), -+ FIELD_INFO("sve", ID_AA64PFR0, SVE, false, 1, 0, false), -+ -+ FIELD_INFO("bti", ID_AA64PFR1, BT, false, 1, 0, false), -+ FIELD_INFO("sbss", ID_AA64PFR1, SBSS, false, 1, 0, false), -+ FIELD_INFO("mte", ID_AA64PFR1, MTE, false, 1, 0, false), -+ FIELD_INFO("ras_frac", ID_AA64PFR1, RAS_FRAC, false, 1, 0, false), -+ -+ FIELD_INFO("parange", ID_AA64MMFR0, PARANGE, false, 1, 0, false), -+ FIELD_INFO("asidbits", ID_AA64MMFR0, ASIDBITS, false, 1, 0, false), -+ FIELD_INFO("bigend", ID_AA64MMFR0, BIGEND, false, 1, 0, false), -+ FIELD_INFO("snsmem", ID_AA64MMFR0, SNSMEM, false, 1, 0, false), -+ FIELD_INFO("bigendel0", ID_AA64MMFR0, BIGENDEL0, false, 1, 0, false), -+ FIELD_INFO("tgran16", ID_AA64MMFR0, TGRAN16, false, 1, 0, false), -+ FIELD_INFO("tgran64", ID_AA64MMFR0, TGRAN64, false, 1, 0, false), -+ FIELD_INFO("tgran4", ID_AA64MMFR0, TGRAN4, false, 1, 0, false), -+ FIELD_INFO("tgran16_2", ID_AA64MMFR0, TGRAN16_2, false, 1, 0, false), -+ FIELD_INFO("tgran64_2", ID_AA64MMFR0, TGRAN64_2, false, 1, 0, false), -+ FIELD_INFO("tgran4_2", ID_AA64MMFR0, TGRAN4_2, false, 1, 0, false), -+ FIELD_INFO("exs", ID_AA64MMFR0, EXS, false, 1, 0, false), -+ -+ FIELD_INFO("hafdbs", ID_AA64MMFR1, HAFDBS, false, 1, 0, false), -+ FIELD_INFO("vmidbits", ID_AA64MMFR1, VMIDBITS, false, 1, 0, false), -+ FIELD_INFO("vh", ID_AA64MMFR1, VH, false, 1, 0, false), -+ FIELD_INFO("hpds", ID_AA64MMFR1, HPDS, false, 1, 0, false), -+ FIELD_INFO("lo", ID_AA64MMFR1, 
LO, false, 1, 0, false), -+ FIELD_INFO("pan", ID_AA64MMFR1, PAN, false, 1, 0, false), -+ FIELD_INFO("specsei", ID_AA64MMFR1, SPECSEI, false, 1, 0, false), -+ FIELD_INFO("xnx", ID_AA64MMFR1, XNX, false, 1, 0, false), -+ -+ FIELD_INFO("cnp", ID_AA64MMFR2, CNP, false, 1, 0, false), -+ FIELD_INFO("uao", ID_AA64MMFR2, UAO, false, 1, 0, false), -+ FIELD_INFO("lsm", ID_AA64MMFR2, LSM, false, 1, 0, false), -+ FIELD_INFO("iesb", ID_AA64MMFR2, IESB, false, 1, 0, false), -+ FIELD_INFO("varange", ID_AA64MMFR2, VARANGE, false, 1, 0, false), -+ FIELD_INFO("ccidx", ID_AA64MMFR2, CCIDX, false, 1, 0, false), -+ FIELD_INFO("nv", ID_AA64MMFR2, NV, false, 1, 0, false), -+ FIELD_INFO("st", ID_AA64MMFR2, ST, false, 1, 0, false), -+ FIELD_INFO("uscat", ID_AA64MMFR2, AT, false, 1, 0, false), -+ FIELD_INFO("ids", ID_AA64MMFR2, IDS, false, 1, 0, false), -+ FIELD_INFO("fwb", ID_AA64MMFR2, FWB, false, 1, 0, false), -+ FIELD_INFO("ttl", ID_AA64MMFR2, TTL, false, 1, 0, false), -+ FIELD_INFO("bbm", ID_AA64MMFR2, BBM, false, 1, 0, false), -+ FIELD_INFO("evt", ID_AA64MMFR2, EVT, false, 1, 0, false), -+ FIELD_INFO("e0pd", ID_AA64MMFR2, E0PD, false, 1, 0, false), -+ -+ FIELD_INFO("copdbg", ID_DFR0, COPDBG, false, 1, 0, false), -+ FIELD_INFO("copsdbg", ID_DFR0, COPSDBG, false, 1, 0, false), -+ FIELD_INFO("mmapdbg", ID_DFR0, MMAPDBG, false, 1, 0, false), -+ FIELD_INFO("coptrc", ID_DFR0, COPTRC, false, 1, 0, false), -+ FIELD_INFO("mmaptrc", ID_DFR0, MMAPTRC, false, 1, 0, false), -+ FIELD_INFO("mprofdbg", ID_DFR0, MPROFDBG, false, 1, 0, false), -+ FIELD_INFO("perfmon", ID_DFR0, PERFMON, false, 1, 0, false), -+ FIELD_INFO("tracefilt", ID_DFR0, TRACEFILT, false, 1, 0, false), -+ -+ { -+ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_FP_LENGTH, -+ .shift = R_ID_AA64PFR0_FP_SHIFT, .sign = true, .min_value = 1, -+ .ni_value = 0, .name = "fphp", .is_32bit = false, -+ }, -+ { -+ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_ADVSIMD_LENGTH, -+ .shift = R_ID_AA64PFR0_ADVSIMD_SHIFT, .sign = true, .min_value = 1, -+ 
.ni_value = 0, .name = "asimdhp", .is_32bit = false, -+ }, -+ { -+ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_AES_LENGTH, -+ .shift = R_ID_AA64ISAR0_AES_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "pmull", .is_32bit = false, -+ }, -+ { -+ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, -+ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "sha512", .is_32bit = false, -+ }, -+ { -+ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_TS_LENGTH, -+ .shift = R_ID_AA64ISAR0_TS_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "flagm2", .is_32bit = false, -+ }, -+ { -+ .reg = ID_AA64ISAR1, .length = R_ID_AA64ISAR1_DPB_LENGTH, -+ .shift = R_ID_AA64ISAR1_DPB_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "dcpodp", .is_32bit = false, -+ }, -+ { -+ .reg = ID_AA64ISAR1, .length = R_ID_AA64ISAR1_LRCPC_LENGTH, -+ .shift = R_ID_AA64ISAR1_LRCPC_SHIFT, .sign = false, .min_value = 2, -+ .ni_value = 1, .name = "ilrcpc", .is_32bit = false, -+ }, -+}; -+ -+static void arm_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, -+ void *opaque, Error **errp) -+{ -+ ARMCPU *cpu = ARM_CPU(obj); -+ CPUFeatureInfo *feat = opaque; -+ int field_value = feat->sign ? 
sextract64(cpu->isar.regs[feat->reg], -+ feat->shift, feat->length) : -+ extract64(cpu->isar.regs[feat->reg], -+ feat->shift, feat->length); -+ bool value = field_value >= feat->min_value; -+ -+ visit_type_bool(v, name, &value, errp); -+} -+ -+static void arm_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, -+ void *opaque, Error **errp) -+{ -+ DeviceState *dev = DEVICE(obj); -+ ARMCPU *cpu = ARM_CPU(obj); -+ ARMISARegisters *isar = &cpu->isar; -+ CPUFeatureInfo *feat = opaque; -+ Error *local_err = NULL; -+ bool value; -+ -+ if (dev->realized) { -+ qdev_prop_set_after_realize(dev, name, errp); -+ return; -+ } -+ -+ visit_type_bool(v, name, &value, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ if (value) { -+ isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], -+ feat->shift, feat->length, -+ feat->min_value); -+ } else { -+ isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], -+ feat->shift, feat->length, -+ feat->ni_value); -+ } -+} -+ -+static void arm_cpu_register_feature_props(ARMCPU *cpu) -+{ -+ int i; -+ int num = ARRAY_SIZE(cpu_features); -+ ObjectProperty *op; -+ CPUARMState *env = &cpu->env; -+ -+ for (i = 0; i < num; i++) { -+ if ((arm_feature(env, ARM_FEATURE_AARCH64) && cpu_features[i].is_32bit) -+ || (!arm_feature(env, ARM_FEATURE_AARCH64) && -+ cpu_features[i].is_32bit)) { -+ continue; -+ } -+ op = object_property_find(OBJECT(cpu), cpu_features[i].name, NULL); -+ if (!op) { -+ object_property_add(OBJECT(cpu), cpu_features[i].name, "bool", -+ arm_cpu_get_bit_prop, -+ arm_cpu_set_bit_prop, -+ NULL, &cpu_features[i], &error_abort); -+ } -+ } -+} -+ - void arm_cpu_post_init(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1150,6 +1491,8 @@ void arm_cpu_post_init(Object *obj) - - qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property, - &error_abort); -+ -+ arm_cpu_register_feature_props(cpu); - } - - static void arm_cpu_finalizefn(Object *obj) --- -2.25.1 - diff --git 
a/target-arm-support-the-IPIV-feature.patch b/target-arm-support-the-IPIV-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..0467695b0860d81347e6073c97039794efc37288 --- /dev/null +++ b/target-arm-support-the-IPIV-feature.patch @@ -0,0 +1,69 @@ +From 4db69439ab84a108795f7dc1ea218aa746f1d2be Mon Sep 17 00:00:00 2001 +From: Jinqian Yang +Date: Fri, 16 May 2025 18:20:17 +0800 +Subject: [PATCH] target/arm: support the IPIV feature + +virt inclusion +category: feature +bugzilla: https://gitee.com/openeuler/qemu/issues/IC1EV7 + +------------------------------------------------------------------------ + +QEMU uses ioctl to enable IPIV. + +Signed-off-by: Jinqian Yang +--- + linux-headers/linux/kvm.h | 1 + + target/arm/kvm.c | 17 +++++++++++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index b711c04506..92fc1fbb85 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1214,6 +1214,7 @@ struct kvm_ppc_resize_hpt { + + #define KVM_CAP_ARM_HW_DIRTY_STATE_TRACK 502 + ++#define KVM_CAP_ARM_HISI_IPIV 798 + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 + + #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index ee5ba68305..ab31515a2a 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -257,6 +257,22 @@ int kvm_arch_get_default_type(MachineState *ms) + return fixed_ipa ? 
0 : size; + } + ++static void kvm_update_ipiv_cap(KVMState *s) ++{ ++ int ret; ++ ++ if (!kvm_check_extension(s, KVM_CAP_ARM_HISI_IPIV)) { ++ return; ++ } ++ ++ ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_HISI_IPIV, 0); ++ if (ret) { ++ fprintf(stderr, "Could not enable KVM_CAP_ARM_HISI_IPIV: %d\n", ret); ++ } ++ ++ return; ++} ++ + int kvm_arch_init(MachineState *ms, KVMState *s) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -330,6 +346,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + } + + kvm_arm_init_debug(s); ++ kvm_update_ipiv_cap(s); + + return ret; + } +-- +2.41.0.windows.1 + diff --git a/target-arm-take-HSTR-traps-of-cp15-accesses-to-EL2-n.patch b/target-arm-take-HSTR-traps-of-cp15-accesses-to-EL2-n.patch new file mode 100644 index 0000000000000000000000000000000000000000..aaa839a6c5470c095e6f56d82b5ec45524fc4c69 --- /dev/null +++ b/target-arm-take-HSTR-traps-of-cp15-accesses-to-EL2-n.patch @@ -0,0 +1,42 @@ +From 45e80d1d71f7f4b50b47ec61560a77edd80badc1 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Thu, 2 Jan 2025 10:35:05 +0800 +Subject: [PATCH] target/arm: take HSTR traps of cp15 accesses to EL2, not EL1 + +cherry-pick from fbe5ac5671a9cfcc7f4aee9a5fac7720eea08876 + +The HSTR_EL2 register allows the hypervisor to trap AArch32 EL1 and +EL0 accesses to cp15 registers. We incorrectly implemented this so +they trap to EL1 when we detect the need for a HSTR trap at code +generation time. (The check in access_check_cp_reg() which we do at +runtime to catch traps from EL0 is correctly routing them to EL2.) + +Use the correct target EL when generating the code to take the trap. 
+ +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2226 +Fixes: 049edada5e93df ("target/arm: Make HSTR_EL2 traps take priority over UNDEF-at-EL1") +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20240325133116.2075362-1-peter.maydell@linaro.org +Signed-off-by: gubin +--- + target/arm/tcg/translate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c +index b3660173d1..e555e885a1 100644 +--- a/target/arm/tcg/translate.c ++++ b/target/arm/tcg/translate.c +@@ -4584,7 +4584,7 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, + tcg_gen_andi_i32(t, t, 1u << maskbit); + tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label); + +- gen_exception_insn(s, 0, EXCP_UDEF, syndrome); ++ gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); + /* + * gen_exception_insn() will set is_jmp to DISAS_NORETURN, + * but since we're conditionally branching over it, we want +-- +2.41.0.windows.1 + diff --git a/target-hexagon-don-t-look-for-static-glib.patch b/target-hexagon-don-t-look-for-static-glib.patch new file mode 100644 index 0000000000000000000000000000000000000000..92fe02ecf3f01e7cebfa752043bd40ef472cfd42 --- /dev/null +++ b/target-hexagon-don-t-look-for-static-glib.patch @@ -0,0 +1,47 @@ +From f698e21192b07335197e8a20032cbb411715775a Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 11 Jan 2025 10:37:12 +0800 +Subject: [PATCH] target/hexagon: don't look for static glib + +cherry-pick from fe68cc0923ebfa0c12e4176f61ec9b363a07a73a + +When cross compiling QEMU configured with --static, I've been getting +configure errors like the following: + + Build-time dependency glib-2.0 found: NO + + ../target/hexagon/meson.build:303:15: ERROR: Dependency lookup for glib-2.0 with method 'pkgconfig' failed: Could not generate libs for glib-2.0: + Package libpcre2-8 was not found in the pkg-config search path. 
+ Perhaps you should add the directory containing `libpcre2-8.pc' + to the PKG_CONFIG_PATH environment variable + Package 'libpcre2-8', required by 'glib-2.0', not found + +This happens because --static sets the prefer_static Meson option, but +my build machine doesn't have a static libpcre2. I don't think it +makes sense to insist that native dependencies are static, just +because I want the non-native QEMU binaries to be static. + +Signed-off-by: Alyssa Ross +Link: https://lore.kernel.org/r/20240805104921.4035256-1-hi@alyssa.is +Signed-off-by: Paolo Bonzini +Signed-off-by: gubin +--- + target/hexagon/meson.build | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build +index da8e608d00..436217f25a 100644 +--- a/target/hexagon/meson.build ++++ b/target/hexagon/meson.build +@@ -188,7 +188,7 @@ if idef_parser_enabled and 'hexagon-linux-user' in target_dirs + arguments: ['@INPUT@', '--defines=@OUTPUT1@', '--output=@OUTPUT0@'] + ) + +- glib_dep = dependency('glib-2.0', native: true) ++ glib_dep = dependency('glib-2.0', native: true, static: false) + + idef_parser = executable( + 'idef-parser', +-- +2.41.0.windows.1 + diff --git a/target-hexagon-idef-parser-fix-leak-of-init_list.patch b/target-hexagon-idef-parser-fix-leak-of-init_list.patch new file mode 100644 index 0000000000000000000000000000000000000000..dae4443b55759f7181ed917a1f010c484393b915 --- /dev/null +++ b/target-hexagon-idef-parser-fix-leak-of-init_list.patch @@ -0,0 +1,50 @@ +From c36b2fb64446013ce8ded7f6bca5787795a17de1 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 13 Jun 2024 10:31:49 +0800 +Subject: [PATCH] target/hexagon: idef-parser fix leak of init_list cherry-pick + from 95408ad8e24c4364086f185285039e89927dad6c + +gen_inst_init_args() is called for instructions using a predicate as an +rvalue. Upon first call, the list of arguments which might need +initialization init_list is freed to indicate that they have been +processed. 
For instructions without an rvalue predicate, +gen_inst_init_args() isn't called and init_list will never be freed. + +Free init_list from free_instruction() if it hasn't already been freed. +A comment in free_instruction is also updated. + +Signed-off-by: Anton Johansson +Reviewed-by: Taylor Simpson +Reviewed-by: Brian Cain +Message-Id: <20240523125901.27797-4-anjo@rev.ng> +Signed-off-by: Brian Cain +Signed-off-by: qihao_yewu +--- + target/hexagon/idef-parser/parser-helpers.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c +index 4af020933a..a83099de6b 100644 +--- a/target/hexagon/idef-parser/parser-helpers.c ++++ b/target/hexagon/idef-parser/parser-helpers.c +@@ -2123,9 +2123,16 @@ void free_instruction(Context *c) + g_string_free(g_array_index(c->inst.strings, GString*, i), TRUE); + } + g_array_free(c->inst.strings, TRUE); ++ /* ++ * Free list of arguments that might need initialization, if they haven't ++ * already been freed. 
++ */ ++ if (c->inst.init_list) { ++ g_array_free(c->inst.init_list, TRUE); ++ } + /* Free INAME token value */ + g_string_free(c->inst.name, TRUE); +- /* Free variables and registers */ ++ /* Free declared TCGv variables */ + g_array_free(c->inst.allocated, TRUE); + /* Initialize instruction-specific portion of the context */ + memset(&(c->inst), 0, sizeof(Inst)); +-- +2.41.0.windows.1 + diff --git a/target-hppa-Fix-PSW-V-bit-packaging-in-cpu_hppa_get-.patch b/target-hppa-Fix-PSW-V-bit-packaging-in-cpu_hppa_get-.patch new file mode 100644 index 0000000000000000000000000000000000000000..1aa893ee55cf92a995365d49bfce218e208e4c52 --- /dev/null +++ b/target-hppa-Fix-PSW-V-bit-packaging-in-cpu_hppa_get-.patch @@ -0,0 +1,58 @@ +From b1a14fd9b59803a17626903c5fb54f1aa2655d00 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Tue, 3 Sep 2024 17:22:10 +0200 +Subject: [PATCH] target/hppa: Fix PSW V-bit packaging in cpu_hppa_get for + hppa64 + +While adding hppa64 support, the psw_v variable got extended from 32 to 64 +bits. So, when packaging the PSW-V bit from the psw_v variable for interrupt +processing, check bit 31 instead of the 63rd (sign) bit. + +This fixes a hard to find Linux kernel boot issue where the loss of the PSW-V +bit due to an ITLB interruption in the middle of a series of ds/addc +instructions (from the divU millicode library) generated the wrong division +result and thus triggered a Linux kernel crash. 
+ +Link: https://lore.kernel.org/lkml/718b8afe-222f-4b3a-96d3-93af0e4ceff1@roeck-us.net/ +Reported-by: Guenter Roeck +Signed-off-by: Helge Deller +Reviewed-by: Richard Henderson +Tested-by: Guenter Roeck +Fixes: 931adff31478 ("target/hppa: Update cpu_hppa_get/put_psw for hppa64") +Cc: qemu-stable@nongnu.org # v8.2+ +(cherry picked from commit ead5078cf1a5f11d16e3e8462154c859620bcc7e) +Signed-off-by: zhujun2 +--- + target/hppa/cpu.h | 2 +- + target/hppa/helper.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h +index 8be45c69c9..ba100c21a2 100644 +--- a/target/hppa/cpu.h ++++ b/target/hppa/cpu.h +@@ -188,7 +188,7 @@ typedef struct CPUArchState { + + target_ulong psw; /* All psw bits except the following: */ + target_ulong psw_n; /* boolean */ +- target_long psw_v; /* in most significant bit */ ++ target_long psw_v; /* in bit 31 */ + + /* Splitting the carry-borrow field into the MSB and "the rest", allows + * for "the rest" to be deleted when it is unused, but the MSB is in use. 
+diff --git a/target/hppa/helper.c b/target/hppa/helper.c +index 859644c47a..9e35b65f29 100644 +--- a/target/hppa/helper.c ++++ b/target/hppa/helper.c +@@ -53,7 +53,7 @@ target_ulong cpu_hppa_get_psw(CPUHPPAState *env) + } + + psw |= env->psw_n * PSW_N; +- psw |= (env->psw_v < 0) * PSW_V; ++ psw |= ((env->psw_v >> 31) & 1) * PSW_V; + psw |= env->psw; + + return psw; +-- +2.41.0.windows.1 + diff --git a/target-i386-Add-Hygon-Dhyana-v3-CPU-model.patch b/target-i386-Add-Hygon-Dhyana-v3-CPU-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..03db2220dc18223e56e7c8b42d684db0624af45b --- /dev/null +++ b/target-i386-Add-Hygon-Dhyana-v3-CPU-model.patch @@ -0,0 +1,43 @@ +From 7d4bc795419a69457ee5f2e32674183dc009d48f Mon Sep 17 00:00:00 2001 +From: Yanjing Zhou +Date: Wed, 15 May 2024 13:49:19 +0800 +Subject: [PATCH] target/i386: Add Hygon Dhyana-v3 CPU model + +Add the following feature bits for Dhyana CPU model: +perfctr-core, clzero, xsaveerptr, aes, pclmulqdq, sha-ni + +Disable xsaves feature bit for Erratum 1386 + +Signed-off-by: Yanjing Zhou +--- + target/i386/cpu.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index fd32c64f99..f4c22f32c6 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4793,6 +4793,20 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { .version = 3, ++ .props = (PropValue[]) { ++ { "xsaves", "off" }, ++ { "perfctr-core", "on" }, ++ { "clzero", "on" }, ++ { "xsaveerptr", "on" }, ++ { "aes", "on" }, ++ { "pclmulqdq", "on" }, ++ { "sha-ni", "on" }, ++ { "model-id", ++ "Hygon Dhyana-v3 processor" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.41.0.windows.1 + diff --git a/target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch b/target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch deleted file mode 100644 index 
eedd3356bf33b6fd02950740fa81330cbbed2895..0000000000000000000000000000000000000000 --- a/target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch +++ /dev/null @@ -1,43 +0,0 @@ -From ce4bb30a650773833cd1e86afcaa30e47259085c Mon Sep 17 00:00:00 2001 -From: Xiaoyao Li -Date: Sat, 12 Oct 2019 10:47:48 +0800 -Subject: [PATCH] target/i386: Add Snowridge-v2 (no MPX) CPU model - -Add new version of Snowridge CPU model that removes MPX feature. - -MPX support is being phased out by Intel. GCC has dropped it, Linux kernel -and KVM are also going to do that in the future. - -Signed-off-by: Xiaoyao Li -Message-Id: <20191012024748.127135-1-xiaoyao.li@intel.com> -Signed-off-by: Eduardo Habkost ---- - target/i386/cpu.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index d3742ef4ac..f09612f9da 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2668,6 +2668,18 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Atom Processor (SnowRidge)", -+ .versions = (X86CPUVersionDefinition[]) { -+ { .version = 1 }, -+ { -+ .version = 2, -+ .props = (PropValue[]) { -+ { "mpx", "off" }, -+ { "model-id", "Intel Atom Processor (Snowridge, no MPX)" }, -+ { /* end of list */ }, -+ }, -+ }, -+ { /* end of list */ }, -+ }, - }, - { - .name = "KnightsMill", --- -2.27.0 - diff --git a/target-i386-Add-VMX-control-bits-for-nested-FRED-sup.patch b/target-i386-Add-VMX-control-bits-for-nested-FRED-sup.patch new file mode 100644 index 0000000000000000000000000000000000000000..4459e52cbc2ebe742ae6e4a80ecb4e46ea0d0dcf --- /dev/null +++ b/target-i386-Add-VMX-control-bits-for-nested-FRED-sup.patch @@ -0,0 +1,48 @@ +From 4dea92e8570650776ed8caa0fedf0a90920f5e97 Mon Sep 17 00:00:00 2001 +From: "Xin Li (Intel)" +Date: Wed, 7 Aug 2024 01:18:11 -0700 +Subject: [PATCH] target/i386: Add VMX control bits for nested FRED support + +commit 7c6ec5bc5fea92a4ddea3f0189e3a7e7588e1d19 upstream. 
+ +Add definitions of + 1) VM-exit activate secondary controls bit + 2) VM-entry load FRED bit +which are required to enable nested FRED. + +Intel-SIG: commit 7c6ec5bc5fea target/i386: Add VMX control bits for nested FRED support + +Reviewed-by: Zhao Liu +Signed-off-by: Xin Li (Intel) +Link: https://lore.kernel.org/r/20240807081813.735158-3-xin@zytor.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 00e636e61c..f80570f4da 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1271,7 +1271,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "vmx-exit-save-efer", "vmx-exit-load-efer", + "vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs", + NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL, +- NULL, "vmx-exit-load-pkrs", NULL, NULL, ++ NULL, "vmx-exit-load-pkrs", NULL, "vmx-exit-secondary-ctls", + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_EXIT_CTLS, +@@ -1286,7 +1286,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "vmx-entry-ia32e-mode", NULL, NULL, + NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer", + "vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL, +- NULL, NULL, "vmx-entry-load-pkrs", NULL, ++ NULL, NULL, "vmx-entry-load-pkrs", "vmx-entry-load-fred", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, +-- +2.41.0.windows.1 + diff --git a/target-i386-Add-get-set-migrate-support-for-FRED-MSR.patch b/target-i386-Add-get-set-migrate-support-for-FRED-MSR.patch new file mode 100644 index 0000000000000000000000000000000000000000..0fec3249ff729d81259ddf33e01e1d338feb5ac9 --- /dev/null +++ b/target-i386-Add-get-set-migrate-support-for-FRED-MSR.patch @@ -0,0 +1,188 @@ +From c3e47749fba4418d80bf4314335118452912b29c Mon Sep 17 00:00:00 2001 +From: Xin Li +Date: Wed, 8 Nov 2023 23:20:12 -0800 +Subject: [PATCH] target/i386: Add 
get/set/migrate support for FRED MSRs + +commit 4ebd98eb3ade5957a842da1420bda012eeeaab9c upstream. + +FRED CPU states are managed in 9 new FRED MSRs, in addition to a few +existing CPU registers and MSRs, e.g., CR4.FRED and MSR_IA32_PL0_SSP. + +Save/restore/migrate FRED MSRs if FRED is exposed to the guest. + +Intel-SIG: commit 4ebd98eb3ade target/i386: Add get/set/migrate support for FRED MSRs + +Tested-by: Shan Kang +Signed-off-by: Xin Li +Message-ID: <20231109072012.8078-7-xin3.li@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.h | 22 +++++++++++++++++++ + target/i386/kvm/kvm.c | 49 +++++++++++++++++++++++++++++++++++++++++++ + target/i386/machine.c | 28 +++++++++++++++++++++++++ + 3 files changed, 99 insertions(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index b03237c305..1b9d922651 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -539,6 +539,17 @@ typedef enum X86Seg { + #define MSR_IA32_XFD 0x000001c4 + #define MSR_IA32_XFD_ERR 0x000001c5 + ++/* FRED MSRs */ ++#define MSR_IA32_FRED_RSP0 0x000001cc /* Stack level 0 regular stack pointer */ ++#define MSR_IA32_FRED_RSP1 0x000001cd /* Stack level 1 regular stack pointer */ ++#define MSR_IA32_FRED_RSP2 0x000001ce /* Stack level 2 regular stack pointer */ ++#define MSR_IA32_FRED_RSP3 0x000001cf /* Stack level 3 regular stack pointer */ ++#define MSR_IA32_FRED_STKLVLS 0x000001d0 /* FRED exception stack levels */ ++#define MSR_IA32_FRED_SSP1 0x000001d1 /* Stack level 1 shadow stack pointer in ring 0 */ ++#define MSR_IA32_FRED_SSP2 0x000001d2 /* Stack level 2 shadow stack pointer in ring 0 */ ++#define MSR_IA32_FRED_SSP3 0x000001d3 /* Stack level 3 shadow stack pointer in ring 0 */ ++#define MSR_IA32_FRED_CONFIG 0x000001d4 /* FRED Entrypoint and interrupt stack level */ ++ + #define MSR_IA32_BNDCFGS 0x00000d90 + #define MSR_IA32_XSS 0x00000da0 + #define MSR_IA32_UMWAIT_CONTROL 0xe1 +@@ -1698,6 +1709,17 @@ typedef struct CPUArchState { + target_ulong 
cstar; + target_ulong fmask; + target_ulong kernelgsbase; ++ ++ /* FRED MSRs */ ++ uint64_t fred_rsp0; ++ uint64_t fred_rsp1; ++ uint64_t fred_rsp2; ++ uint64_t fred_rsp3; ++ uint64_t fred_stklvls; ++ uint64_t fred_ssp1; ++ uint64_t fred_ssp2; ++ uint64_t fred_ssp3; ++ uint64_t fred_config; + #endif + + uint64_t tsc_adjust; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 12e920bbb4..5f3497e122 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3391,6 +3391,17 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase); + kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask); + kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar); ++ if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) { ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config); ++ } + } + #endif + +@@ -3867,6 +3878,17 @@ static int kvm_get_msrs(X86CPU *cpu) + kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0); + kvm_msr_entry_add(cpu, MSR_FMASK, 0); + kvm_msr_entry_add(cpu, MSR_LSTAR, 0); ++ if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) { ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0); ++ kvm_msr_entry_add(cpu, 
MSR_IA32_FRED_SSP3, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0); ++ } + } + #endif + kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0); +@@ -4092,6 +4114,33 @@ static int kvm_get_msrs(X86CPU *cpu) + case MSR_LSTAR: + env->lstar = msrs[i].data; + break; ++ case MSR_IA32_FRED_RSP0: ++ env->fred_rsp0 = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_RSP1: ++ env->fred_rsp1 = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_RSP2: ++ env->fred_rsp2 = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_RSP3: ++ env->fred_rsp3 = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_STKLVLS: ++ env->fred_stklvls = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_SSP1: ++ env->fred_ssp1 = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_SSP2: ++ env->fred_ssp2 = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_SSP3: ++ env->fred_ssp3 = msrs[i].data; ++ break; ++ case MSR_IA32_FRED_CONFIG: ++ env->fred_config = msrs[i].data; ++ break; + #endif + case MSR_IA32_TSC: + env->tsc = msrs[i].data; +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 9a1cb8f3b8..7cbfbc0efb 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -1544,6 +1544,33 @@ static const VMStateDescription vmstate_msr_xfd = { + }; + + #ifdef TARGET_X86_64 ++static bool intel_fred_msrs_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return !!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED); ++} ++ ++static const VMStateDescription vmstate_msr_fred = { ++ .name = "cpu/fred", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = intel_fred_msrs_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.fred_rsp0, X86CPU), ++ VMSTATE_UINT64(env.fred_rsp1, X86CPU), ++ VMSTATE_UINT64(env.fred_rsp2, X86CPU), ++ VMSTATE_UINT64(env.fred_rsp3, X86CPU), ++ VMSTATE_UINT64(env.fred_stklvls, X86CPU), ++ VMSTATE_UINT64(env.fred_ssp1, X86CPU), ++ VMSTATE_UINT64(env.fred_ssp2, X86CPU), ++ VMSTATE_UINT64(env.fred_ssp3, X86CPU), ++ VMSTATE_UINT64(env.fred_config, 
X86CPU), ++ VMSTATE_END_OF_LIST() ++ } ++ }; ++ + static bool amx_xtile_needed(void *opaque) + { + X86CPU *cpu = opaque; +@@ -1768,6 +1795,7 @@ const VMStateDescription vmstate_x86_cpu = { + &vmstate_pdptrs, + &vmstate_msr_xfd, + #ifdef TARGET_X86_64 ++ &vmstate_msr_fred, + &vmstate_amx_xtile, + #endif + &vmstate_arch_lbr, +-- +2.41.0.windows.1 + diff --git a/target-i386-Add-missed-security-features-to-Cooperla.patch b/target-i386-Add-missed-security-features-to-Cooperla.patch deleted file mode 100644 index d17e0c00e9a638c2c1dd715a7c6f1f1eb5a14474..0000000000000000000000000000000000000000 --- a/target-i386-Add-missed-security-features-to-Cooperla.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 97d5c6c621569b011a2122423d0f630bd71de5ff Mon Sep 17 00:00:00 2001 -From: Jingyi Wang -Date: Fri, 9 Jul 2021 11:17:19 +0800 -Subject: [PATCH] target/i386: Add missed security features to Cooperlake CPU - model - -It lacks two security feature bits in MSR_IA32_ARCH_CAPABILITIES in -current Cooperlake CPU model, so add them. 
- -This is part of uptream commit 2dea9d9 - -Signed-off-by: Xiaoyao Li -Signed-off-by: Paolo Bonzini -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5329d73316..50d6ef9de4 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2420,7 +2420,8 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, - .features[FEAT_ARCH_CAPABILITIES] = - MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | -- MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, -+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | -+ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, - .features[FEAT_7_1_EAX] = - CPUID_7_1_EAX_AVX512_BF16, - /* --- -2.27.0 - diff --git a/target-i386-Add-more-features-enumerated-by-CPUID.7..patch b/target-i386-Add-more-features-enumerated-by-CPUID.7..patch new file mode 100644 index 0000000000000000000000000000000000000000..7713fa5b2bb02fcd1cc88cf4fbb2350fd453ef7f --- /dev/null +++ b/target-i386-Add-more-features-enumerated-by-CPUID.7..patch @@ -0,0 +1,63 @@ +From cfb01b2fe4a99ed030dacdc49064a152a472dc2d Mon Sep 17 00:00:00 2001 +From: Chao Gao +Date: Thu, 19 Sep 2024 13:10:11 +0800 +Subject: [PATCH] target/i386: Add more features enumerated by CPUID.7.2.EDX + +commit 10eaf9c0fb7060f45807becbb2742a9de9bc3632 upstream + +Following 5 bits in CPUID.7.2.EDX are supported by KVM. Add their +supports in QEMU. Each of them indicates certain bits of IA32_SPEC_CTRL +are supported. Those bits can control CPU speculation behavior which can +be used to defend against side-channel attacks. + +bit0: intel-psfd + if 1, indicates bit 7 of the IA32_SPEC_CTRL MSR is supported. Bit 7 of + this MSR disables Fast Store Forwarding Predictor without disabling + Speculative Store Bypass + +bit1: ipred-ctrl + If 1, indicates bits 3 and 4 of the IA32_SPEC_CTRL MSR are supported. 
+ Bit 3 of this MSR enables IPRED_DIS control for CPL3. Bit 4 of this + MSR enables IPRED_DIS control for CPL0/1/2 + +bit2: rrsba-ctrl + If 1, indicates bits 5 and 6 of the IA32_SPEC_CTRL MSR are supported. + Bit 5 of this MSR disables RRSBA behavior for CPL3. Bit 6 of this MSR + disables RRSBA behavior for CPL0/1/2 + +bit3: ddpd-u + If 1, indicates bit 8 of the IA32_SPEC_CTRL MSR is supported. Bit 8 of + this MSR disables Data Dependent Prefetcher. + +bit4: bhi-ctrl + if 1, indicates bit 10 of the IA32_SPEC_CTRL MSR is supported. Bit 10 + of this MSR enables BHI_DIS_S behavior. + +Intel-SIG: 10eaf9c0fb70 target/i386: Add more features enumerated by CPUID.7.2.EDX + +Signed-off-by: Chao Gao +Link: https://lore.kernel.org/r/20240919051011.118309-1-chao.gao@intel.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1fa08265bc..f3df62127c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1000,8 +1000,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + [FEAT_7_2_EDX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +- NULL, NULL, NULL, NULL, +- NULL, "mcdt-no", NULL, NULL, ++ "intel-psfd", "ipred-ctrl", "rrsba-ctrl", "ddpd-u", ++ "bhi-ctrl", "mcdt-no", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +-- +2.41.0.windows.1 + diff --git a/target-i386-Add-new-CPU-model-ClearwaterForest.patch b/target-i386-Add-new-CPU-model-ClearwaterForest.patch new file mode 100644 index 0000000000000000000000000000000000000000..35ee518b8f7a533ebf9d01db37c9226787bea70c --- /dev/null +++ b/target-i386-Add-new-CPU-model-ClearwaterForest.patch @@ -0,0 +1,272 @@ +From e6464174c2261e809764ed63f8a064913a108446 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan 2025 10:06:49 +0800 +Subject: [PATCH] target/i386: Add new CPU model ClearwaterForest + +commit 
56e84d898f17606b5d88778726466540af96b234 upstream. + +According to table 1-2 in Intel Architecture Instruction Set Extensions +and Future Features (rev 056) [1], ClearwaterForest has the following new +features which have already been virtualized: + + - AVX-VNNI-INT16 CPUID.(EAX=7,ECX=1):EDX[bit 10] + - SHA512 CPUID.(EAX=7,ECX=1):EAX[bit 0] + - SM3 CPUID.(EAX=7,ECX=1):EAX[bit 1] + - SM4 CPUID.(EAX=7,ECX=1):EAX[bit 2] + +Add above features to new CPU model ClearwaterForest. Comparing with +SierraForest, ClearwaterForest bare-metal contains all features of +SierraForest-v2 CPU model and adds: + + - PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] + - DDPD_U CPUID.(EAX=7,ECX=2):EDX[bit 3] + - BHI_NO IA32_ARCH_CAPABILITIES[bit 20] + +Add above and all features of SierraForest-v2 CPU model to new CPU model +ClearwaterForest. + +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Intel-SIG: commit 56e84d898f17 target/i386: Add new CPU model ClearwaterForest. + +Tested-by: Xuelian Guo +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-4-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++ + target/i386/cpu.h | 35 +++++++++--- + 2 files changed, 164 insertions(+), 6 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6ed4e84b5c..f79d0c9abf 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4337,6 +4337,141 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "ClearwaterForest", ++ .level = 0x23, ++ .xlevel = 0x80000008, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 221, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature. 
++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2 | CPUID_SS, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_TSC_ADJUST | ++ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | ++ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | ++ CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | ++ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT | ++ CPUID_7_0_ECX_CLDEMOTE | CPUID_7_0_ECX_MOVDIRI | ++ CPUID_7_0_ECX_MOVDIR64B, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | 
MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_BHI_NO | MSR_ARCH_CAP_PBRSB_NO | ++ MSR_ARCH_CAP_GDS_NO | MSR_ARCH_CAP_RFDS_NO, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_SHA512 | CPUID_7_1_EAX_SM3 | CPUID_7_1_EAX_SM4 | ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA | ++ CPUID_7_1_EAX_LAM, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT | ++ CPUID_7_1_EDX_AVX_VNNI_INT16 | CPUID_7_1_EDX_PREFETCHITI, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | ++ CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_DDPD_U | ++ CPUID_7_2_EDX_BHI_CTRL | CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ 
MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .model_id = "Intel Xeon Processor (ClearwaterForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, 
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index b883e5e1d6..4424e58d1b 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -801,6 +801,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + + /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ + #define CPUID_7_0_EBX_FSGSBASE (1U << 0) ++/* Support TSC adjust MSR */ ++#define CPUID_7_0_EBX_TSC_ADJUST (1U << 1) + /* Support SGX */ + #define CPUID_7_0_EBX_SGX (1U << 2) + /* 1st Group of Advanced Bit Manipulation Extensions */ +@@ -934,6 +936,12 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + /* Speculative Store Bypass Disable */ + #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) + ++/* SHA512 Instruction */ ++#define CPUID_7_1_EAX_SHA512 (1U << 0) ++/* SM3 Instruction */ ++#define CPUID_7_1_EAX_SM3 (1U << 1) ++/* SM4 Instruction */ ++#define CPUID_7_1_EAX_SM4 (1U << 2) + /* AVX VNNI Instruction */ + #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) + /* AVX512 BFloat16 Instruction */ +@@ -946,6 +954,12 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define CPUID_7_1_EAX_FSRS (1U << 11) + /* Fast Short REP CMPS/SCAS */ + #define CPUID_7_1_EAX_FSRC (1U << 12) ++/* Flexible return and event delivery (FRED) */ ++#define CPUID_7_1_EAX_FRED (1U << 17) ++/* Load into IA32_KERNEL_GS_BASE (LKGS) */ ++#define CPUID_7_1_EAX_LKGS (1U << 18) ++/* Non-Serializing Write to Model Specific Register (WRMSRNS) */ ++#define CPUID_7_1_EAX_WRMSRNS (1U << 19) + /* Support Tile Computational Operations on FP16 Numbers */ + #define CPUID_7_1_EAX_AMX_FP16 (1U << 21) + /* Support for VPMADD52[H,L]UQ */ +@@ -957,17 +971,23 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) + /* AVX NE CONVERT Instructions */ + #define CPUID_7_1_EDX_AVX_NE_CONVERT (1U << 5) ++/* AVX-VNNI-INT16 Instructions */ ++#define CPUID_7_1_EDX_AVX_VNNI_INT16 (1U << 10) + /* AMX COMPLEX Instructions */ + 
#define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) + /* PREFETCHIT0/1 Instructions */ + #define CPUID_7_1_EDX_PREFETCHITI (1U << 14) +-/* Flexible return and event delivery (FRED) */ +-#define CPUID_7_1_EAX_FRED (1U << 17) +-/* Load into IA32_KERNEL_GS_BASE (LKGS) */ +-#define CPUID_7_1_EAX_LKGS (1U << 18) +-/* Non-Serializing Write to Model Specific Register (WRMSRNS) */ +-#define CPUID_7_1_EAX_WRMSRNS (1U << 19) + ++/* Indicate bit 7 of the IA32_SPEC_CTRL MSR is supported */ ++#define CPUID_7_2_EDX_PSFD (1U << 0) ++/* Indicate bits 3 and 4 of the IA32_SPEC_CTRL MSR are supported */ ++#define CPUID_7_2_EDX_IPRED_CTRL (1U << 1) ++/* Indicate bits 5 and 6 of the IA32_SPEC_CTRL MSR are supported */ ++#define CPUID_7_2_EDX_RRSBA_CTRL (1U << 2) ++/* Indicate bit 8 of the IA32_SPEC_CTRL MSR is supported */ ++#define CPUID_7_2_EDX_DDPD_U (1U << 3) ++/* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */ ++#define CPUID_7_2_EDX_BHI_CTRL (1U << 4) + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ + #define CPUID_7_2_EDX_MCDT_NO (1U << 5) + +@@ -1061,7 +1081,10 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define MSR_ARCH_CAP_FBSDP_NO (1U << 14) + #define MSR_ARCH_CAP_PSDP_NO (1U << 15) + #define MSR_ARCH_CAP_FB_CLEAR (1U << 17) ++#define MSR_ARCH_CAP_BHI_NO (1U << 20) + #define MSR_ARCH_CAP_PBRSB_NO (1U << 24) ++#define MSR_ARCH_CAP_GDS_NO (1U << 26) ++#define MSR_ARCH_CAP_RFDS_NO (1U << 27) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +2.41.0.windows.1 + diff --git a/target-i386-Add-new-CPU-model-SierraForest.patch b/target-i386-Add-new-CPU-model-SierraForest.patch new file mode 100644 index 0000000000000000000000000000000000000000..156e1d6db0c10ae1d9de2f7d0aea57a887a3d4e1 --- /dev/null +++ b/target-i386-Add-new-CPU-model-SierraForest.patch @@ -0,0 +1,212 @@ +From c61eabb8aa86fed57c2cd5394e0e89e350c99c5e Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Wed, 20 Mar 2024 10:10:44 +0800 +Subject: [PATCH] 
target/i386: Add new CPU model SierraForest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 6e82d3b6220777667968a04c87e1667f164ebe88 upstream. + +According to table 1-2 in Intel Architecture Instruction Set Extensions and +Future Features (rev 051) [1], SierraForest has the following new features +which have already been virtualized: + +- CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7] +- AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23] +- AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4] +- AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5] + +Add above features to new CPU model SierraForest. Comparing with GraniteRapids +CPU model, SierraForest bare-metal removes the following features: + +- HLE CPUID.(EAX=7,ECX=0):EBX[bit 4] +- RTM CPUID.(EAX=7,ECX=0):EBX[bit 11] +- AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16] +- AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17] +- AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21] +- AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28] +- AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30] +- AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31] +- AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1] +- AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6] +- AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11] +- AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12] +- AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14] +- LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16] +- TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16] +- AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22] +- AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23] +- AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24] +- AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25] +- AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5] +- fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10] +- fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12] +- AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21] +- PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] +- XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4] +- EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7] + +Add all features of GraniteRapids CPU model except above features to 
+SierraForest CPU model. + +SierraForest doesn’t support TSX and RTM but supports TAA_NO. When RTM is +not enabled in host, KVM will not report TAA_NO. So, just don't include +TAA_NO in SierraForest CPU model. + +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Intel-SIG: commit 6e82d3b62207 target/i386: Add new CPU model SierraForest. +8.2.0-Add SRF CPU module support + +Reviewed-by: Zhao Liu +Reviewed-by: Xiaoyao Li +Signed-off-by: Tao Su +Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com> +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 126 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6abe33946c..57a832cea2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4109,6 +4109,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "SierraForest", ++ .level = 0x23, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 175, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature. 
++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_PBRSB_NO, ++ 
.features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | 
VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (SierraForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, +-- +2.27.0 + diff --git a/target-i386-Add-new-Hygon-Chengdu-CPU-model.patch b/target-i386-Add-new-Hygon-Chengdu-CPU-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..829b46d4c5c98c1355d3794499c28ff52cd76761 --- /dev/null +++ b/target-i386-Add-new-Hygon-Chengdu-CPU-model.patch @@ -0,0 +1,89 @@ +From 198d98579a2ccb26423b644b29f53323c9bcb1e8 Mon Sep 17 00:00:00 2001 +From: Yanjing Zhou +Date: Mon, 19 May 2025 
08:14:54 +0000 +Subject: [PATCH] target/i386: Add new Hygon 'Chengdu' CPU model + +Add the following feature bits compare to Dhyana CPU model: +avx512f, avx512dq, avx512ifma, clwb, avx512cd, avx512bw, gfni, +avx512vl, avx512_bf16, wbnoinvd, avx512vbmi, avx512_vbmi2, +vaes, vpclmulqdq, avx512_vnni, avx512_bitalg,avx512_vpopcntdq + +Signed-off-by: Yanjing Zhou +--- + target/i386/cpu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 60 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f79d0c9abf..8360ea3d61 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5324,6 +5324,66 @@ static const X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Hygon Dharma Processor", + .cache_info = &dharma_cache_info, + }, ++ { ++ .name = "Chengdu", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_HYGON, ++ .family = 24, ++ .model = 7, ++ .stepping = 0, ++ .features[FEAT_1_EDX] = ++ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | ++ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | ++ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | ++ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | ++ CPUID_VME | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | ++ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | ++ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | ++ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | ++ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | ++ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | ++ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, ++ .features[FEAT_8000_0008_EBX] = ++ 
CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | ++ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | ++ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | ++ CPUID_8000_0008_EBX_AMD_SSBD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_AVX512F | ++ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | ++ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | ++ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | ++ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | ++ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | ++ CPUID_7_0_ECX_AVX512_VPOPCNTDQ, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX512_BF16, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_SVM] = ++ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, ++ .xlevel = 0x80000020, ++ .model_id = "Hygon Chengdu Processor", ++ .cache_info = &dharma_cache_info, ++ }, + }; + + /* +-- +2.33.0 + diff --git a/target-i386-Add-new-Hygon-Dharma-CPU-model.patch b/target-i386-Add-new-Hygon-Dharma-CPU-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..b05c4a53f9fd27270e63b6f727c25c36bc5e8868 --- /dev/null +++ b/target-i386-Add-new-Hygon-Dharma-CPU-model.patch @@ -0,0 +1,133 @@ +From f4d31d640491c66bb1277e12d3c1d0e7ebc7cae5 Mon Sep 17 00:00:00 2001 +From: Yanjing Zhou +Date: Wed, 15 May 2024 13:50:17 +0800 +Subject: [PATCH] target/i386: Add new Hygon 'Dharma' CPU model + +Add the following feature bits compare to Dhyana CPU model: +stibp, ibrs, umip, ssbd + +Signed-off-by: Yanjing Zhou +--- + target/i386/cpu.c | 99 
+++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 99 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f4c22f32c6..711370d9b8 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2162,6 +2162,56 @@ static const CPUCaches epyc_genoa_cache_info = { + }, + }; + ++static const CPUCaches dharma_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 16 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 16384, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = true, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -5038,6 +5088,55 @@ static const X86CPUDefinition builtin_x86_defs[] = { + .model_id = "AMD EPYC-Genoa Processor", + .cache_info = &epyc_genoa_cache_info, + }, ++ { ++ .name = "Dharma", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_HYGON, ++ .family = 24, ++ .model = 4, ++ .stepping = 0, ++ .features[FEAT_1_EDX] = ++ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | ++ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | ++ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | ++ CPUID_PAE | CPUID_MSR | 
CPUID_TSC | CPUID_PSE | CPUID_DE | ++ CPUID_VME | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | ++ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | ++ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | ++ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | ++ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | ++ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | ++ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | ++ CPUID_8000_0008_EBX_IBPB | CPUID_8000_0008_EBX_IBRS | ++ CPUID_8000_0008_EBX_STIBP | CPUID_8000_0008_EBX_AMD_SSBD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | ++ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = CPUID_7_0_ECX_UMIP, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_SVM] = ++ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, ++ .xlevel = 0x8000001E, ++ .model_id = "Hygon Dharma Processor", ++ .cache_info = &dharma_cache_info, ++ }, + }; + + /* +-- +2.41.0.windows.1 + diff --git a/target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch b/target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch deleted file mode 100644 index 3aff7ea35f37047933f6f4464b513feaa242cf69..0000000000000000000000000000000000000000 --- a/target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch 
+++ /dev/null @@ -1,47 +0,0 @@ -From 05b13a8de90abc6c1cfeca8b9c436e60e6d3142e Mon Sep 17 00:00:00 2001 -From: Xiaoyao Li -Date: Wed, 25 Dec 2019 14:30:17 +0800 -Subject: [PATCH] target/i386: Add new bit definitions of - MSR_IA32_ARCH_CAPABILITIES - -The bit 6, 7 and 8 of MSR_IA32_ARCH_CAPABILITIES are recently disclosed -for some security issues. Add the definitions for them to be used by named -CPU models. - -Signed-off-by: Xiaoyao Li -Message-Id: <20191225063018.20038-2-xiaoyao.li@intel.com> -Signed-off-by: Paolo Bonzini - -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.h | 13 ++++++++----- - 1 file changed, 8 insertions(+), 5 deletions(-) - -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 58d8c48964..7ff8ddd464 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -743,12 +743,15 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_TOPOLOGY_LEVEL_DIE (5U << 8) - - /* MSR Feature Bits */ --#define MSR_ARCH_CAP_RDCL_NO (1U << 0) --#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) --#define MSR_ARCH_CAP_RSBA (1U << 2) -+#define MSR_ARCH_CAP_RDCL_NO (1U << 0) -+#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) -+#define MSR_ARCH_CAP_RSBA (1U << 2) - #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) --#define MSR_ARCH_CAP_SSB_NO (1U << 4) --#define MSR_ARCH_CAP_MDS_NO (1U << 5) -+#define MSR_ARCH_CAP_SSB_NO (1U << 4) -+#define MSR_ARCH_CAP_MDS_NO (1U << 5) -+#define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) -+#define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) -+#define MSR_ARCH_CAP_TAA_NO (1U << 8) - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - --- -2.27.0 - diff --git a/target-i386-Construct-CPUID-2-as-stateful-iff-times-.patch b/target-i386-Construct-CPUID-2-as-stateful-iff-times-.patch new file mode 100644 index 0000000000000000000000000000000000000000..38a2ac997e1c7e690f3b3eaa7411f24e4337f36f --- /dev/null +++ b/target-i386-Construct-CPUID-2-as-stateful-iff-times-.patch @@ -0,0 +1,41 @@ +From afcdb893e4c702f4e009a98da71408cf54a53cc4 Mon Sep 17 
00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 14 Aug 2024 03:54:27 -0400 +Subject: [PATCH] target/i386: Construct CPUID 2 as stateful iff times > 1 + +commit 5ab639141b6d916a6f4041d4ec46f2f1a1e4a365 upstream. + +When times == 1, the CPUID leaf 2 is not stateful. + +Intel-SIG: commit 5ab639141b6d target/i386: Construct CPUID 2 as stateful iff times > 1 + +Signed-off-by: Xiaoyao Li +Link: https://lore.kernel.org/r/20240814075431.339209-6-xiaoyao.li@intel.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5057dfbd75..a867512822 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1896,10 +1896,12 @@ int kvm_arch_init_vcpu(CPUState *cs) + int times; + + c->function = i; +- c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | +- KVM_CPUID_FLAG_STATE_READ_NEXT; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + times = c->eax & 0xff; ++ if (times > 1) { ++ c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | ++ KVM_CPUID_FLAG_STATE_READ_NEXT; ++ } + + for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +-- +2.41.0.windows.1 + diff --git a/target-i386-Delete-duplicated-macro-definition-CR4_F.patch b/target-i386-Delete-duplicated-macro-definition-CR4_F.patch new file mode 100644 index 0000000000000000000000000000000000000000..ca33d59036b68a92006ef88ae140918d551e6243 --- /dev/null +++ b/target-i386-Delete-duplicated-macro-definition-CR4_F.patch @@ -0,0 +1,39 @@ +From 1eacc509e9158b9e87f05fc9844142c0022b2d64 Mon Sep 17 00:00:00 2001 +From: "Xin Li (Intel)" +Date: Wed, 7 Aug 2024 01:18:10 -0700 +Subject: [PATCH] target/i386: Delete duplicated macro definition CR4_FRED_MASK + +commit a23bc6539890d8b27458cf56bc4ed0e0d3c2de3e upstream. + +Macro CR4_FRED_MASK is defined twice, delete one. 
+ +Intel-SIG: commit a23bc6539890 target/i386: Delete duplicated macro definition CR4_FRED_MASK + +Signed-off-by: Xin Li (Intel) +Link: https://lore.kernel.org/r/20240807081813.735158-2-xin@zytor.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.h | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 1b9d922651..f022749c86 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -270,12 +270,6 @@ typedef enum X86Seg { + #define CR4_FRED_MASK 0 + #endif + +-#ifdef TARGET_X86_64 +-#define CR4_FRED_MASK (1ULL << 32) +-#else +-#define CR4_FRED_MASK 0 +-#endif +- + #define CR4_RESERVED_MASK \ + (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \ + | CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \ +-- +2.41.0.windows.1 + diff --git a/target-i386-Don-t-construct-a-all-zero-entry-for-CPU.patch b/target-i386-Don-t-construct-a-all-zero-entry-for-CPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..4f00f7337ff1906ff87355e8a69d1d1f2c099231 --- /dev/null +++ b/target-i386-Don-t-construct-a-all-zero-entry-for-CPU.patch @@ -0,0 +1,57 @@ +From e0b51ea0f229ea9c6788fa0da252e8100e30241e Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 14 Aug 2024 03:54:23 -0400 +Subject: [PATCH] target/i386: Don't construct a all-zero entry for CPUID[0xD + 0x3f] + +commit 00c8a933d95add3ce4afebbe491ca0fa398a9007 upstream. + +Currently, QEMU always constructs a all-zero CPUID entry for +CPUID[0xD 0x3f]. + +It's meaningless to construct such a leaf as the end of leaf 0xD. Rework +the logic of how subleaves of 0xD are constructed to get rid of such +all-zero value of subleaf 0x3f. 
+ +Intel-SIG: commit 00c8a933d95a target/i386: Don't construct a all-zero entry for CPUID[0xD 0x3f] + +Signed-off-by: Xiaoyao Li +Link: https://lore.kernel.org/r/20240814075431.339209-2-xiaoyao.li@intel.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 850104f6b5..5057dfbd75 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1924,10 +1924,6 @@ int kvm_arch_init_vcpu(CPUState *cs) + case 0xb: + case 0xd: + for (j = 0; ; j++) { +- if (i == 0xd && j == 64) { +- break; +- } +- + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; +@@ -1943,7 +1939,12 @@ int kvm_arch_init_vcpu(CPUState *cs) + break; + } + if (i == 0xd && c->eax == 0) { +- continue; ++ if (j < 63) { ++ continue; ++ } else { ++ cpuid_i--; ++ break; ++ } + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " +-- +2.41.0.windows.1 + diff --git a/target-i386-Enable-fdp-excptn-only-and-zero-fcs-fds.patch b/target-i386-Enable-fdp-excptn-only-and-zero-fcs-fds.patch new file mode 100644 index 0000000000000000000000000000000000000000..b293a5563c7b2291ea52a215989ce7779df83aeb --- /dev/null +++ b/target-i386-Enable-fdp-excptn-only-and-zero-fcs-fds.patch @@ -0,0 +1,70 @@ +From 8c61e09f435ff3a965867b0496f01682d679182f Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 14 Aug 2024 03:54:24 -0400 +Subject: [PATCH] target/i386: Enable fdp-excptn-only and zero-fcs-fds + +commit 7dddc3bb875e7141ab25931d0f30a1c319bc8457 upstream. + +- CPUID.(EAX=07H,ECX=0H):EBX[bit 6]: x87 FPU Data Pointer updated only + on x87 exceptions if 1. + +- CPUID.(EAX=07H,ECX=0H):EBX[bit 13]: Deprecates FPU CS and FPU DS + values if 1. i.e., X87 FCS and FDS are always zero. + +Define names for them so that they can be exposed to guest with -cpu host. 
+ +Also define the bit field MACROs so that named cpu models can add it as +well in the future. + +Intel-SIG: commit 7dddc3bb875e target/i386: Enable fdp-excptn-only and zero-fcs-fds + +Signed-off-by: Xiaoyao Li +Link: https://lore.kernel.org/r/20240814075431.339209-3-xiaoyao.li@intel.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 4 ++++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index dfc0f7fd2d..d0aa2fb5ff 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -906,9 +906,9 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "fsgsbase", "tsc-adjust", "sgx", "bmi1", +- "hle", "avx2", NULL, "smep", ++ "hle", "avx2", "fdp-excptn-only", "smep", + "bmi2", "erms", "invpcid", "rtm", +- NULL, NULL, "mpx", NULL, ++ NULL, "zero-fcs-fds", "mpx", NULL, + "avx512f", "avx512dq", "rdseed", "adx", + "smap", "avx512ifma", "pcommit", "clflushopt", + "clwb", "intel-pt", "avx512pf", "avx512er", +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index b90182582f..b883e5e1d6 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -809,6 +809,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define CPUID_7_0_EBX_HLE (1U << 4) + /* Intel Advanced Vector Extensions 2 */ + #define CPUID_7_0_EBX_AVX2 (1U << 5) ++/* FPU data pointer updated only on x87 exceptions */ ++#define CPUID_7_0_EBX_FDP_EXCPTN_ONLY (1u << 6) + /* Supervisor-mode Execution Prevention */ + #define CPUID_7_0_EBX_SMEP (1U << 7) + /* 2nd Group of Advanced Bit Manipulation Extensions */ +@@ -819,6 +821,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + #define CPUID_7_0_EBX_INVPCID (1U << 10) + /* Restricted Transactional Memory */ + #define CPUID_7_0_EBX_RTM (1U << 11) ++/* Zero out FPU CS and FPU DS */ ++#define CPUID_7_0_EBX_ZERO_FCS_FDS (1U << 13) + /* Memory 
Protection Extension */ + #define CPUID_7_0_EBX_MPX (1U << 14) + /* AVX-512 Foundation */ +-- +2.41.0.windows.1 + diff --git a/target-i386-Export-BHI_NO-bit-to-guests.patch b/target-i386-Export-BHI_NO-bit-to-guests.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f24a14d0556ca036325c2ef94121bfb05e98ae9 --- /dev/null +++ b/target-i386-Export-BHI_NO-bit-to-guests.patch @@ -0,0 +1,44 @@ +From bd65b82f94b07c90f856a34cb10d535b5301d9d9 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan 2025 10:06:48 +0800 +Subject: [PATCH] target/i386: Export BHI_NO bit to guests + +commit b611931d4f70b9a3e49e39c405c63b3b5e9c0df1 upstream. + +Branch History Injection (BHI) is a CPU side-channel vulnerability, where +an attacker may manipulate branch history before transitioning from user +to supervisor mode or from VMX non-root/guest to root mode. CPUs that set +BHI_NO bit in MSR IA32_ARCH_CAPABILITIES to indicate no additional +mitigation is required to prevent BHI. + +Make BHI_NO bit available to guests. + +Intel-SIG: commit b611931d4f70 target/i386: Export BHI_NO bit to guests. 
+ +Tested-by: Xuelian Guo +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-3-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index bad30581ce..b5231432e7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1157,7 +1157,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "taa-no", NULL, NULL, NULL, + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, +- NULL, NULL, NULL, NULL, ++ "bhi-no", NULL, NULL, NULL, + "pbrsb-no", NULL, "gds-no", "rfds-no", + "rfds-clear", NULL, NULL, NULL, + }, +-- +2.41.0.windows.1 + diff --git a/target-i386-Export-RFDS-bit-to-guests.patch b/target-i386-Export-RFDS-bit-to-guests.patch new file mode 100644 index 0000000000000000000000000000000000000000..00561d6ddb4bde68c8702ee1524a2aedfc403cc4 --- /dev/null +++ b/target-i386-Export-RFDS-bit-to-guests.patch @@ -0,0 +1,47 @@ +From b167617657fa078c4ea14cf54138ff5a4ce180f3 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 13 Mar 2024 07:53:23 -0700 +Subject: [PATCH] target/i386: Export RFDS bit to guests + +commit 41bdd9812863c150284a9339a048ed88c40f4df7 upstream. + +Register File Data Sampling (RFDS) is a CPU side-channel vulnerability +that may expose stale register value. CPUs that set RFDS_NO bit in MSR +IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS. +Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has +the microcode to help mitigate RFDS. + +Make RFDS_CLEAR and RFDS_NO bits available to guests. + +Intel-SIG: commit 41bdd9812863 target/i386: Export RFDS bit to guests. 
+8.2.0-Add SRF CPU module support + +Signed-off-by: Pawan Gupta +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 57a832cea2..fd32c64f99 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1157,8 +1157,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, +- "pbrsb-no", NULL, "gds-no", NULL, +- NULL, NULL, NULL, NULL, ++ "pbrsb-no", NULL, "gds-no", "rfds-no", ++ "rfds-clear", NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, +-- +2.27.0 + diff --git a/target-i386-Export-TAA_NO-bit-to-guests.patch b/target-i386-Export-TAA_NO-bit-to-guests.patch deleted file mode 100644 index 8d995dc840bbace44c4d59b61f70167e960e2699..0000000000000000000000000000000000000000 --- a/target-i386-Export-TAA_NO-bit-to-guests.patch +++ /dev/null @@ -1,36 +0,0 @@ -From c828229e1dc4a3d0837071db4c08f7860dc24755 Mon Sep 17 00:00:00 2001 -From: Pawan Gupta -Date: Mon, 18 Nov 2019 23:23:27 -0800 -Subject: [PATCH] target/i386: Export TAA_NO bit to guests - -TSX Async Abort (TAA) is a side channel attack on internal buffers in -some Intel processors similar to Microachitectural Data Sampling (MDS). - -Some future Intel processors will use the ARCH_CAP_TAA_NO bit in the -IA32_ARCH_CAPABILITIES MSR to report that they are not vulnerable to -TAA. Make this bit available to guests. 
- -Signed-off-by: Pawan Gupta -Signed-off-by: Paolo Bonzini - -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 29836cb2a5..5af4fca350 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1209,7 +1209,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .feat_names = { - "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", - "ssb-no", "mds-no", "pschange-mc-no", NULL, -- NULL, NULL, NULL, NULL, -+ "taa-no", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, --- -2.27.0 - diff --git a/target-i386-Fix-minor-typo-in-NO_NESTED_DATA_BP-feat.patch b/target-i386-Fix-minor-typo-in-NO_NESTED_DATA_BP-feat.patch new file mode 100644 index 0000000000000000000000000000000000000000..448226ed7d515ca64a9010fa14268b747d58b6c8 --- /dev/null +++ b/target-i386-Fix-minor-typo-in-NO_NESTED_DATA_BP-feat.patch @@ -0,0 +1,50 @@ +From c006b5b78ffe7e6af76cde943a9fdd082473ba55 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 18 Nov 2024 15:45:24 -0500 +Subject: [PATCH] target/i386: Fix minor typo in NO_NESTED_DATA_BP feature bit + +cherry-pick from 9c882ad4dc96f658ff9f92b88b3749d0398e6fa2 + +Rename CPUID_8000_0021_EAX_No_NESTED_DATA_BP to + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP. + +No functional change intended.
+ +Signed-off-by: Babu Moger +Link: https://lore.kernel.org/r/a6749acd125670d3930f4ca31736a91b1d965f2f.1729807947.git.babu.moger@amd.com +Signed-off-by: Paolo Bonzini +Signed-off-by: qihao_yewu +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index ca7e5337b0..c2dc929eaa 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5063,7 +5063,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = +- CPUID_8000_0021_EAX_No_NESTED_DATA_BP | ++ CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 34f9615b98..6ca185cd9d 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -971,7 +971,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + + /* Processor ignores nested data breakpoints */ +-#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) ++#define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) + /* LFENCE is always serializing */ + #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) + /* Null Selector Clears Base */ +-- +2.41.0.windows.1 + diff --git a/target-i386-Introduce-Denverton-CPU-model.patch b/target-i386-Introduce-Denverton-CPU-model.patch deleted file mode 100644 index 3e9debe339e6d1b0f7c0ca0ad019129de668363b..0000000000000000000000000000000000000000 --- a/target-i386-Introduce-Denverton-CPU-model.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 7d602cefa04f4992d913683c1a5826abc4806e41 Mon Sep 17 00:00:00 2001 -From: Tao Xu -Date: Thu, 18 Jul 2019 15:34:05 +0800 -Subject: [PATCH] target/i386: Introduce Denverton CPU model - -Denverton is the Atom Processor 
of Intel Harrisonville platform. - -For more information: -https://ark.intel.com/content/www/us/en/ark/products/\ -codename/63508/denverton.html - -Signed-off-by: Tao Xu -Message-Id: <20190718073405.28301-1-tao3.xu@intel.com> -Signed-off-by: Eduardo Habkost ---- - target/i386/cpu.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 47 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5af4fca350..d3742ef4ac 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2552,6 +2552,53 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Icelake)", - }, -+ { -+ .name = "Denverton", -+ .level = 21, -+ .vendor = CPUID_VENDOR_INTEL, -+ .family = 6, -+ .model = 95, -+ .stepping = 1, -+ .features[FEAT_1_EDX] = -+ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | -+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | -+ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | -+ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | -+ CPUID_SSE | CPUID_SSE2, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_MONITOR | -+ CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_SSE41 | -+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | -+ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | -+ CPUID_EXT_AES | CPUID_EXT_XSAVE | CPUID_EXT_RDRAND, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_ERMS | -+ CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_SMAP | -+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_SHA_NI, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | -+ CPUID_7_0_EDX_SPEC_CTRL_SSBD, -+ /* -+ * Missing: 
XSAVES (not supported by some Linux versions, -+ * including v4.1 to v4.12). -+ * KVM doesn't yet expose any XSAVES state save component, -+ * and the only one defined in Skylake (processor tracing) -+ * probably will block migration anyway. -+ */ -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_ARCH_CAPABILITIES] = -+ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY, -+ .xlevel = 0x80000008, -+ .model_id = "Intel Atom Processor (Denverton)", -+ }, - { - .name = "Snowridge", - .level = 27, --- -2.27.0 - diff --git a/target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch b/target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ecd0ba1d3a70a4c14fe6da4bf868f4ca4e73767 --- /dev/null +++ b/target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch @@ -0,0 +1,64 @@ +From 8f2e7e0ebc4351d61091669137a4e26b78f3cb27 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 20 Mar 2024 17:31:38 +0800 +Subject: [PATCH] target/i386: Introduce Icelake-Server-v7 to enable TSX + +commit c895fa54e3060c5ac6f3888dce96c9b78626072b upstream. + +When start L2 guest with both L1/L2 using Icelake-Server-v3 or above, +QEMU reports below warning: + +"warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]" + +Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no +bit. It's meaningless that TSX isn't supported but still claim TSX is secure. +So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2 +triggers the warning. + +Fix it by introducing a new version Icelake-Server-v7 which has both TSX +and taa-no features. Then guest can use TSX securely when it see taa-no. + +This matches the production Icelake which supports TSX and isn't susceptible +to TSX Async Abort (TAA) vulnerabilities, a.k.a, taa-no. 
+ +Ideally, TSX should have been enabled together with taa-no since v3, but for +compatibility, we'd better add v7 to enable it. + +Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model") +Intel-SIG: commit c895fa54e306 target/i386: Introduce Icelake-Server-v7 to enable TSX. +8.2.0-Add SRF CPU module support + +Tested-by: Xiangfei Ma +Signed-off-by: Zhenzhong Duan +Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com> +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 491cf40cc7..6abe33946c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3822,6 +3822,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 7, ++ .note = "TSX, taa-no", ++ .props = (PropValue[]) { ++ /* Restore TSX features removed by -v2 above */ ++ { "hle", "on" }, ++ { "rtm", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.27.0 + diff --git a/target-i386-Introduce-SapphireRapids-v3-to-add-missi.patch b/target-i386-Introduce-SapphireRapids-v3-to-add-missi.patch new file mode 100644 index 0000000000000000000000000000000000000000..420fcf1c81b895c9e228cf5af7530afd6fc6bb10 --- /dev/null +++ b/target-i386-Introduce-SapphireRapids-v3-to-add-missi.patch @@ -0,0 +1,48 @@ +From 3323c09d283e02c10bbf6e8dfc43ea9f41e746db Mon Sep 17 00:00:00 2001 +From: Lei Wang +Date: Wed, 24 Apr 2024 03:29:12 -0400 +Subject: [PATCH] target/i386: Introduce SapphireRapids-v3 to add missing + features + +commit b10b2481738304db13d28252e86c10555121a5b3 upstream. + +Add the missing features(ss, tsc-adjust, cldemote, movdiri, movdir64b) in +the SapphireRapids-v3 CPU model. + +Intel-SIG: commit b10b24817383 target/i386: Introduce SapphireRapids-v3 to add missing features.
+8.2-SPR new model support + +Signed-off-by: Lei Wang +Message-ID: <20240424072912.43188-1-lei4.wang@intel.com> +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 19ebd49e8c..ca7e5337b0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4020,6 +4020,17 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .version = 3, ++ .props = (PropValue[]) { ++ { "ss", "on" }, ++ { "tsc-adjust", "on" }, ++ { "cldemote", "on" }, ++ { "movdiri", "on" }, ++ { "movdir64b", "on" }, ++ { /* end of list */ } ++ } ++ }, + { /* end of list */ } + } + }, +-- +2.41.0.windows.1 + diff --git a/target-i386-Introduce-SierraForest-v2-model.patch b/target-i386-Introduce-SierraForest-v2-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..09d04d938f733f57cd5e0108353454a01b381985 --- /dev/null +++ b/target-i386-Introduce-SierraForest-v2-model.patch @@ -0,0 +1,62 @@ +From 79a6baa688a19242512a753ab240a2238bb7ed7e Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Tue, 21 Jan 2025 10:06:47 +0800 +Subject: [PATCH] target/i386: Introduce SierraForest-v2 model + +commit c597ff5339a9918b00d9f4160126db0ac2a423cc upstream. + +Update SierraForest CPU model to add LAM, 4 bits indicating certain bits +of IA32_SPEC_CTRL are supported (intel-psfd, ipred-ctrl, rrsba-ctrl, +bhi-ctrl) and the missing features (ss, tsc-adjust, cldemote, movdiri, +movdir64b). + +Also add GDS-NO and RFDS-NO to indicate the related vulnerabilities are +mitigated in stepping 3. + +Intel-SIG: commit c597ff5339a9 target/i386: Introduce SierraForest-v2 model.
+Add SRF CPU model support + +Tested-by: Xuelian Guo +Signed-off-by: Tao Su +Reviewed-by: Zhao Liu +Link: https://lore.kernel.org/r/20250121020650.1899618-2-tao1.su@linux.intel.com +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 20358ffa91..bad30581ce 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4315,6 +4315,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Intel Xeon Processor (SierraForest)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, ++ { ++ .version = 2, ++ .props = (PropValue[]) { ++ { "ss", "on" }, ++ { "tsc-adjust", "on" }, ++ { "cldemote", "on" }, ++ { "movdiri", "on" }, ++ { "movdir64b", "on" }, ++ { "gds-no", "on" }, ++ { "rfds-no", "on" }, ++ { "lam", "on" }, ++ { "intel-psfd", "on"}, ++ { "ipred-ctrl", "on"}, ++ { "rrsba-ctrl", "on"}, ++ { "bhi-ctrl", "on"}, ++ { "stepping", "3" }, ++ { /* end of list */ } ++ } ++ }, + { /* end of list */ }, + }, + }, +-- +2.41.0.windows.1 + diff --git a/target-i386-Introduce-header-file-csv.h.patch b/target-i386-Introduce-header-file-csv.h.patch new file mode 100644 index 0000000000000000000000000000000000000000..a31acbf71439bbdf43b08d3562fdfcffa789cdb8 --- /dev/null +++ b/target-i386-Introduce-header-file-csv.h.patch @@ -0,0 +1,107 @@ +From 2bdf07593dbec66205f2f20fa5430595678ded89 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Thu, 14 Mar 2024 19:21:11 +0800 +Subject: [PATCH] target/i386: Introduce header file csv.h + +This header file is used to provide common helper functions +and data structures for Hygon CSV. 
+ +Signed-off-by: hanliyang +--- + configs/devices/i386-softmmu/default.mak | 1 + + hw/i386/Kconfig | 5 +++ + target/i386/csv.h | 47 ++++++++++++++++++++++++ + 3 files changed, 53 insertions(+) + create mode 100644 target/i386/csv.h + +diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak +index 598c6646df..db83ffcab9 100644 +--- a/configs/devices/i386-softmmu/default.mak ++++ b/configs/devices/i386-softmmu/default.mak +@@ -23,6 +23,7 @@ + #CONFIG_TPM_TIS_ISA=n + #CONFIG_VTD=n + #CONFIG_SGX=n ++#CONFIG_CSV=n + + # Boards: + # +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index 55850791df..08f3ae43f8 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -10,6 +10,10 @@ config SGX + bool + depends on KVM + ++config CSV ++ bool ++ depends on SEV ++ + config PC + bool + imply APPLESMC +@@ -26,6 +30,7 @@ config PC + imply QXL + imply SEV + imply SGX ++ imply CSV + imply TEST_DEVICES + imply TPM_CRB + imply TPM_TIS_ISA +diff --git a/target/i386/csv.h b/target/i386/csv.h +new file mode 100644 +index 0000000000..f935babe97 +--- /dev/null ++++ b/target/i386/csv.h +@@ -0,0 +1,47 @@ ++/* ++ * QEMU CSV support ++ * ++ * Copyright: Hygon Info Technologies Ltd. 2022 ++ * ++ * Author: ++ * Jiang Xin ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#ifndef I386_CSV_H ++#define I386_CSV_H ++ ++#ifdef CONFIG_CSV ++ ++#include "cpu.h" ++ ++#define CPUID_VENDOR_HYGON_EBX 0x6f677948 /* "Hygo" */ ++#define CPUID_VENDOR_HYGON_ECX 0x656e6975 /* "uine" */ ++#define CPUID_VENDOR_HYGON_EDX 0x6e65476e /* "nGen" */ ++ ++static bool __attribute__((unused)) is_hygon_cpu(void) ++{ ++ uint32_t ebx = 0; ++ uint32_t ecx = 0; ++ uint32_t edx = 0; ++ ++ host_cpuid(0, 0, NULL, &ebx, &ecx, &edx); ++ ++ if (ebx == CPUID_VENDOR_HYGON_EBX && ++ ecx == CPUID_VENDOR_HYGON_ECX && ++ edx == CPUID_VENDOR_HYGON_EDX) ++ return true; ++ else ++ return false; ++} ++ ++#else ++ ++#define is_hygon_cpu() (false) ++ ++#endif ++ ++#endif +-- +2.41.0.windows.1 + diff --git a/target-i386-Make-invtsc-migratable-when-user-sets-ts.patch b/target-i386-Make-invtsc-migratable-when-user-sets-ts.patch new file mode 100644 index 0000000000000000000000000000000000000000..df9b7441716a6a9e0ddf2a96a902d593cc84055b --- /dev/null +++ b/target-i386-Make-invtsc-migratable-when-user-sets-ts.patch @@ -0,0 +1,66 @@ +From 07a671dc3e3baedb650b307c36d69bef869c2480 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 14 Aug 2024 03:54:31 -0400 +Subject: [PATCH] target/i386: Make invtsc migratable when user sets tsc-khz + explicitly + +commit 87c88db3143e91076d167a62dd7febf49afca8a2 upstream. + +When user sets tsc-frequency explicitly, the invtsc feature is actually +migratable because the tsc-frequency is supposed to be fixed during the +migration. + +See commit d99569d9d856 ("kvm: Allow invtsc migration if tsc-khz +is set explicitly") for reference.
+ +Intel-SIG: commit 87c88db3143e target/i386: Make invtsc migratable when user sets tsc-khz explicitly + +Signed-off-by: Xiaoyao Li +Link: https://lore.kernel.org/r/20240814075431.339209-10-xiaoyao.li@intel.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index d0aa2fb5ff..20358ffa91 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1685,9 +1685,10 @@ static inline uint64_t x86_cpu_xsave_xss_components(X86CPU *cpu) + * Returns the set of feature flags that are supported and migratable by + * QEMU, for a given FeatureWord. + */ +-static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) ++static uint64_t x86_cpu_get_migratable_flags(X86CPU *cpu, FeatureWord w) + { + FeatureWordInfo *wi = &feature_word_info[w]; ++ CPUX86State *env = &cpu->env; + uint64_t r = 0; + int i; + +@@ -1701,6 +1702,12 @@ static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) + r |= f; + } + } ++ ++ /* when tsc-khz is set explicitly, invtsc is migratable */ ++ if ((w == FEAT_8000_0007_EDX) && env->user_tsc_khz) { ++ r |= CPUID_APM_INVTSC; ++ } ++ + return r; + } + +@@ -6002,7 +6009,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w) + } + #endif + if (cpu && cpu->migratable) { +- r &= x86_cpu_get_migratable_flags(w); ++ r &= x86_cpu_get_migratable_flags(cpu, w); + } + return r; + } +-- +2.41.0.windows.1 + diff --git a/target-i386-Raise-the-highest-index-value-used-for-a.patch b/target-i386-Raise-the-highest-index-value-used-for-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..57db409451982642754fc92a09b7231e5d41d57f --- /dev/null +++ b/target-i386-Raise-the-highest-index-value-used-for-a.patch @@ -0,0 +1,66 @@ +From 513d33050869a337262fdba0a2d064e7ce9fdb22 Mon Sep 17 00:00:00 2001 +From: Lei Wang +Date: Wed, 7 Aug 2024 01:18:12 -0700 +Subject: [PATCH] target/i386: 
Raise the highest index value used for any VMCS + encoding + +commit ab891454ebe82f7e359be721007652556f9f8356 upstream. + +Because the index value of the VMCS field encoding of FRED injected-event +data (one of the newly added VMCS fields for FRED transitions), 0x52, is +larger than any existing index value, raise the highest index value used +for any VMCS encoding to 0x52. + +Because the index value of the VMCS field encoding of Secondary VM-exit +controls, 0x44, is larger than any existing index value, raise the highest +index value used for any VMCS encoding to 0x44. + +Intel-SIG: commit ab891454ebe8 target/i386: Raise the highest index value used for any VMCS encoding + +Co-developed-by: Xin Li +Signed-off-by: Xin Li +Signed-off-by: Lei Wang +Signed-off-by: Xin Li (Intel) +Link: https://lore.kernel.org/r/20240807081813.735158-4-xin@zytor.com +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.h | 1 + + target/i386/kvm/kvm.c | 9 ++++++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index f022749c86..fb6721f182 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1166,6 +1166,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define VMX_VM_EXIT_PT_CONCEAL_PIP 0x01000000 + #define VMX_VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 + #define VMX_VM_EXIT_LOAD_IA32_PKRS 0x20000000 ++#define VMX_VM_EXIT_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + + #define VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 + #define VMX_VM_ENTRY_IA32E_MODE 0x00000200 +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5f3497e122..ce96ed9158 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3254,7 +3254,14 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) + kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0, + CR4_VMXE_MASK); + +- if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) { ++ if (f[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) 
{ ++ /* FRED injected-event data (0x2052). */ ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x52); ++ } else if (f[FEAT_VMX_EXIT_CTLS] & ++ VMX_VM_EXIT_ACTIVATE_SECONDARY_CONTROLS) { ++ /* Secondary VM-exit controls (0x2044). */ ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x44); ++ } else if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) { + /* TSC multiplier (0x2032). */ + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x32); + } else { +-- +2.41.0.windows.1 + diff --git a/target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch b/target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch deleted file mode 100644 index 6cba87b020b3f5a01fbc2f74958d8e9c03a9d1a2..0000000000000000000000000000000000000000 --- a/target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 4372535d5f2f50b24d14ec8a3393aebec938fb61 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Wed, 13 Nov 2019 15:54:35 +0100 -Subject: [PATCH] target/i386: add PSCHANGE_NO bit for the ARCH_CAPABILITIES - MSR - -This is required to disable ITLB multihit mitigations in nested -hypervisors. 
- -Signed-off-by: Paolo Bonzini - -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 50d6ef9de4..29836cb2a5 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1208,7 +1208,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .type = MSR_FEATURE_WORD, - .feat_names = { - "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", -- "ssb-no", "mds-no", NULL, NULL, -+ "ssb-no", "mds-no", "pschange-mc-no", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, --- -2.27.0 - diff --git a/target-i386-add-VMX-definitions.patch b/target-i386-add-VMX-definitions.patch deleted file mode 100644 index 4365e3a7a1082abf15b5e9c51a7c3801e27ee806..0000000000000000000000000000000000000000 --- a/target-i386-add-VMX-definitions.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 9fb16fc548fca297086be0efe20345160660f340 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 1 Jul 2019 18:24:52 +0200 -Subject: [PATCH] target/i386: add VMX definitions - -These will be used to compile the list of VMX features for named -CPU models, and/or by the code that sets up the VMX MSRs. 
- -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.h | 130 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 130 insertions(+) - -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 9a105b2251..b4be6ffb1f 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -452,6 +452,25 @@ typedef enum X86Seg { - #define MSR_IA32_BNDCFGS 0x00000d90 - #define MSR_IA32_XSS 0x00000da0 - -+#define MSR_IA32_VMX_BASIC 0x00000480 -+#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 -+#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 -+#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 -+#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 -+#define MSR_IA32_VMX_MISC 0x00000485 -+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 -+#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 -+#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 -+#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 -+#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a -+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b -+#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c -+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d -+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e -+#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f -+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 -+#define MSR_IA32_VMX_VMFUNC 0x00000491 -+ - #define XSTATE_FP_BIT 0 - #define XSTATE_SSE_BIT 1 - #define XSTATE_YMM_BIT 2 -@@ -757,6 +776,117 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - -+/* VMX MSR features */ -+#define MSR_VMX_BASIC_VMCS_REVISION_MASK 0x7FFFFFFFull -+#define MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK (0x00001FFFull << 32) -+#define MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK (0x003C0000ull << 32) -+#define MSR_VMX_BASIC_DUAL_MONITOR (1ULL << 49) -+#define MSR_VMX_BASIC_INS_OUTS (1ULL << 54) -+#define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55) -+ -+#define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full -+#define MSR_VMX_MISC_STORE_LMA (1ULL << 5) -+#define MSR_VMX_MISC_ACTIVITY_HLT (1ULL << 6) -+#define 
MSR_VMX_MISC_ACTIVITY_SHUTDOWN (1ULL << 7) -+#define MSR_VMX_MISC_ACTIVITY_WAIT_SIPI (1ULL << 8) -+#define MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK 0x0E000000ull -+#define MSR_VMX_MISC_VMWRITE_VMEXIT (1ULL << 29) -+#define MSR_VMX_MISC_ZERO_LEN_INJECT (1ULL << 30) -+ -+#define MSR_VMX_EPT_EXECONLY (1ULL << 0) -+#define MSR_VMX_EPT_PAGE_WALK_LENGTH_4 (1ULL << 6) -+#define MSR_VMX_EPT_PAGE_WALK_LENGTH_5 (1ULL << 7) -+#define MSR_VMX_EPT_UC (1ULL << 8) -+#define MSR_VMX_EPT_WB (1ULL << 14) -+#define MSR_VMX_EPT_2MB (1ULL << 16) -+#define MSR_VMX_EPT_1GB (1ULL << 17) -+#define MSR_VMX_EPT_INVEPT (1ULL << 20) -+#define MSR_VMX_EPT_AD_BITS (1ULL << 21) -+#define MSR_VMX_EPT_ADVANCED_VMEXIT_INFO (1ULL << 22) -+#define MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT (1ULL << 25) -+#define MSR_VMX_EPT_INVEPT_ALL_CONTEXT (1ULL << 26) -+#define MSR_VMX_EPT_INVVPID (1ULL << 32) -+#define MSR_VMX_EPT_INVVPID_SINGLE_ADDR (1ULL << 40) -+#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT (1ULL << 41) -+#define MSR_VMX_EPT_INVVPID_ALL_CONTEXT (1ULL << 42) -+#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS (1ULL << 43) -+ -+#define MSR_VMX_VMFUNC_EPT_SWITCHING (1ULL << 0) -+ -+ -+/* VMX controls */ -+#define VMX_CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -+#define VMX_CPU_BASED_USE_TSC_OFFSETING 0x00000008 -+#define VMX_CPU_BASED_HLT_EXITING 0x00000080 -+#define VMX_CPU_BASED_INVLPG_EXITING 0x00000200 -+#define VMX_CPU_BASED_MWAIT_EXITING 0x00000400 -+#define VMX_CPU_BASED_RDPMC_EXITING 0x00000800 -+#define VMX_CPU_BASED_RDTSC_EXITING 0x00001000 -+#define VMX_CPU_BASED_CR3_LOAD_EXITING 0x00008000 -+#define VMX_CPU_BASED_CR3_STORE_EXITING 0x00010000 -+#define VMX_CPU_BASED_CR8_LOAD_EXITING 0x00080000 -+#define VMX_CPU_BASED_CR8_STORE_EXITING 0x00100000 -+#define VMX_CPU_BASED_TPR_SHADOW 0x00200000 -+#define VMX_CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -+#define VMX_CPU_BASED_MOV_DR_EXITING 0x00800000 -+#define VMX_CPU_BASED_UNCOND_IO_EXITING 0x01000000 -+#define VMX_CPU_BASED_USE_IO_BITMAPS 0x02000000 
-+#define VMX_CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 -+#define VMX_CPU_BASED_USE_MSR_BITMAPS 0x10000000 -+#define VMX_CPU_BASED_MONITOR_EXITING 0x20000000 -+#define VMX_CPU_BASED_PAUSE_EXITING 0x40000000 -+#define VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 -+ -+#define VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -+#define VMX_SECONDARY_EXEC_ENABLE_EPT 0x00000002 -+#define VMX_SECONDARY_EXEC_DESC 0x00000004 -+#define VMX_SECONDARY_EXEC_RDTSCP 0x00000008 -+#define VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -+#define VMX_SECONDARY_EXEC_ENABLE_VPID 0x00000020 -+#define VMX_SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -+#define VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -+#define VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -+#define VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -+#define VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -+#define VMX_SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -+#define VMX_SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -+#define VMX_SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -+#define VMX_SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -+#define VMX_SECONDARY_EXEC_ENCLS_EXITING 0x00008000 -+#define VMX_SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -+#define VMX_SECONDARY_EXEC_ENABLE_PML 0x00020000 -+#define VMX_SECONDARY_EXEC_XSAVES 0x00100000 -+ -+#define VMX_PIN_BASED_EXT_INTR_MASK 0x00000001 -+#define VMX_PIN_BASED_NMI_EXITING 0x00000008 -+#define VMX_PIN_BASED_VIRTUAL_NMIS 0x00000020 -+#define VMX_PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -+#define VMX_PIN_BASED_POSTED_INTR 0x00000080 -+ -+#define VMX_VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 -+#define VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 -+#define VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 -+#define VMX_VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -+#define VMX_VM_EXIT_SAVE_IA32_PAT 0x00040000 -+#define VMX_VM_EXIT_LOAD_IA32_PAT 0x00080000 -+#define VMX_VM_EXIT_SAVE_IA32_EFER 0x00100000 -+#define VMX_VM_EXIT_LOAD_IA32_EFER 0x00200000 -+#define 
VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 -+#define VMX_VM_EXIT_CLEAR_BNDCFGS 0x00800000 -+#define VMX_VM_EXIT_PT_CONCEAL_PIP 0x01000000 -+#define VMX_VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 -+ -+#define VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 -+#define VMX_VM_ENTRY_IA32E_MODE 0x00000200 -+#define VMX_VM_ENTRY_SMM 0x00000400 -+#define VMX_VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 -+#define VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 -+#define VMX_VM_ENTRY_LOAD_IA32_PAT 0x00004000 -+#define VMX_VM_ENTRY_LOAD_IA32_EFER 0x00008000 -+#define VMX_VM_ENTRY_LOAD_BNDCFGS 0x00010000 -+#define VMX_VM_ENTRY_PT_CONCEAL_PIP 0x00020000 -+#define VMX_VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 -+ - /* Supported Hyper-V Enlightenments */ - #define HYPERV_FEAT_RELAXED 0 - #define HYPERV_FEAT_VAPIC 1 --- -2.27.0 - diff --git a/target-i386-add-VMX-features-to-named-CPU-models.patch b/target-i386-add-VMX-features-to-named-CPU-models.patch deleted file mode 100644 index ab42b83785e5e5fd463f64415611b98ebe06066b..0000000000000000000000000000000000000000 --- a/target-i386-add-VMX-features-to-named-CPU-models.patch +++ /dev/null @@ -1,980 +0,0 @@ -From 5a63a16d709c89b25a0a9c3c7fdf765f26dac312 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Wed, 20 Nov 2019 18:37:53 +0100 -Subject: [PATCH] target/i386: add VMX features to named CPU models - -This allows using "-cpu Haswell,+vmx", which we did not really want to -support in QEMU but was produced by Libvirt when using the "host-model" -CPU model. Without this patch, no VMX feature is _actually_ supported -(only the basic instruction set extensions are) and KVM fails to load -in the guest. 
- -This was produced from the output of scripts/kvm/vmxcap using the following -very ugly Python script: - - bits = { - 'INS/OUTS instruction information': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_INS_OUTS'], - 'IA32_VMX_TRUE_*_CTLS support': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_TRUE_CTLS'], - 'External interrupt exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_EXT_INTR_MASK'], - 'NMI exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_NMI_EXITING'], - 'Virtual NMIs': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VIRTUAL_NMIS'], - 'Activate VMX-preemption timer': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VMX_PREEMPTION_TIMER'], - 'Process posted interrupts': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_POSTED_INTR'], - 'Interrupt window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_INTR_PENDING'], - 'Use TSC offsetting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_TSC_OFFSETING'], - 'HLT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_HLT_EXITING'], - 'INVLPG exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_INVLPG_EXITING'], - 'MWAIT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MWAIT_EXITING'], - 'RDPMC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDPMC_EXITING'], - 'RDTSC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDTSC_EXITING'], - 'CR3-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_LOAD_EXITING'], - 'CR3-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_STORE_EXITING'], - 'CR8-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_LOAD_EXITING'], - 'CR8-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_STORE_EXITING'], - 'Use TPR shadow': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_TPR_SHADOW'], - 'NMI-window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_NMI_PENDING'], - 'MOV-DR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MOV_DR_EXITING'], - 'Unconditional I/O exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_UNCOND_IO_EXITING'], - 'Use I/O bitmaps': 
['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_IO_BITMAPS'], - 'Monitor trap flag': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_TRAP_FLAG'], - 'Use MSR bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_MSR_BITMAPS'], - 'MONITOR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_EXITING'], - 'PAUSE exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_PAUSE_EXITING'], - 'Activate secondary control': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS'], - 'Virtualize APIC accesses': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES'], - 'Enable EPT': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_EPT'], - 'Descriptor-table exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_DESC'], - 'Enable RDTSCP': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDTSCP'], - 'Virtualize x2APIC mode': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE'], - 'Enable VPID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VPID'], - 'WBINVD exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_WBINVD_EXITING'], - 'Unrestricted guest': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST'], - 'APIC register emulation': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT'], - 'Virtual interrupt delivery': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY'], - 'PAUSE-loop exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING'], - 'RDRAND exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDRAND_EXITING'], - 'Enable INVPCID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_INVPCID'], - 'Enable VM functions': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VMFUNC'], - 'VMCS shadowing': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_SHADOW_VMCS'], - 'RDSEED exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDSEED_EXITING'], - 'Enable PML': ['FEAT_VMX_SECONDARY_CTLS', 
'VMX_SECONDARY_EXEC_ENABLE_PML'], - 'Enable XSAVES/XRSTORS': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_XSAVES'], - 'Save debug controls': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_DEBUG_CONTROLS'], - 'Load IA32_PERF_GLOBAL_CTRL': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL'], - 'Acknowledge interrupt on exit': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_ACK_INTR_ON_EXIT'], - 'Save IA32_PAT': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_IA32_PAT'], - 'Load IA32_PAT': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_PAT'], - 'Save IA32_EFER': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_IA32_EFER'], - 'Load IA32_EFER': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_EFER'], - 'Save VMX-preemption timer value': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER'], - 'Clear IA32_BNDCFGS': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_CLEAR_BNDCFGS'], - 'Load debug controls': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS'], - 'IA-32e mode guest': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_IA32E_MODE'], - 'Load IA32_PERF_GLOBAL_CTRL': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL'], - 'Load IA32_PAT': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_IA32_PAT'], - 'Load IA32_EFER': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_IA32_EFER'], - 'Load IA32_BNDCFGS': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_BNDCFGS'], - 'Store EFER.LMA into IA-32e mode guest control': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_STORE_LMA'], - 'HLT activity state': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_ACTIVITY_HLT'], - 'VMWRITE to VM-exit information fields': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_VMWRITE_VMEXIT'], - 'Inject event with insn length=0': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_ZERO_LEN_INJECT'], - 'Execute-only EPT translations': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_EXECONLY'], - 'Page-walk length 4': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_PAGE_WALK_LENGTH_4'], - 'Paging-structure memory type WB': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_WB'], - '2MB EPT pages': 
['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB'], - 'INVEPT supported': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVEPT'], - 'EPT accessed and dirty flags': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_AD_BITS'], - 'Single-context INVEPT': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT'], - 'All-context INVEPT': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVEPT_ALL_CONTEXT'], - 'INVVPID supported': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID'], - 'Individual-address INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_SINGLE_ADDR'], - 'Single-context INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT'], - 'All-context INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_ALL_CONTEXT'], - 'Single-context-retaining-globals INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS'], - 'EPTP Switching': ['FEAT_VMX_VMFUNC', 'MSR_VMX_VMFUNC_EPT_SWITCHING'] - } - - import sys - import textwrap - - out = {} - for l in sys.stdin.readlines(): - l = l.rstrip() - if l.endswith('!!'): - l = l[:-2].rstrip() - if l.startswith(' ') and (l.endswith('default') or l.endswith('yes')): - l = l[4:] - for key, value in bits.items(): - if l.startswith(key): - ctl, bit = value - if ctl in out: - out[ctl] = out[ctl] + ' | ' - else: - out[ctl] = ' [%s] = ' % ctl - out[ctl] = out[ctl] + bit - - for x in sorted(out.keys()): - print("\n ".join(textwrap.wrap(out[x] + ","))) - -Note that the script has a bug in that some keys apply to both VM entry -and VM exit controls ("load IA32_PERF_GLOBAL_CTRL", "load IA32_EFER", -"load IA32_PAT". Those have to be fixed by hand. 
- -Reviewed-by: Eduardo Habkost -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.c | 705 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 705 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index fd248a78db..2f32d67aa5 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1799,6 +1799,34 @@ static CPUCaches epyc_cache_info = { - }, - }; - -+/* The following VMX features are not supported by KVM and are left out in the -+ * CPU definitions: -+ * -+ * Dual-monitor support (all processors) -+ * Entry to SMM -+ * Deactivate dual-monitor treatment -+ * Number of CR3-target values -+ * Shutdown activity state -+ * Wait-for-SIPI activity state -+ * PAUSE-loop exiting (Westmere and newer) -+ * EPT-violation #VE (Broadwell and newer) -+ * Inject event with insn length=0 (Skylake and newer) -+ * Conceal non-root operation from PT -+ * Conceal VM exits from PT -+ * Conceal VM entries from PT -+ * Enable ENCLS exiting -+ * Mode-based execute control (XS/XU) -+ s TSC scaling (Skylake Server and newer) -+ * GPA translation for PT (IceLake and newer) -+ * User wait and pause -+ * ENCLV exiting -+ * Load IA32_RTIT_CTL -+ * Clear IA32_RTIT_CTL -+ * Advanced VM-exit information for EPT violations -+ * Sub-page write permissions -+ * PT in VMX operation -+ */ -+ - static X86CPUDefinition builtin_x86_defs[] = { - { - .name = "qemu64", -@@ -1873,6 +1901,24 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, -+ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, -+ .features[FEAT_VMX_PROCBASED_CTLS] = 
VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, - .xlevel = 0x80000008, - .model_id = "Intel(R) Core(TM)2 Duo CPU T7700 @ 2.40GHz", - }, -@@ -1900,6 +1946,20 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT3_OSVW, CPUID_EXT3_IBS, CPUID_EXT3_SVM */ - .features[FEAT_8000_0001_ECX] = - 0, -+ /* VMX features from Cedar Mill/Prescott */ -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, -+ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING, - .xlevel = 0x80000008, - .model_id = "Common KVM processor" - }, -@@ -1931,6 +1991,19 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT_SSE3, - .features[FEAT_8000_0001_ECX] = 
- 0, -+ /* VMX features from Yonah */ -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, -+ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | -+ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | -+ VMX_CPU_BASED_PAUSE_EXITING | VMX_CPU_BASED_USE_MSR_BITMAPS, - .xlevel = 0x80000008, - .model_id = "Common 32-bit KVM processor" - }, -@@ -1952,6 +2025,18 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT_SSE3 | CPUID_EXT_MONITOR, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_NX, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, -+ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | -+ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | -+ VMX_CPU_BASED_PAUSE_EXITING | VMX_CPU_BASED_USE_MSR_BITMAPS, - .xlevel = 0x80000008, - .model_id = "Genuine Intel(R) CPU T2600 @ 2.16GHz", - }, -@@ -2062,6 +2147,24 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, - 
.features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, -+ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, - .xlevel = 0x80000008, - .model_id = "Intel Celeron_4x0 (Conroe/Merom Class Core 2)", - }, -@@ -2085,6 +2188,27 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, -+ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | 
VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING, - .xlevel = 0x80000008, - .model_id = "Intel Core 2 Duo P9xxx (Penryn Class Core 2)", - }, -@@ -2108,6 +2232,46 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = 
MSR_VMX_MISC_ACTIVITY_HLT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID, - .xlevel = 0x80000008, - .model_id = "Intel Core i7 9xx (Nehalem Class Core i7)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2148,6 +2312,47 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_EXT3_LAHF_LM, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ 
MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST, - .xlevel = 0x80000008, - .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)", - 
.versions = (X86CPUVersionDefinition[]) { -@@ -2193,6 +2398,47 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ 
VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST, - .xlevel = 0x80000008, - .model_id = "Intel Xeon E312xx (Sandy Bridge)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2241,6 +2487,50 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | 
-+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING, - .xlevel = 0x80000008, - .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2292,6 +2582,52 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | 
MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ 
VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, -+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Haswell)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2376,6 +2712,53 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ 
.features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, -+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Broadwell)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2460,6 +2843,51 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | 
VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ 
VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, -+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Skylake)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2524,6 +2952,52 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ 
.features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Skylake)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2594,6 +3068,52 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = 
VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ 
.features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Cascadelake)", - .versions = (X86CPUVersionDefinition[]) { -@@ -2724,6 +3244,51 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = 
MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, -+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Icelake)", - }, -@@ -2782,6 +3347,52 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ 
.features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ 
VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Icelake)", - }, -@@ -2829,6 +3440,53 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_6_EAX_ARAT, - .features[FEAT_ARCH_CAPABILITIES] = - MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ 
MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, -+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Atom Processor (Denverton)", - }, -@@ -2899,6 +3557,53 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ 
MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ .features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ 
VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, -+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Atom Processor (SnowRidge)", - .versions = (X86CPUVersionDefinition[]) { --- -2.27.0 - diff --git a/target-i386-add-VMX-features.patch b/target-i386-add-VMX-features.patch deleted file mode 100644 index 50457d7f2125e4db535a18c4798e6bab48d68393..0000000000000000000000000000000000000000 --- a/target-i386-add-VMX-features.patch +++ /dev/null @@ -1,492 +0,0 @@ -From 290ed17e639a67a9faf4a18b1b5973f9535bace4 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 1 Jul 2019 18:32:17 +0200 -Subject: [PATCH] target/i386: add VMX features - -Add code to convert the VMX feature words back into MSR values, -allowing the user to enable/disable VMX features as they wish. The same -infrastructure enables support for limiting VMX features in named -CPU models. 
- -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.c | 225 ++++++++++++++++++++++++++++++++++++++++++++++ - target/i386/cpu.h | 9 ++ - target/i386/kvm.c | 162 ++++++++++++++++++++++++++++++++- - 3 files changed, 394 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 3d6541c4a8..fd248a78db 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1232,6 +1232,163 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .index = MSR_IA32_CORE_CAPABILITY, - }, - }, -+ -+ [FEAT_VMX_PROCBASED_CTLS] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ NULL, NULL, "vmx-vintr-pending", "vmx-tsc-offset", -+ NULL, NULL, NULL, "vmx-hlt-exit", -+ NULL, "vmx-invlpg-exit", "vmx-mwait-exit", "vmx-rdpmc-exit", -+ "vmx-rdtsc-exit", NULL, NULL, "vmx-cr3-load-noexit", -+ "vmx-cr3-store-noexit", NULL, NULL, "vmx-cr8-load-exit", -+ "vmx-cr8-store-exit", "vmx-flexpriority", "vmx-vnmi-pending", "vmx-movdr-exit", -+ "vmx-io-exit", "vmx-io-bitmap", NULL, "vmx-mtf", -+ "vmx-msr-bitmap", "vmx-monitor-exit", "vmx-pause-exit", "vmx-secondary-ctls", -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_TRUE_PROCBASED_CTLS, -+ } -+ }, -+ -+ [FEAT_VMX_SECONDARY_CTLS] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ "vmx-apicv-xapic", "vmx-ept", "vmx-desc-exit", "vmx-rdtscp-exit", -+ "vmx-apicv-x2apic", "vmx-vpid", "vmx-wbinvd-exit", "vmx-unrestricted-guest", -+ "vmx-apicv-register", "vmx-apicv-vid", "vmx-ple", "vmx-rdrand-exit", -+ "vmx-invpcid-exit", "vmx-vmfunc", "vmx-shadow-vmcs", "vmx-encls-exit", -+ "vmx-rdseed-exit", "vmx-pml", NULL, NULL, -+ "vmx-xsaves", NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_PROCBASED_CTLS2, -+ } -+ }, -+ -+ [FEAT_VMX_PINBASED_CTLS] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ "vmx-intr-exit", NULL, NULL, "vmx-nmi-exit", -+ NULL, "vmx-vnmi", "vmx-preemption-timer", "vmx-posted-intr", -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, 
NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_TRUE_PINBASED_CTLS, -+ } -+ }, -+ -+ [FEAT_VMX_EXIT_CTLS] = { -+ .type = MSR_FEATURE_WORD, -+ /* -+ * VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE is copied from -+ * the LM CPUID bit. -+ */ -+ .feat_names = { -+ NULL, NULL, "vmx-exit-nosave-debugctl", NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL /* vmx-exit-host-addr-space-size */, NULL, NULL, -+ "vmx-exit-load-perf-global-ctrl", NULL, NULL, "vmx-exit-ack-intr", -+ NULL, NULL, "vmx-exit-save-pat", "vmx-exit-load-pat", -+ "vmx-exit-save-efer", "vmx-exit-load-efer", -+ "vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs", -+ NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_TRUE_EXIT_CTLS, -+ } -+ }, -+ -+ [FEAT_VMX_ENTRY_CTLS] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ NULL, NULL, "vmx-entry-noload-debugctl", NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, "vmx-entry-ia32e-mode", NULL, NULL, -+ NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer", -+ "vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_TRUE_ENTRY_CTLS, -+ } -+ }, -+ -+ [FEAT_VMX_MISC] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ NULL, NULL, NULL, NULL, -+ NULL, "vmx-store-lma", "vmx-activity-hlt", "vmx-activity-shutdown", -+ "vmx-activity-wait-sipi", NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, "vmx-vmwrite-vmexit-fields", "vmx-zero-len-inject", NULL, -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_MISC, -+ } -+ }, -+ -+ [FEAT_VMX_EPT_VPID_CAPS] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ "vmx-ept-execonly", NULL, NULL, NULL, -+ NULL, NULL, "vmx-page-walk-4", 
"vmx-page-walk-5", -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ "vmx-ept-2mb", "vmx-ept-1gb", NULL, NULL, -+ "vmx-invept", "vmx-eptad", "vmx-ept-advanced-exitinfo", NULL, -+ NULL, "vmx-invept-single-context", "vmx-invept-all-context", NULL, -+ NULL, NULL, NULL, NULL, -+ "vmx-invvpid", NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ "vmx-invvpid-single-addr", "vmx-invept-single-context", -+ "vmx-invvpid-all-context", "vmx-invept-single-context-noglobals", -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_EPT_VPID_CAP, -+ } -+ }, -+ -+ [FEAT_VMX_BASIC] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ [54] = "vmx-ins-outs", -+ [55] = "vmx-true-ctls", -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_BASIC, -+ }, -+ /* Just to be safe - we don't support setting the MSEG version field. */ -+ .no_autoenable_flags = MSR_VMX_BASIC_DUAL_MONITOR, -+ }, -+ -+ [FEAT_VMX_VMFUNC] = { -+ .type = MSR_FEATURE_WORD, -+ .feat_names = { -+ [0] = "vmx-eptp-switching", -+ }, -+ .msr = { -+ .index = MSR_IA32_VMX_VMFUNC, -+ } -+ }, -+ - }; - - typedef struct FeatureMask { -@@ -1252,6 +1409,74 @@ static FeatureDep feature_dependencies[] = { - .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, - .to = { FEAT_CORE_CAPABILITY, ~0ull }, - }, -+ { -+ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, -+ .to = { FEAT_VMX_PROCBASED_CTLS, ~0ull }, -+ }, -+ { -+ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, -+ .to = { FEAT_VMX_PINBASED_CTLS, ~0ull }, -+ }, -+ { -+ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, -+ .to = { FEAT_VMX_EXIT_CTLS, ~0ull }, -+ }, -+ { -+ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, -+ .to = { FEAT_VMX_ENTRY_CTLS, ~0ull }, -+ }, -+ { -+ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, -+ .to = { FEAT_VMX_MISC, ~0ull }, -+ }, -+ { -+ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, -+ .to = { FEAT_VMX_BASIC, ~0ull }, -+ }, -+ { -+ .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM }, -+ .to 
= { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_IA32E_MODE }, -+ }, -+ { -+ .from = { FEAT_VMX_PROCBASED_CTLS, VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS }, -+ .to = { FEAT_VMX_SECONDARY_CTLS, ~0ull }, -+ }, -+ { -+ .from = { FEAT_XSAVE, CPUID_XSAVE_XSAVES }, -+ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_XSAVES }, -+ }, -+ { -+ .from = { FEAT_1_ECX, CPUID_EXT_RDRAND }, -+ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDRAND_EXITING }, -+ }, -+ { -+ .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INVPCID }, -+ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_INVPCID }, -+ }, -+ { -+ .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_RDSEED }, -+ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDSEED_EXITING }, -+ }, -+ { -+ .from = { FEAT_8000_0001_EDX, CPUID_EXT2_RDTSCP }, -+ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDTSCP }, -+ }, -+ { -+ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, -+ .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull }, -+ }, -+ { -+ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, -+ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST }, -+ }, -+ { -+ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VPID }, -+ .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull << 32 }, -+ }, -+ { -+ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VMFUNC }, -+ .to = { FEAT_VMX_VMFUNC, ~0ull }, -+ }, - }; - - typedef struct X86RegisterInfo32 { -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index b4be6ffb1f..0b57b915af 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -518,6 +518,15 @@ typedef enum FeatureWord { - FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ - FEAT_ARCH_CAPABILITIES, - FEAT_CORE_CAPABILITY, -+ FEAT_VMX_PROCBASED_CTLS, -+ FEAT_VMX_SECONDARY_CTLS, -+ FEAT_VMX_PINBASED_CTLS, -+ FEAT_VMX_EXIT_CTLS, -+ FEAT_VMX_ENTRY_CTLS, -+ FEAT_VMX_MISC, -+ FEAT_VMX_EPT_VPID_CAPS, -+ FEAT_VMX_BASIC, -+ FEAT_VMX_VMFUNC, - FEATURE_WORDS, - } FeatureWord; - -diff 
--git a/target/i386/kvm.c b/target/i386/kvm.c -index e9a6293ab2..fafb9fb26d 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -96,6 +96,7 @@ static bool has_msr_virt_ssbd; - static bool has_msr_smi_count; - static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; -+static bool has_msr_vmx_vmfunc; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -443,7 +444,8 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data; -- uint32_t ret; -+ uint64_t value; -+ uint32_t ret, can_be_one, must_be_one; - - if (kvm_feature_msrs == NULL) { /* Host doesn't support feature MSRs */ - return 0; -@@ -469,7 +471,25 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - exit(1); - } - -- return msr_data.entries[0].data; -+ value = msr_data.entries[0].data; -+ switch (index) { -+ case MSR_IA32_VMX_PROCBASED_CTLS2: -+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: -+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: -+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: -+ case MSR_IA32_VMX_TRUE_EXIT_CTLS: -+ /* -+ * Return true for bits that can be one, but do not have to be one. -+ * The SDM tells us which bits could have a "must be one" setting, -+ * so we can do the opposite transformation in make_vmx_msr_value. 
-+ */ -+ must_be_one = (uint32_t)value; -+ can_be_one = (uint32_t)(value >> 32); -+ return can_be_one & ~must_be_one; -+ -+ default: -+ return value; -+ } - } - - -@@ -1933,6 +1953,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_CORE_CAPABILITY: - has_msr_core_capabs = true; - break; -+ case MSR_IA32_VMX_VMFUNC: -+ has_msr_vmx_vmfunc = true; -+ break; - } - } - } -@@ -2407,6 +2430,132 @@ static int kvm_put_msr_feature_control(X86CPU *cpu) - return 0; - } - -+static uint64_t make_vmx_msr_value(uint32_t index, uint32_t features) -+{ -+ uint32_t default1, can_be_one, can_be_zero; -+ uint32_t must_be_one; -+ -+ switch (index) { -+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: -+ default1 = 0x00000016; -+ break; -+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: -+ default1 = 0x0401e172; -+ break; -+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: -+ default1 = 0x000011ff; -+ break; -+ case MSR_IA32_VMX_TRUE_EXIT_CTLS: -+ default1 = 0x00036dff; -+ break; -+ case MSR_IA32_VMX_PROCBASED_CTLS2: -+ default1 = 0; -+ break; -+ default: -+ abort(); -+ } -+ -+ /* If a feature bit is set, the control can be either set or clear. -+ * Otherwise the value is limited to either 0 or 1 by default1. -+ */ -+ can_be_one = features | default1; -+ can_be_zero = features | ~default1; -+ must_be_one = ~can_be_zero; -+ -+ /* -+ * Bit 0:31 -> 0 if the control bit can be zero (i.e. 1 if it must be one). -+ * Bit 32:63 -> 1 if the control bit can be one. 
-+ */ -+ return must_be_one | (((uint64_t)can_be_one) << 32); -+} -+ -+#define VMCS12_MAX_FIELD_INDEX (0x17) -+ -+static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) -+{ -+ uint64_t kvm_vmx_basic = -+ kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_VMX_BASIC); -+ uint64_t kvm_vmx_misc = -+ kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_VMX_MISC); -+ uint64_t kvm_vmx_ept_vpid = -+ kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_VMX_EPT_VPID_CAP); -+ -+ /* -+ * If the guest is 64-bit, a value of 1 is allowed for the host address -+ * space size vmexit control. -+ */ -+ uint64_t fixed_vmx_exit = f[FEAT_8000_0001_EDX] & CPUID_EXT2_LM -+ ? (uint64_t)VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE << 32 : 0; -+ -+ /* -+ * Bits 0-30, 32-44 and 50-53 come from the host. KVM should -+ * not change them for backwards compatibility. -+ */ -+ uint64_t fixed_vmx_basic = kvm_vmx_basic & -+ (MSR_VMX_BASIC_VMCS_REVISION_MASK | -+ MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK | -+ MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK); -+ -+ /* -+ * Same for bits 0-4 and 25-27. Bits 16-24 (CR3 target count) can -+ * change in the future but are always zero for now, clear them to be -+ * future proof. Bits 32-63 in theory could change, though KVM does -+ * not support dual-monitor treatment and probably never will; mask -+ * them out as well. -+ */ -+ uint64_t fixed_vmx_misc = kvm_vmx_misc & -+ (MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK | -+ MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK); -+ -+ /* -+ * EPT memory types should not change either, so we do not bother -+ * adding features for them. -+ */ -+ uint64_t fixed_vmx_ept_mask = -+ (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_ENABLE_EPT ? 
-+ MSR_VMX_EPT_UC | MSR_VMX_EPT_WB : 0); -+ uint64_t fixed_vmx_ept_vpid = kvm_vmx_ept_vpid & fixed_vmx_ept_mask; -+ -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, -+ make_vmx_msr_value(MSR_IA32_VMX_TRUE_PROCBASED_CTLS, -+ f[FEAT_VMX_PROCBASED_CTLS])); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, -+ make_vmx_msr_value(MSR_IA32_VMX_TRUE_PINBASED_CTLS, -+ f[FEAT_VMX_PINBASED_CTLS])); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, -+ make_vmx_msr_value(MSR_IA32_VMX_TRUE_EXIT_CTLS, -+ f[FEAT_VMX_EXIT_CTLS]) | fixed_vmx_exit); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, -+ make_vmx_msr_value(MSR_IA32_VMX_TRUE_ENTRY_CTLS, -+ f[FEAT_VMX_ENTRY_CTLS])); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_PROCBASED_CTLS2, -+ make_vmx_msr_value(MSR_IA32_VMX_PROCBASED_CTLS2, -+ f[FEAT_VMX_SECONDARY_CTLS])); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP, -+ f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC, -+ f[FEAT_VMX_BASIC] | fixed_vmx_basic); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC, -+ f[FEAT_VMX_MISC] | fixed_vmx_misc); -+ if (has_msr_vmx_vmfunc) { -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMFUNC, f[FEAT_VMX_VMFUNC]); -+ } -+ -+ /* -+ * Just to be safe, write these with constant values. The CRn_FIXED1 -+ * MSRs are generated by KVM based on the vCPU's CPUID. -+ */ -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR0_FIXED0, -+ CR0_PE_MASK | CR0_PG_MASK | CR0_NE_MASK); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0, -+ CR4_VMXE_MASK); -+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, -+ VMCS12_MAX_FIELD_INDEX << 1); -+} -+ - static int kvm_put_msrs(X86CPU *cpu, int level) - { - CPUX86State *env = &cpu->env; -@@ -2646,7 +2795,16 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - - /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see - * kvm_put_msr_feature_control. 
*/ -+ -+ /* -+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -+ * all kernels with MSR features should have them. -+ */ -+ if (kvm_feature_msrs && cpu_has_vmx(env)) { -+ kvm_msr_entry_add_vmx(cpu, env->features); -+ } - } -+ - if (env->mcg_cap) { - int i; - --- -2.27.0 - diff --git a/target-i386-add-a-ucode-rev-property.patch b/target-i386-add-a-ucode-rev-property.patch deleted file mode 100644 index 8a3ff6fd91f67dc93e12e75f9c84ce30dd7725f1..0000000000000000000000000000000000000000 --- a/target-i386-add-a-ucode-rev-property.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 9b3b22bfe87be7eec126056b96f7cea7e3ab9257 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:12 +0000 -Subject: [PATCH] target/i386: add a ucode-rev property - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-3-pbonzini@redhat.com> -Patchwork-id: 93909 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property -Bugzilla: 1791648 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Add the property and plumb it in TCG and HVF (the latter of which -tried to support returning a constant value but used the wrong MSR). - -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 10 ++++++++++ - target/i386/cpu.h | 3 +++ - target/i386/hvf/x86_emu.c | 4 +--- - target/i386/misc_helper.c | 4 ++++ - 4 files changed, 18 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 35a33db39a..ec8bc9957e 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6332,6 +6332,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - } - } - -+ if (cpu->ucode_rev == 0) { -+ /* The default is the same as KVM's. 
*/ -+ if (IS_AMD_CPU(env)) { -+ cpu->ucode_rev = 0x01000065; -+ } else { -+ cpu->ucode_rev = 0x100000000ULL; -+ } -+ } -+ - /* mwait extended info: needed for Core compatibility */ - /* We always wake on interrupt even if host does not have the capability */ - cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; -@@ -7011,6 +7020,7 @@ static Property x86_cpu_properties[] = { - DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), - DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), - DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), -+ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), - DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), - DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), - DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 0b57b915af..ca7de143af 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -345,6 +345,7 @@ typedef enum X86Seg { - #define MSR_IA32_SPEC_CTRL 0x48 - #define MSR_VIRT_SSBD 0xc001011f - #define MSR_IA32_PRED_CMD 0x49 -+#define MSR_IA32_UCODE_REV 0x8b - #define MSR_IA32_CORE_CAPABILITY 0xcf - #define MSR_IA32_ARCH_CAPABILITIES 0x10a - #define MSR_IA32_TSCDEADLINE 0x6e0 -@@ -1562,6 +1563,8 @@ struct X86CPU { - CPUNegativeOffsetState neg; - CPUX86State env; - -+ uint64_t ucode_rev; -+ - uint32_t hyperv_spinlock_attempts; - char *hyperv_vendor_id; - bool hyperv_synic_kvm_only; -diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c -index 1b04bd7e94..cd40520c16 100644 ---- a/target/i386/hvf/x86_emu.c -+++ b/target/i386/hvf/x86_emu.c -@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) - RIP(env) += decode->len; - } - --#define MSR_IA32_UCODE_REV 0x00000017 -- - void simulate_rdmsr(struct CPUState *cpu) - { - X86CPU *x86_cpu = X86_CPU(cpu); -@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) - val = 
cpu_get_apic_base(X86_CPU(cpu)->apic_state); - break; - case MSR_IA32_UCODE_REV: -- val = (0x100000000ULL << 32) | 0x100000000ULL; -+ val = x86_cpu->ucode_rev; - break; - case MSR_EFER: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); -diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c -index 3eff6885f8..aed16fe3f0 100644 ---- a/target/i386/misc_helper.c -+++ b/target/i386/misc_helper.c -@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) - #else - void helper_wrmsr(CPUX86State *env) - { -+ X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); -@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) - env->msr_bndcfgs = val; - cpu_sync_bndcs_hflags(env); - break; -+ case MSR_IA32_UCODE_REV: -+ val = x86_cpu->ucode_rev; -+ break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + --- -2.27.0 - diff --git a/target-i386-add-control-bits-support-for-LAM.patch b/target-i386-add-control-bits-support-for-LAM.patch new file mode 100644 index 0000000000000000000000000000000000000000..d2a3974ae3b29a3423c1828bc7e822ed8e700483 --- /dev/null +++ b/target-i386-add-control-bits-support-for-LAM.patch @@ -0,0 +1,99 @@ +From 03e73f225c44daa067ff1c57845dcd4678897a49 Mon Sep 17 00:00:00 2001 +From: Binbin Wu +Date: Fri, 12 Jan 2024 14:00:42 +0800 +Subject: [PATCH] target/i386: add control bits support for LAM + +commit 0117067131f99acaab4f4d2cca0290c5510e37cf upstream. + +LAM uses CR3[61] and CR3[62] to configure/enable LAM on user pointers. +LAM uses CR4[28] to configure/enable LAM on supervisor pointers. + +For CR3 LAM bits, no additional handling needed: +- TCG + LAM is not supported for TCG of target-i386. helper_write_crN() and + helper_vmrun() check max physical address bits before calling + cpu_x86_update_cr3(), no change needed, i.e. CR3 LAM bits are not allowed + to be set in TCG. 
+- gdbstub + x86_cpu_gdb_write_register() will call cpu_x86_update_cr3() to update cr3. + Allow gdb to set the LAM bit(s) to CR3, if vcpu doesn't support LAM, + KVM_SET_SREGS will fail as other reserved bits. + +For CR4 LAM bit, its reservation depends on vcpu supporting LAM feature or +not. +- TCG + LAM is not supported for TCG of target-i386. helper_write_crN() and + helper_vmrun() check CR4 reserved bit before calling cpu_x86_update_cr4(), + i.e. CR4 LAM bit is not allowed to be set in TCG. +- gdbstub + x86_cpu_gdb_write_register() will call cpu_x86_update_cr4() to update cr4. + Mask out LAM bit on CR4 if vcpu doesn't support LAM. +- x86_cpu_reset_hold() doesn't need special handling. + +Intel-SIG: commit 0117067131f9 target/i386: add control bits support for +LAM +Backport Qemu Linear Address Masking (LAM) support. + +Signed-off-by: Binbin Wu +Tested-by: Xuelian Guo +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <20240112060042.19925-3-binbin.wu@linux.intel.com> +Signed-off-by: Paolo Bonzini +[ Zhiquan Li: amend commit log ] +Signed-off-by: Zhiquan Li +--- + target/i386/cpu.h | 7 ++++++- + target/i386/helper.c | 4 ++++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 8dbcb4a35f..b0666167d2 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -262,6 +262,7 @@ typedef enum X86Seg { + #define CR4_SMAP_MASK (1U << 21) + #define CR4_PKE_MASK (1U << 22) + #define CR4_PKS_MASK (1U << 24) ++#define CR4_LAM_SUP_MASK (1U << 28) + + #define CR4_RESERVED_MASK \ + (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \ +@@ -270,7 +271,8 @@ typedef enum X86Seg { + | CR4_OSFXSR_MASK | CR4_OSXMMEXCPT_MASK | CR4_UMIP_MASK \ + | CR4_LA57_MASK \ + | CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \ +- | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK)) ++ | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \ ++ | CR4_LAM_SUP_MASK)) + + #define DR6_BD (1 << 13) + 
#define DR6_BS (1 << 14) +@@ -2527,6 +2529,9 @@ static inline uint64_t cr4_reserved_bits(CPUX86State *env) + if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS)) { + reserved_bits |= CR4_PKS_MASK; + } ++ if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) { ++ reserved_bits |= CR4_LAM_SUP_MASK; ++ } + return reserved_bits; + } + +diff --git a/target/i386/helper.c b/target/i386/helper.c +index 2070dd0dda..1da7a7d315 100644 +--- a/target/i386/helper.c ++++ b/target/i386/helper.c +@@ -219,6 +219,10 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) + new_cr4 &= ~CR4_PKS_MASK; + } + ++ if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) { ++ new_cr4 &= ~CR4_LAM_SUP_MASK; ++ } ++ + env->cr[4] = new_cr4; + env->hflags = hflags; + +-- +2.41.0.windows.1 + diff --git a/target-i386-add-guest-phys-bits-cpu-property.patch b/target-i386-add-guest-phys-bits-cpu-property.patch new file mode 100644 index 0000000000000000000000000000000000000000..c2f5b0e7e37dfc175f683ffe511f22b551349ed3 --- /dev/null +++ b/target-i386-add-guest-phys-bits-cpu-property.patch @@ -0,0 +1,106 @@ +From b6bfee023b15f25c1db077df7bfd2e9212cda762 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:36 +0100 +Subject: [PATCH] target/i386: add guest-phys-bits cpu property + +commit 513ba32dccc659c80722b3a43233b26eaa50309a upstream. + +Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16]) +via -cpu $model,guest-phys-bits=$nr. 
+ +Intel-SIG: commit 513ba32dccc6 target/i386: add guest-phys-bits cpu property + +Signed-off-by: Gerd Hoffmann +Message-ID: <20240318155336.156197-3-kraxel@redhat.com> +Reviewed-by: Zhao Liu +Signed-off-by: Paolo Bonzini +[jz: compatible property for 9.0 machines not included] +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 22 ++++++++++++++++++++++ + target/i386/cpu.h | 8 ++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index ca7e5337b0..93f88b7bf8 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6827,6 +6827,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + /* 64 bit processor */ + *eax |= (cpu_x86_virtual_addr_width(env) << 8); ++ *eax |= (cpu->guest_phys_bits << 16); + } + *ebx = env->features[FEAT_8000_0008_EBX]; + if (cs->nr_cores * cs->nr_threads > 1) { +@@ -7603,6 +7604,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + goto out; + } + ++ if (cpu->guest_phys_bits == -1) { ++ /* ++ * If it was not set by the user, or by the accelerator via ++ * cpu_exec_realizefn, clear. ++ */ ++ cpu->guest_phys_bits = 0; ++ } ++ + if (cpu->ucode_rev == 0) { + /* + * The default is the same as KVM's. Note that this check +@@ -7653,6 +7662,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + if (cpu->phys_bits == 0) { + cpu->phys_bits = TCG_PHYS_ADDR_BITS; + } ++ if (cpu->guest_phys_bits && ++ (cpu->guest_phys_bits > cpu->phys_bits || ++ cpu->guest_phys_bits < 32)) { ++ error_setg(errp, "guest-phys-bits should be between 32 and %u " ++ " (but is %u)", ++ cpu->phys_bits, cpu->guest_phys_bits); ++ return; ++ } + } else { + /* For 32 bit systems don't use the user set value, but keep + * phys_bits consistent with what we tell the guest. 
+@@ -7661,6 +7678,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + error_setg(errp, "phys-bits is not user-configurable in 32 bit"); + return; + } ++ if (cpu->guest_phys_bits != 0) { ++ error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit"); ++ return; ++ } + + if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { + cpu->phys_bits = 36; +@@ -8167,6 +8188,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), ++ DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, false), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 34f9615b98..d6fdcc04ca 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -2029,6 +2029,14 @@ struct ArchCPU { + /* Number of physical address bits supported */ + uint32_t phys_bits; + ++ /* ++ * Number of guest physical address bits available. Usually this is ++ * identical to host physical address bits. With NPT or EPT 4-level ++ * paging, guest physical address space might be restricted to 48 bits ++ * even if the host cpu supports more physical address bits. 
++ */ ++ uint32_t guest_phys_bits; ++ + /* in order to simplify APIC support, we leave this pointer to the + user */ + struct DeviceState *apic_state; +-- +2.41.0.windows.1 + diff --git a/target-i386-add-sha512-sm3-sm4-feature-bits.patch b/target-i386-add-sha512-sm3-sm4-feature-bits.patch new file mode 100644 index 0000000000000000000000000000000000000000..21bf96c24a11ff48e7574802cdae10027d8583e8 --- /dev/null +++ b/target-i386-add-sha512-sm3-sm4-feature-bits.patch @@ -0,0 +1,40 @@ +From 87871b854241cc52f967805e005bdd66a923c555 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 3 Jul 2024 13:42:49 +0200 +Subject: [PATCH] target/i386: add sha512, sm3, sm4 feature bits + +commit 78be258c0eeba3d5613c37888889e84f2ba9bd94 upstream. + +SHA512, SM3, SM4 (CPUID[EAX=7,ECX=1].EAX bits 0 to 2) are supported by +Clearwater Forest processor, add them to QEMU as they do not need any +specific enablement. + +See https://lore.kernel.org/kvm/20241105054825.870939-1-tao1.su@linux.intel.com/ +for reference. + +Intel-SIG: commit 78be258c0eeb target/i386: add sha512, sm3, sm4 feature bits. 
+ +Reviewed-by: Tao Su +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index b5231432e7..6ed4e84b5c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -962,7 +962,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + [FEAT_7_1_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +- NULL, NULL, NULL, NULL, ++ "sha512", "sm3", "sm4", NULL, + "avx-vnni", "avx512-bf16", NULL, "cmpccxadd", + NULL, NULL, "fzrm", "fsrs", + "fsrc", NULL, NULL, NULL, +-- +2.41.0.windows.1 + diff --git a/target-i386-add-support-for-FRED-in-CPUID-enumeratio.patch b/target-i386-add-support-for-FRED-in-CPUID-enumeratio.patch new file mode 100644 index 0000000000000000000000000000000000000000..8afdb72a0104bd4e0408956ca036af9b28e3af5a --- /dev/null +++ b/target-i386-add-support-for-FRED-in-CPUID-enumeratio.patch @@ -0,0 +1,108 @@ +From 110184b14d17c13e046e9c4ebed6c3cec29b31d0 Mon Sep 17 00:00:00 2001 +From: Xin Li +Date: Wed, 8 Nov 2023 23:20:07 -0800 +Subject: [PATCH] target/i386: add support for FRED in CPUID enumeration + +commit c1acad9f72d14daf918563eb77d2b31c39fbd06a upstream. + +FRED, i.e., the Intel flexible return and event delivery architecture, +defines simple new transitions that change privilege level (ring +transitions). + +The new transitions defined by the FRED architecture are FRED event +delivery and, for returning from events, two FRED return instructions. +FRED event delivery can effect a transition from ring 3 to ring 0, but +it is used also to deliver events incident to ring 0. One FRED +instruction (ERETU) effects a return from ring 0 to ring 3, while the +other (ERETS) returns while remaining in ring 0. Collectively, FRED +event delivery and the FRED return instructions are FRED transitions. 
+ +In addition to these transitions, the FRED architecture defines a new +instruction (LKGS) for managing the state of the GS segment register. +The LKGS instruction can be used by 64-bit operating systems that do +not use the new FRED transitions. + +WRMSRNS is an instruction that behaves exactly like WRMSR, with the +only difference being that it is not a serializing instruction by +default. Under certain conditions, WRMSRNS may replace WRMSR to improve +performance. FRED uses it to switch RSP0 in a faster manner. + +Search for the latest FRED spec in most search engines with this search +pattern: + + site:intel.com FRED (flexible return and event delivery) specification + +The CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[17] enumerates FRED, and +the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[18] enumerates LKGS, and +the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[19] enumerates WRMSRNS. + +Add CPUID definitions for FRED/LKGS/WRMSRNS, and expose them to KVM guests. + +Because FRED relies on LKGS and WRMSRNS, add that to feature dependency +map. + +Intel-SIG: commit c1acad9f72d1 target/i386: add support for FRED in CPUID enumeration + +Tested-by: Shan Kang +Signed-off-by: Xin Li +Message-ID: <20231109072012.8078-2-xin3.li@intel.com> +[Fix order of dependencies, add dependencies from LM to FRED. 
- Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 14 +++++++++++++- + target/i386/cpu.h | 6 ++++++ + 2 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 860934b39f..47f00392be 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -966,7 +966,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "avx-vnni", "avx512-bf16", NULL, "cmpccxadd", + NULL, NULL, "fzrm", "fsrs", + "fsrc", NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, "fred", "lkgs", "wrmsrns", + NULL, "amx-fp16", NULL, "avx-ifma", + NULL, NULL, "lam", NULL, + NULL, NULL, NULL, NULL, +@@ -1553,6 +1553,18 @@ static FeatureDep feature_dependencies[] = { + .from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE }, + }, ++ { ++ .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM }, ++ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, ++ }, ++ { ++ .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_LKGS }, ++ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, ++ }, ++ { ++ .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS }, ++ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, ++ }, + }; + + typedef struct X86RegisterInfo32 { +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 21fb769cce..f392626f98 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -941,6 +941,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) + /* PREFETCHIT0/1 Instructions */ + #define CPUID_7_1_EDX_PREFETCHITI (1U << 14) ++/* Flexible return and event delivery (FRED) */ ++#define CPUID_7_1_EAX_FRED (1U << 17) ++/* Load into IA32_KERNEL_GS_BASE (LKGS) */ ++#define CPUID_7_1_EAX_LKGS (1U << 18) ++/* Non-Serializing Write to Model Specific Register (WRMSRNS) */ ++#define CPUID_7_1_EAX_WRMSRNS (1U << 19) + + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ + #define CPUID_7_2_EDX_MCDT_NO (1U << 5) +-- 
+2.41.0.windows.1 + diff --git a/target-i386-add-support-for-LAM-in-CPUID-enumeration.patch b/target-i386-add-support-for-LAM-in-CPUID-enumeration.patch new file mode 100644 index 0000000000000000000000000000000000000000..cbe303dab96721b96d76d6b3e448533557c6273b --- /dev/null +++ b/target-i386-add-support-for-LAM-in-CPUID-enumeration.patch @@ -0,0 +1,69 @@ +From 8bc3dd094a9daa348d49436dc4d0867b7b514ba7 Mon Sep 17 00:00:00 2001 +From: Robert Hoo +Date: Fri, 12 Jan 2024 14:00:41 +0800 +Subject: [PATCH] target/i386: add support for LAM in CPUID enumeration + +commit ba6780905943696d790cc880c8e5684b51f027fe upstream. + +Linear Address Masking (LAM) is a new Intel CPU feature, which allows +software to make use of the untranslated address bits for metadata. + +The bit definition: +CPUID.(EAX=7,ECX=1):EAX[26] + +Add CPUID definition for LAM. + +Note LAM feature is not supported for TCG of target-i386, LAM CPUID bit +will not be added to TCG_7_1_EAX_FEATURES. + +More info can be found in Intel ISE Chapter "LINEAR ADDRESS MASKING(LAM)" +https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Intel-SIG: commit ba6780905943 target/i386: add support for LAM in CPUID +enumeration +Backport Qemu Linear Address Masking (LAM) support. 
+ +Signed-off-by: Robert Hoo +Co-developed-by: Binbin Wu +Signed-off-by: Binbin Wu +Tested-by: Xuelian Guo +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <20240112060042.19925-2-binbin.wu@linux.intel.com> +Signed-off-by: Paolo Bonzini +[ Zhiquan Li: amend commit log ] +Signed-off-by: Zhiquan Li +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 711370d9b8..19ebd49e8c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -967,7 +967,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "fsrc", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "amx-fp16", NULL, "avx-ifma", +- NULL, NULL, NULL, NULL, ++ NULL, NULL, "lam", NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 6993552cd9..8dbcb4a35f 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -926,6 +926,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_7_1_EAX_AMX_FP16 (1U << 21) + /* Support for VPMADD52[H,L]UQ */ + #define CPUID_7_1_EAX_AVX_IFMA (1U << 23) ++/* Linear Address Masking */ ++#define CPUID_7_1_EAX_LAM (1U << 26) + + /* Support for VPDPB[SU,UU,SS]D[,S] */ + #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) +-- +2.41.0.windows.1 + diff --git a/target-i386-add-two-missing-VMX-features-for-Skylake.patch b/target-i386-add-two-missing-VMX-features-for-Skylake.patch deleted file mode 100644 index fa3c6d8f39ad28cb75995e142697bee2bf48e97a..0000000000000000000000000000000000000000 --- a/target-i386-add-two-missing-VMX-features-for-Skylake.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 1faa48f4de44c123143d43e67cd5a478628a45a4 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 25 Nov 2019 19:12:16 +0100 -Subject: [PATCH] target/i386: add two missing VMX features for Skylake and - CascadeLake Server - -They are present in client (Core) Skylake but pasted wrong into the 
server -SKUs. - -Reported-by: Dr. David Alan Gilbert -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 2f32d67aa5..6f27a5170a 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2997,7 +2997,8 @@ static X86CPUDefinition builtin_x86_defs[] = { - VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | - VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -- VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Skylake)", - .versions = (X86CPUVersionDefinition[]) { -@@ -3113,7 +3114,8 @@ static X86CPUDefinition builtin_x86_defs[] = { - VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | - VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -- VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Cascadelake)", - .versions = (X86CPUVersionDefinition[]) { --- -2.27.0 - diff --git a/target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/target-i386-check-for-availability-of-MSR_IA32_UCODE.patch deleted file mode 100644 index 377226a4cbff7b6a4c83d410f8f78ff18bc97190..0000000000000000000000000000000000000000 --- a/target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 0633e7684b4f4da858a3739d68cb57a1d49bdf01 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 11 Feb 2020 18:55:16 +0100 -Subject: [PATCH] target/i386: check for availability of MSR_IA32_UCODE_REV as - 
an emulated MSR - -Even though MSR_IA32_UCODE_REV has been available long before Linux 5.6, -which added it to the emulated MSR list, a bug caused the microcode -version to revert to 0x100000000 on INIT. As a result, processors other -than the bootstrap processor would not see the host microcode revision; -some Windows version complain loudly about this and crash with a -fairly explicit MICROCODE REVISION MISMATCH error. - -[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset - microcode version on INIT or RESET" should also be applied.] - -Reported-by: Alex Williamson -Message-id: <20200211175516.10716-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini ---- - target/i386/kvm.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 7437f86130..e49a2d2585 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -99,6 +99,7 @@ static bool has_msr_smi_count; - static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; -+static bool has_msr_ucode_rev; - static bool has_msr_vmx_procbased_ctls2; - - static uint32_t has_architectural_pmu_version; -@@ -1985,6 +1986,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_VMX_VMFUNC: - has_msr_vmx_vmfunc = true; - break; -+ case MSR_IA32_UCODE_REV: -+ has_msr_ucode_rev = true; -+ break; - case MSR_IA32_VMX_PROCBASED_CTLS2: - has_msr_vmx_procbased_ctls2 = true; - break; -@@ -2628,8 +2632,7 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -- if (kvm_arch_get_supported_msr_feature(kvm_state, -- MSR_IA32_UCODE_REV)) { -+ if (has_msr_ucode_rev) { - kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); - } - --- -2.27.0 - diff --git a/target-i386-cpu-Fix-notes-for-CPU-models.patch b/target-i386-cpu-Fix-notes-for-CPU-models.patch new file mode 100644 index 0000000000000000000000000000000000000000..cd5f0d8879d69d5ea1160f815e755f8fb8c39d93 --- 
/dev/null +++ b/target-i386-cpu-Fix-notes-for-CPU-models.patch @@ -0,0 +1,41 @@ +From 96b5acaa5dbff1e5bf8809fd818e6ff813e5a170 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Tue, 31 Dec 2024 02:04:04 -0500 +Subject: [PATCH] target/i386/cpu: Fix notes for CPU models + +cherry-pick from 93dcc9390e5ad0696ae7e9b7b3a5b08c2d1b6de6 + +Fixes: 644e3c5d812 ("missing vmx features for Skylake-Server and Cascadelake-Server") +Signed-off-by: Han Han +Reviewed-by: Chenyi Qiang +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +Signed-off-by: qihao_yewu +--- + target/i386/cpu.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 727beb6a65..1fa08265bc 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3453,6 +3453,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .version = 4, ++ .note = "IBRS, EPT switching, no TSX", + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } +@@ -3587,7 +3588,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + }, + { .version = 4, +- .note = "ARCH_CAPABILITIES, no TSX", ++ .note = "ARCH_CAPABILITIES, EPT switching, no TSX", + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } +-- +2.41.0.windows.1 + diff --git a/target-i386-cpu-Populate-CPUID-0x8000_001F-when-CSV3.patch b/target-i386-cpu-Populate-CPUID-0x8000_001F-when-CSV3.patch new file mode 100644 index 0000000000000000000000000000000000000000..73362342ccdc1d967fc44c38d8f07fcc0c4211be --- /dev/null +++ b/target-i386-cpu-Populate-CPUID-0x8000_001F-when-CSV3.patch @@ -0,0 +1,41 @@ +From 120d0b9e5c92de91c69fb9fbea038b51c820013d Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Tue, 24 Aug 2021 17:31:28 +0800 +Subject: [PATCH] target/i386: cpu: Populate CPUID 0x8000_001F when CSV3 is + active + +On Hygon platform, bit 30 of EAX indicates whether +this feature is supported in hardware. 
+ +When CSV3 is active, CPUID 0x8000_001F provides +information for it. + +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + target/i386/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index ca7e5337b0..36f7ad6460 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -29,6 +29,7 @@ + #include "hvf/hvf-i386.h" + #include "kvm/kvm_i386.h" + #include "sev.h" ++#include "csv.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "qapi/qapi-visit-machine.h" +@@ -6943,6 +6944,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; ++ *eax |= csv3_enabled() ? 0x40000000 : 0; /* bit 30 for CSV3 */ + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-CSV3-context.patch b/target-i386-csv-Add-CSV3-context.patch new file mode 100644 index 0000000000000000000000000000000000000000..3e12a6961fcc2cf91cd355457126f59ded87e31b --- /dev/null +++ b/target-i386-csv-Add-CSV3-context.patch @@ -0,0 +1,85 @@ +From 54648e0e5a45acf2e472430ee83bb8dfa057fb30 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Tue, 24 Aug 2021 14:57:28 +0800 +Subject: [PATCH] target/i386: csv: Add CSV3 context + +CSV/CSV2/CSV3 are the secure virtualization features on Hygon CPUs. +The CSV and CSV2 are compatible with the AMD SEV and SEV-ES, +respectively. From CSV3, we introduced more secure features to +protect the guest, users can bit 6 of the guest policy to run a +CSV3 guest. + +Add the context and the build option. 
+ +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + target/i386/csv.c | 11 +++++++++++ + target/i386/csv.h | 17 +++++++++++++++++ + 2 files changed, 28 insertions(+) + +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 88fb05ac37..9a1de04db7 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -18,3 +18,14 @@ + #include "csv.h" + + bool csv_kvm_cpu_reset_inhibit; ++ ++Csv3GuestState csv3_guest = { 0 }; ++ ++bool ++csv3_enabled(void) ++{ ++ if (!is_hygon_cpu()) ++ return false; ++ ++ return sev_es_enabled() && (csv3_guest.policy & GUEST_POLICY_CSV3_BIT); ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 05e7fd8dc1..ea87c1ba27 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -14,6 +14,9 @@ + #ifndef I386_CSV_H + #define I386_CSV_H + ++#include "qapi/qapi-commands-misc-target.h" ++ ++#define GUEST_POLICY_CSV3_BIT (1 << 6) + #define GUEST_POLICY_REUSE_ASID (1 << 7) + + #ifdef CONFIG_CSV +@@ -40,9 +43,12 @@ static bool __attribute__((unused)) is_hygon_cpu(void) + return false; + } + ++bool csv3_enabled(void); ++ + #else + + #define is_hygon_cpu() (false) ++#define csv3_enabled() (false) + + #endif + +@@ -66,4 +72,15 @@ int csv_load_queued_incoming_pages(QEMUFile *f); + int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent); + int csv_load_incoming_cpu_state(QEMUFile *f); + ++/* CSV3 */ ++struct Csv3GuestState { ++ uint32_t policy; ++ int sev_fd; ++ void *state; ++}; ++ ++typedef struct Csv3GuestState Csv3GuestState; ++ ++extern struct Csv3GuestState csv3_guest; ++ + #endif +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-command-to-initialize-CSV3-conte.patch b/target-i386-csv-Add-command-to-initialize-CSV3-conte.patch new file mode 100644 index 0000000000000000000000000000000000000000..04aea45c55b0be6c1fc10378fad8718c0ca12ec9 --- /dev/null +++ b/target-i386-csv-Add-command-to-initialize-CSV3-conte.patch @@ -0,0 +1,201 @@ +From 4ce59de673b1b190cde76c458ac9e92a6413172d Mon Sep 17 00:00:00 2001 +From: 
jiangxin +Date: Wed, 25 Aug 2021 11:07:41 +0800 +Subject: [PATCH] target/i386: csv: Add command to initialize CSV3 context + +When CSV3 is enabled, KVM_CSV3_INIT command is used to initialize +the platform, which is implemented by reusing the SEV API framework +and extending the functionality. + +The KVM_CSV3_INIT command should be performed earlier than +any other command. + +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 11 +++++++++ + target/i386/csv-sysemu-stub.c | 5 ++++ + target/i386/csv.c | 45 +++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 4 ++++ + target/i386/sev.c | 17 +++++++++++++ + target/i386/sev.h | 7 ++++++ + 6 files changed, 89 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 8dc00808ec..90869068c8 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2108,6 +2108,17 @@ struct kvm_csv_init { + __u32 len; + }; + ++/* CSV3 command */ ++enum csv3_cmd_id { ++ KVM_CSV3_NR_MIN = 0xc0, ++ ++ KVM_CSV3_INIT = KVM_CSV3_NR_MIN, ++}; ++ ++struct kvm_csv3_init_data { ++ __u64 nodemask; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +index 5874e4cc1d..72f0f5c772 100644 +--- a/target/i386/csv-sysemu-stub.c ++++ b/target/i386/csv-sysemu-stub.c +@@ -14,3 +14,8 @@ + #include "qemu/osdep.h" + #include "sev.h" + #include "csv.h" ++ ++int csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) ++{ ++ return 0; ++} +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 9a1de04db7..fd3ea291ca 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -12,6 +12,13 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/error-report.h" ++ ++#include ++ ++#ifdef CONFIG_NUMA ++#include ++#endif + + #include "cpu.h" + #include "sev.h" +@@ -21,6 +28,44 @@ bool 
csv_kvm_cpu_reset_inhibit; + + Csv3GuestState csv3_guest = { 0 }; + ++int ++csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) ++{ ++ int fw_error; ++ int ret; ++ struct kvm_csv3_init_data data = { 0 }; ++ ++#ifdef CONFIG_NUMA ++ int mode; ++ unsigned long nodemask; ++ ++ /* Set flags as 0 to retrieve the default NUMA policy. */ ++ ret = get_mempolicy(&mode, &nodemask, sizeof(nodemask) * 8, NULL, 0); ++ if (ret == 0 && mode == MPOL_BIND) ++ data.nodemask = nodemask; ++#endif ++ ++ if (!ops || !ops->sev_ioctl || !ops->fw_error_to_str) ++ return -1; ++ ++ csv3_guest.policy = policy; ++ if (csv3_enabled()) { ++ ret = ops->sev_ioctl(fd, KVM_CSV3_INIT, &data, &fw_error); ++ if (ret) { ++ csv3_guest.policy = 0; ++ error_report("%s: Fail to initialize ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, ops->fw_error_to_str(fw_error)); ++ return -1; ++ } ++ ++ csv3_guest.sev_fd = fd; ++ csv3_guest.state = state; ++ csv3_guest.sev_ioctl = ops->sev_ioctl; ++ csv3_guest.fw_error_to_str = ops->fw_error_to_str; ++ } ++ return 0; ++} ++ + bool + csv3_enabled(void) + { +diff --git a/target/i386/csv.h b/target/i386/csv.h +index ea87c1ba27..4096e8658b 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -15,6 +15,7 @@ + #define I386_CSV_H + + #include "qapi/qapi-commands-misc-target.h" ++#include "sev.h" + + #define GUEST_POLICY_CSV3_BIT (1 << 6) + #define GUEST_POLICY_REUSE_ASID (1 << 7) +@@ -77,10 +78,13 @@ struct Csv3GuestState { + uint32_t policy; + int sev_fd; + void *state; ++ int (*sev_ioctl)(int fd, int cmd, void *data, int *error); ++ const char *(*fw_error_to_str)(int code); + }; + + typedef struct Csv3GuestState Csv3GuestState; + + extern struct Csv3GuestState csv3_guest; ++extern int csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops); + + #endif +diff --git a/target/i386/sev.c b/target/i386/sev.c +index af61ca5ba8..1c453b3148 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1225,6 +1225,18 @@ int 
sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + ++ /* Support CSV3 */ ++ if (!ret && cmd == KVM_SEV_ES_INIT) { ++ ret = csv3_init(sev_guest->policy, sev->sev_fd, (void *)&sev->state, &sev_ops); ++ if (ret) { ++ error_setg(errp, "%s: failed to init csv3 context", __func__); ++ goto err; ++ } ++ /* The CSV3 guest is not resettable */ ++ if (csv3_enabled()) ++ csv_kvm_cpu_reset_inhibit = true; ++ } ++ + /* + * The LAUNCH context is used for new guest, if its an incoming guest + * then RECEIVE context will be created after the connection is established. +@@ -2635,6 +2647,11 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++struct sev_ops sev_ops = { ++ .sev_ioctl = sev_ioctl, ++ .fw_error_to_str = fw_error_to_str, ++}; ++ + static void + sev_register_types(void) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 0bfe3879ef..e91431e0f7 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -80,4 +80,11 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); + + extern bool sev_kvm_has_msr_ghcb; + ++struct sev_ops { ++ int (*sev_ioctl)(int fd, int cmd, void *data, int *error); ++ const char *(*fw_error_to_str)(int code); ++}; ++ ++extern struct sev_ops sev_ops; ++ + #endif +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-command-to-load-data-to-CSV3-gue.patch b/target-i386-csv-Add-command-to-load-data-to-CSV3-gue.patch new file mode 100644 index 0000000000000000000000000000000000000000..3923d48f260fbb623351db24b0ffc55e9d21503a --- /dev/null +++ b/target-i386-csv-Add-command-to-load-data-to-CSV3-gue.patch @@ -0,0 +1,166 @@ +From 53cba8da8fb18cc9a463ec1f57990e8558cd4008 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Wed, 25 Aug 2021 09:59:16 +0800 +Subject: [PATCH] target/i386: csv: Add command to load data to CSV3 guest + memory + +The KVM_CSV3_LAUNCH_ENCRYPT_DATA command is used to load data to an +encrypted guest memory in an isolated memory region that 
guest owns. + +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 7 ++++ + target/i386/csv-sysemu-stub.c | 5 +++ + target/i386/csv.c | 69 +++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 2 + + target/i386/trace-events | 3 ++ + 5 files changed, 86 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 90869068c8..dd6d9c2e07 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2113,6 +2113,13 @@ enum csv3_cmd_id { + KVM_CSV3_NR_MIN = 0xc0, + + KVM_CSV3_INIT = KVM_CSV3_NR_MIN, ++ KVM_CSV3_LAUNCH_ENCRYPT_DATA, ++}; ++ ++struct kvm_csv3_launch_encrypt_data { ++ __u64 gpa; ++ __u64 uaddr; ++ __u32 len; + }; + + struct kvm_csv3_init_data { +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +index 72f0f5c772..b0ccbd2f18 100644 +--- a/target/i386/csv-sysemu-stub.c ++++ b/target/i386/csv-sysemu-stub.c +@@ -19,3 +19,8 @@ int csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) + { + return 0; + } ++ ++int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp) ++{ ++ g_assert_not_reached(); ++} +diff --git a/target/i386/csv.c b/target/i386/csv.c +index fd3ea291ca..2a596681b8 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -13,6 +13,7 @@ + + #include "qemu/osdep.h" + #include "qemu/error-report.h" ++#include "qapi/error.h" + + #include + +@@ -20,6 +21,7 @@ + #include + #endif + ++#include "trace.h" + #include "cpu.h" + #include "sev.h" + #include "csv.h" +@@ -74,3 +76,70 @@ csv3_enabled(void) + + return sev_es_enabled() && (csv3_guest.policy & GUEST_POLICY_CSV3_BIT); + } ++ ++static bool ++csv3_check_state(SevState state) ++{ ++ return *((SevState *)csv3_guest.state) == state; ++} ++ ++static int ++csv3_ioctl(int cmd, void *data, int *error) ++{ ++ if (csv3_guest.sev_ioctl) ++ return csv3_guest.sev_ioctl(csv3_guest.sev_fd, cmd, data, error); ++ else ++ return -1; ++} ++ ++static const char * 
++fw_error_to_str(int code) ++{ ++ if (csv3_guest.fw_error_to_str) ++ return csv3_guest.fw_error_to_str(code); ++ else ++ return NULL; ++} ++ ++static int ++csv3_launch_encrypt_data(uint64_t gpa, uint8_t *addr, uint64_t len) ++{ ++ int ret, fw_error; ++ struct kvm_csv3_launch_encrypt_data update; ++ ++ if (!addr || !len) { ++ return 1; ++ } ++ ++ update.gpa = (__u64)gpa; ++ update.uaddr = (__u64)(unsigned long)addr; ++ update.len = len; ++ trace_kvm_csv3_launch_encrypt_data(gpa, addr, len); ++ ret = csv3_ioctl(KVM_CSV3_LAUNCH_ENCRYPT_DATA, &update, &fw_error); ++ if (ret) { ++ error_report("%s: CSV3 LAUNCH_ENCRYPT_DATA ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ } ++ ++ return ret; ++} ++ ++int ++csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp) ++{ ++ int ret = 0; ++ ++ if (!csv3_enabled()) { ++ error_setg(errp, "%s: CSV3 is not enabled", __func__); ++ return -1; ++ } ++ ++ /* if CSV3 is in update state then load the data to secure memory */ ++ if (csv3_check_state(SEV_STATE_LAUNCH_UPDATE)) { ++ ret = csv3_launch_encrypt_data(gpa, ptr, len); ++ if (ret) ++ error_setg(errp, "%s: CSV3 fail to encrypt data", __func__); ++ } ++ ++ return ret; ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 4096e8658b..27b66f7857 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -87,4 +87,6 @@ typedef struct Csv3GuestState Csv3GuestState; + extern struct Csv3GuestState csv3_guest; + extern int csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops); + ++int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); ++ + #endif +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 87b765c73c..34c205ffda 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -19,3 +19,6 @@ kvm_sev_receive_update_data(void *src, void *dst, int len, void *hdr, int hdr_le + kvm_sev_receive_finish(void) "" + kvm_sev_send_update_vmsa(uint32_t cpu_id, uint32_t 
cpu_index, void *dst, int len) "cpu_id %d cpu_index %d trans %p len %d" + kvm_sev_receive_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *src, int len, void *hdr, int hdr_len) "cpu_id %d cpu_index %d trans %p len %d hdr %p hdr_len %d" ++ ++# csv.c ++kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" PRIx64 "addr %p len 0x%" PRIx64 +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-command-to-load-vmcb-to-CSV3-gue.patch b/target-i386-csv-Add-command-to-load-vmcb-to-CSV3-gue.patch new file mode 100644 index 0000000000000000000000000000000000000000..b57c352017b2a96759cf4de143ab6917c0b3982c --- /dev/null +++ b/target-i386-csv-Add-command-to-load-vmcb-to-CSV3-gue.patch @@ -0,0 +1,108 @@ +From 368bf2c044fcdd21f10545de103af7cd2a5986f9 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Wed, 25 Aug 2021 12:25:05 +0800 +Subject: [PATCH] target/i386: csv: Add command to load vmcb to CSV3 guest + memory + +The KVM_CSV3_LAUNCH_ENCRYPT_VMCB command is used to load and encrypt +the initial VMCB data to secure memory in an isolated region that +guest owns. 
+ +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 1 + + target/i386/csv-sysemu-stub.c | 5 +++++ + target/i386/csv.c | 21 +++++++++++++++++++++ + target/i386/csv.h | 1 + + target/i386/sev.c | 8 ++++++-- + 5 files changed, 34 insertions(+), 2 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index dd6d9c2e07..8487d0889b 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2114,6 +2114,7 @@ enum csv3_cmd_id { + + KVM_CSV3_INIT = KVM_CSV3_NR_MIN, + KVM_CSV3_LAUNCH_ENCRYPT_DATA, ++ KVM_CSV3_LAUNCH_ENCRYPT_VMCB, + }; + + struct kvm_csv3_launch_encrypt_data { +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +index b0ccbd2f18..23d885f0f3 100644 +--- a/target/i386/csv-sysemu-stub.c ++++ b/target/i386/csv-sysemu-stub.c +@@ -24,3 +24,8 @@ int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + g_assert_not_reached(); + } ++ ++int csv3_launch_encrypt_vmcb(void) ++{ ++ g_assert_not_reached(); ++} +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 2a596681b8..12282ba451 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -143,3 +143,24 @@ csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp) + + return ret; + } ++ ++int ++csv3_launch_encrypt_vmcb(void) ++{ ++ int ret, fw_error; ++ ++ if (!csv3_enabled()) { ++ error_report("%s: CSV3 is not enabled", __func__); ++ return -1; ++ } ++ ++ ret = csv3_ioctl(KVM_CSV3_LAUNCH_ENCRYPT_VMCB, NULL, &fw_error); ++ if (ret) { ++ error_report("%s: CSV3 LAUNCH_ENCRYPT_VMCB ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++err: ++ return ret; ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 27b66f7857..3caf216743 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -86,6 +86,7 @@ typedef struct Csv3GuestState Csv3GuestState; + + extern struct Csv3GuestState csv3_guest; + extern int 
csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops); ++extern int csv3_launch_encrypt_vmcb(void); + + int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 1c453b3148..6ff8891678 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -880,8 +880,12 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + } + + if (sev_es_enabled()) { +- /* measure all the VM save areas before getting launch_measure */ +- ret = sev_launch_update_vmsa(sev); ++ if (csv3_enabled()) { ++ ret = csv3_launch_encrypt_vmcb(); ++ } else { ++ /* measure all the VM save areas before getting launch_measure */ ++ ret = sev_launch_update_vmsa(sev); ++ } + if (ret) { + exit(1); + } +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-support-for-migrate-VMSA-for-CSV.patch b/target-i386-csv-Add-support-for-migrate-VMSA-for-CSV.patch new file mode 100644 index 0000000000000000000000000000000000000000..483e7848a4b7e3825ef8855e318762dc1fe16be3 --- /dev/null +++ b/target-i386-csv-Add-support-for-migrate-VMSA-for-CSV.patch @@ -0,0 +1,433 @@ +From 940858a3ab39575a0c1d91d4aa5bb65607259a8f Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Tue, 7 Jun 2022 15:19:32 +0800 +Subject: [PATCH] target/i386: csv: Add support for migrate VMSA for CSV2 guest + +CSV2 can protect guest's cpu state through memory encryption. Each +vcpu has its corresponding memory, which is also called VMSA, and +is encrypted by guest's specific encrytion key. + +When CSV2 guest exit to host, the vcpu's state will be encrypted +and saved to VMSA, and the VMSA will be decrypted and loaded to cpu +when the guest's vcpu running at next time. + +If user wants to migrate one CSV2 guest to target machine, the VMSA +of the vcpus also should be migrated to target. 
CSV firmware provides +SEND_UPDATE_VMSA/RECEIVE_UPDATE_VMSA API through which VMSA can be +converted into secure data and transmitted to the remote end (for +example, network transmission). + +The migration of cpu state is identified by CPUState.cpu_index which +may not equals to vcpu id from KVM's perspective. + +When migrate the VMSA, the source QEMU will invoke SEND_UPDATE_VMSA to +generate data correspond to VMSA, after target QEMU received the data, +it will calc target vcpu id in the KVM by CPUState.cpu_index, and then +invoke RECEIVE_UPDATE_VMSA to restore VMSA correspond to vcpu. + +Signed-off-by: hanliyang +--- + include/exec/confidential-guest-support.h | 6 + + linux-headers/linux/kvm.h | 16 ++ + migration/ram.c | 42 +++++ + target/i386/csv.h | 2 + + target/i386/sev.c | 201 ++++++++++++++++++++++ + target/i386/sev.h | 1 + + target/i386/trace-events | 2 + + 7 files changed, 270 insertions(+) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index cb14b815cb..2cba27642f 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -90,6 +90,12 @@ struct ConfidentialGuestMemoryEncryptionOps { + + /* Load the incoming encrypted pages queued in list into guest memory */ + int (*load_queued_incoming_pages)(QEMUFile *f); ++ ++ /* Write the encrypted cpu state */ ++ int (*save_outgoing_cpu_state)(QEMUFile *f, uint64_t *bytes_sent); ++ ++ /* Load the encrypted cpu state */ ++ int (*load_incoming_cpu_state)(QEMUFile *f); + }; + + typedef struct ConfidentialGuestSupportClass { +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index fcd09126a1..e9cd0ebaf1 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2052,6 +2052,14 @@ struct kvm_sev_send_update_data { + __u32 trans_len; + }; + ++struct kvm_sev_send_update_vmsa { ++ __u32 vcpu_id; ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++}; ++ + 
struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; +@@ -2070,6 +2078,14 @@ struct kvm_sev_receive_update_data { + __u32 trans_len; + }; + ++struct kvm_sev_receive_update_vmsa { ++ __u32 vcpu_id; ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++}; ++ + struct kvm_csv_batch_list_node { + __u64 cmd_data_addr; + __u64 addr; +diff --git a/migration/ram.c b/migration/ram.c +index 790c0413c1..1377b9eb37 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1281,6 +1281,33 @@ static int ram_save_shared_region_list(RAMState *rs, QEMUFile *f) + return 0; + } + ++/** ++ * ram_save_encrypted_cpu_state: send the encrypted cpu state ++ */ ++static int ram_save_encrypted_cpu_state(RAMState *rs, QEMUFile *f) ++{ ++ int ret; ++ uint64_t bytes_xmit = 0; ++ PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ ++ ram_transferred_add(save_page_header(pss, f, ++ pss->last_sent_block, ++ RAM_SAVE_FLAG_ENCRYPTED_DATA)); ++ qemu_put_be32(f, RAM_SAVE_ENCRYPTED_CPU_STATE); ++ ret = ops->save_outgoing_cpu_state(f, &bytes_xmit); ++ if (ret < 0) { ++ return ret; ++ } ++ ram_transferred_add(4 + bytes_xmit); ++ ++ return 0; ++} ++ + static int load_encrypted_data(QEMUFile *f, uint8_t *ptr) + { + MachineState *ms = MACHINE(qdev_get_machine()); +@@ -1305,6 +1332,8 @@ static int load_encrypted_data(QEMUFile *f, uint8_t *ptr) + return -EINVAL; + } + return ops->load_queued_incoming_pages(f); ++ } else if (flag == RAM_SAVE_ENCRYPTED_CPU_STATE) { ++ return ops->load_incoming_cpu_state(f); + } else { + error_report("unknown encrypted flag %x", flag); + return 1; +@@ -3494,6 +3523,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + qemu_file_set_error(f, ret); + return ret; + } ++ ++ 
/* ++ * send the encrypted cpu state, for example, CSV2 guest's ++ * vmsa for each vcpu. ++ */ ++ if (is_hygon_cpu()) { ++ ret = ram_save_encrypted_cpu_state(rs, f); ++ if (ret < 0) { ++ error_report("Failed to save encrypted cpu state"); ++ qemu_file_set_error(f, ret); ++ return ret; ++ } ++ } + } + } + +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 74a54f9b9c..47741a0a4f 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -59,5 +59,7 @@ int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); + int csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); + int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr); + int csv_load_queued_incoming_pages(QEMUFile *f); ++int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent); ++int csv_load_incoming_cpu_state(QEMUFile *f); + + #endif +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 2dee46d852..6ba71c91d7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -90,6 +90,10 @@ struct SevGuestState { + gchar *send_packet_hdr; + size_t send_packet_hdr_len; + ++ /* needed by live migration of HYGON CSV2 guest */ ++ gchar *send_vmsa_packet_hdr; ++ size_t send_vmsa_packet_hdr_len; ++ + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; +@@ -183,6 +187,9 @@ static const char *const sev_fw_errlist[] = { + #define SHARED_REGION_LIST_CONT 0x1 + #define SHARED_REGION_LIST_END 0x2 + ++#define ENCRYPTED_CPU_STATE_CONT 0x1 ++#define ENCRYPTED_CPU_STATE_END 0x2 ++ + static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_setup = sev_save_setup, + .save_outgoing_page = sev_save_outgoing_page, +@@ -194,6 +201,8 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_queued_outgoing_pages = csv_save_queued_outgoing_pages, + .queue_incoming_page = csv_queue_incoming_page, + .load_queued_incoming_pages = csv_load_queued_incoming_pages, ++ .save_outgoing_cpu_state = 
csv_save_outgoing_cpu_state, ++ .load_incoming_cpu_state = csv_load_incoming_cpu_state, + }; + + static int +@@ -1047,6 +1056,9 @@ sev_send_finish(void) + } + + g_free(sev_guest->send_packet_hdr); ++ if (sev_es_enabled() && is_hygon_cpu()) { ++ g_free(sev_guest->send_vmsa_packet_hdr); ++ } + sev_set_guest_state(sev_guest, SEV_STATE_RUNNING); + } + +@@ -2238,6 +2250,195 @@ int csv_load_queued_incoming_pages(QEMUFile *f) + return csv_receive_update_data_batch(s); + } + ++static int ++sev_send_vmsa_get_packet_len(int *fw_err) ++{ ++ int ret; ++ struct kvm_sev_send_update_vmsa update = { 0, }; ++ ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, ++ &update, fw_err); ++ if (*fw_err != SEV_RET_INVALID_LEN) { ++ ret = 0; ++ error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", ++ __func__, ret, *fw_err, fw_error_to_str(*fw_err)); ++ goto err; ++ } ++ ++ ret = update.hdr_len; ++ ++err: ++ return ret; ++} ++ ++static int ++sev_send_update_vmsa(SevGuestState *s, QEMUFile *f, uint32_t cpu_id, ++ uint32_t cpu_index, uint32_t size, uint64_t *bytes_sent) ++{ ++ int ret, fw_error; ++ guchar *trans = NULL; ++ struct kvm_sev_send_update_vmsa update = {}; ++ ++ /* ++ * If this is first call then query the packet header bytes and allocate ++ * the packet buffer. 
++ */ ++ if (!s->send_vmsa_packet_hdr) { ++ s->send_vmsa_packet_hdr_len = sev_send_vmsa_get_packet_len(&fw_error); ++ if (s->send_vmsa_packet_hdr_len < 1) { ++ error_report("%s: SEND_UPDATE_VMSA fw_error=%d '%s'", ++ __func__, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ s->send_vmsa_packet_hdr = g_new(gchar, s->send_vmsa_packet_hdr_len); ++ } ++ ++ /* allocate transport buffer */ ++ trans = g_new(guchar, size); ++ ++ update.vcpu_id = cpu_id; ++ update.hdr_uaddr = (uintptr_t)s->send_vmsa_packet_hdr; ++ update.hdr_len = s->send_vmsa_packet_hdr_len; ++ update.trans_uaddr = (uintptr_t)trans; ++ update.trans_len = size; ++ ++ trace_kvm_sev_send_update_vmsa(cpu_id, cpu_index, trans, size); ++ ++ ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, &fw_error); ++ if (ret) { ++ error_report("%s: SEND_UPDATE_VMSA ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++ /* ++ * Migration of vCPU's VMState according to the instance_id ++ * (i.e. 
CPUState.cpu_index) ++ */ ++ qemu_put_be32(f, sizeof(uint32_t)); ++ qemu_put_buffer(f, (uint8_t *)&cpu_index, sizeof(uint32_t)); ++ *bytes_sent += 4 + sizeof(uint32_t); ++ ++ qemu_put_be32(f, update.hdr_len); ++ qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len); ++ *bytes_sent += 4 + update.hdr_len; ++ ++ qemu_put_be32(f, update.trans_len); ++ qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ *bytes_sent += 4 + update.trans_len; ++ ++err: ++ g_free(trans); ++ return ret; ++} ++ ++int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent) ++{ ++ SevGuestState *s = sev_guest; ++ CPUState *cpu; ++ int ret = 0; ++ ++ /* Only support migrate VMSAs for HYGON CSV2 guest */ ++ if (!sev_es_enabled() || !is_hygon_cpu()) { ++ return 0; ++ } ++ ++ CPU_FOREACH(cpu) { ++ qemu_put_be32(f, ENCRYPTED_CPU_STATE_CONT); ++ *bytes_sent += 4; ++ ret = sev_send_update_vmsa(s, f, kvm_arch_vcpu_id(cpu), ++ cpu->cpu_index, TARGET_PAGE_SIZE, bytes_sent); ++ if (ret) { ++ goto err; ++ } ++ } ++ ++ qemu_put_be32(f, ENCRYPTED_CPU_STATE_END); ++ *bytes_sent += 4; ++ ++err: ++ return ret; ++} ++ ++static int sev_receive_update_vmsa(QEMUFile *f) ++{ ++ int ret = 1, fw_error = 0; ++ CPUState *cpu; ++ uint32_t cpu_index, cpu_id = 0; ++ gchar *hdr = NULL, *trans = NULL; ++ struct kvm_sev_receive_update_vmsa update = {}; ++ ++ /* get cpu index buffer */ ++ assert(qemu_get_be32(f) == sizeof(uint32_t)); ++ qemu_get_buffer(f, (uint8_t *)&cpu_index, sizeof(uint32_t)); ++ ++ CPU_FOREACH(cpu) { ++ if (cpu->cpu_index == cpu_index) { ++ cpu_id = kvm_arch_vcpu_id(cpu); ++ break; ++ } ++ } ++ update.vcpu_id = cpu_id; ++ ++ /* get packet header */ ++ update.hdr_len = qemu_get_be32(f); ++ if (!check_blob_length(update.hdr_len)) { ++ return 1; ++ } ++ ++ hdr = g_new(gchar, update.hdr_len); ++ qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len); ++ update.hdr_uaddr = (uintptr_t)hdr; ++ ++ /* get transport buffer */ ++ update.trans_len = qemu_get_be32(f); ++ if 
(!check_blob_length(update.trans_len)) { ++ goto err; ++ } ++ ++ trans = g_new(gchar, update.trans_len); ++ update.trans_uaddr = (uintptr_t)trans; ++ qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ ++ trace_kvm_sev_receive_update_vmsa(cpu_id, cpu_index, ++ trans, update.trans_len, hdr, update.hdr_len); ++ ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_VMSA, ++ &update, &fw_error); ++ if (ret) { ++ error_report("Error RECEIVE_UPDATE_VMSA ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ } ++ ++err: ++ g_free(trans); ++ g_free(hdr); ++ return ret; ++} ++ ++int csv_load_incoming_cpu_state(QEMUFile *f) ++{ ++ int status, ret = 0; ++ ++ /* Only support migrate VMSAs for HYGON CSV2 guest */ ++ if (!sev_es_enabled() || !is_hygon_cpu()) { ++ return 0; ++ } ++ ++ status = qemu_get_be32(f); ++ while (status == ENCRYPTED_CPU_STATE_CONT) { ++ ret = sev_receive_update_vmsa(f); ++ if (ret) { ++ break; ++ } ++ ++ status = qemu_get_be32(f); ++ } ++ ++ return ret; ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +diff --git a/target/i386/sev.h b/target/i386/sev.h +index f7886116e7..209c92fd6f 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -43,6 +43,7 @@ typedef struct SevKernelLoaderContext { + + #define RAM_SAVE_ENCRYPTED_PAGE_BATCH 0x4 + #define RAM_SAVE_ENCRYPTED_PAGE_BATCH_END 0x5 ++#define RAM_SAVE_ENCRYPTED_CPU_STATE 0x6 + + #ifdef CONFIG_SEV + bool sev_enabled(void); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 475de65ad4..87b765c73c 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -17,3 +17,5 @@ kvm_sev_send_finish(void) "" + kvm_sev_receive_start(int policy, void *session, void *pdh) "policy 0x%x session %p pdh %p" + kvm_sev_receive_update_data(void *src, void *dst, int len, void *hdr, int hdr_len) "guest %p trans %p len %d hdr %p 
hdr_len %d" + kvm_sev_receive_finish(void) "" ++kvm_sev_send_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *dst, int len) "cpu_id %d cpu_index %d trans %p len %d" ++kvm_sev_receive_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *src, int len, void *hdr, int hdr_len) "cpu_id %d cpu_index %d trans %p len %d hdr %p hdr_len %d" +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-support-to-migrate-the-incoming--new.patch b/target-i386-csv-Add-support-to-migrate-the-incoming--new.patch new file mode 100644 index 0000000000000000000000000000000000000000..7990698b08b76233d1f8ccc26f11295ba94d2ab2 --- /dev/null +++ b/target-i386-csv-Add-support-to-migrate-the-incoming--new.patch @@ -0,0 +1,110 @@ +From b31be8b06440deccdf00de2a7886d04fe87dc802 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Fri, 17 Jun 2022 10:00:46 +0800 +Subject: [PATCH] target/i386: csv: Add support to migrate the incoming context + for CSV3 guest + +The csv3_load_incoming_context() provides the method to read incoming +guest's context from socket. It loads them into guest private memory. +This is the last step during migration and RECEIVE FINISH command is +performed by then to complete the whole migration. 
+ +Signed-off-by: Jiang Xin +Signed-off-by: hanliyang +--- + target/i386/csv.c | 45 ++++++++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 1 + + target/i386/trace-events | 1 + + 3 files changed, 47 insertions(+) + +diff --git a/target/i386/csv.c b/target/i386/csv.c +index cc90b57e5b..571beeb61f 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -47,6 +47,7 @@ struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops = { + .queue_incoming_page = NULL, + .load_queued_incoming_pages = NULL, + .save_outgoing_cpu_state = csv3_save_outgoing_context, ++ .load_incoming_cpu_state = csv3_load_incoming_context, + }; + + #define CSV3_OUTGOING_PAGE_NUM \ +@@ -644,6 +645,42 @@ err: + return ret; + } + ++static int ++csv3_receive_encrypt_context(Csv3GuestState *s, QEMUFile *f) ++{ ++ int ret = 1, fw_error = 0; ++ gchar *hdr = NULL, *trans = NULL; ++ struct kvm_csv3_receive_encrypt_context update = {}; ++ ++ /* get packet header */ ++ update.hdr_len = qemu_get_be32(f); ++ ++ hdr = g_new(gchar, update.hdr_len); ++ qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len); ++ update.hdr_uaddr = (uintptr_t)hdr; ++ ++ /* get transport buffer */ ++ update.trans_len = qemu_get_be32(f); ++ ++ trans = g_new(gchar, update.trans_len); ++ update.trans_uaddr = (uintptr_t)trans; ++ qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ ++ trace_kvm_csv3_receive_encrypt_context(trans, update.trans_len, hdr, update.hdr_len); ++ ++ ret = csv3_ioctl(KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT, &update, &fw_error); ++ if (ret) { ++ error_report("Error RECEIVE_ENCRYPT_CONTEXT ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++err: ++ g_free(trans); ++ g_free(hdr); ++ return ret; ++} ++ + int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent) + { + Csv3GuestState *s = &csv3_guest; +@@ -651,3 +688,11 @@ int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent) + /* send csv3 context. 
*/ + return csv3_send_encrypt_context(s, f, bytes_sent); + } ++ ++int csv3_load_incoming_context(QEMUFile *f) ++{ ++ Csv3GuestState *s = &csv3_guest; ++ ++ /* receive csv3 context. */ ++ return csv3_receive_encrypt_context(s, f); ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 9f83a271fd..8621f0b6fd 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -123,6 +123,7 @@ int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + int csv3_shared_region_dma_map(uint64_t start, uint64_t end); + void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); + int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr); ++int csv3_load_incoming_context(QEMUFile *f); + int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); + int csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); + int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 043412c569..ad3cfb9612 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -25,3 +25,4 @@ kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" P + kvm_csv3_send_encrypt_data(void *dst, int len) "trans %p len %d" + kvm_csv3_send_encrypt_context(void *dst, int len) "trans %p len %d" + kvm_csv3_receive_encrypt_data(void *dst, int len, void *hdr, int hdr_len) "trans %p len %d hdr %p hdr_len %d" ++kvm_csv3_receive_encrypt_context(void *dst, int len, void *hdr, int hdr_len) "trans %p len %d hdr %p hdr_len %d" +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-support-to-migrate-the-incoming-.patch b/target-i386-csv-Add-support-to-migrate-the-incoming-.patch new file mode 100644 index 0000000000000000000000000000000000000000..53491fbfd32d942487b0428130cb2bf48d18ae02 --- /dev/null +++ b/target-i386-csv-Add-support-to-migrate-the-incoming-.patch @@ -0,0 +1,205 @@ +From 3434042340ca031b6d355cc79dd00e166bd2e2fd Mon Sep 17 00:00:00 2001 +From: 
jiangxin +Date: Fri, 17 Jun 2022 09:45:45 +0800 +Subject: [PATCH] target/i386: csv: Add support to migrate the incoming page + for CSV3 guest + +The csv3_receive_encrypt_data() provides the method to read incoming +guest private pages from socket and load them into guest memory. +The routine is similar to CSV2's. Usually, it starts with a RECEIVE +START command to create the migration context. Then RECEIVE ENCRYPT +DATA command is performed to let the firmware load incoming pages +into guest memory. After migration is completed, a RECEIVE FINISH +command is performed to the firmware. + +Signed-off-by: Jiang Xin +Signed-off-by: hanliyang +--- + target/i386/csv.c | 87 ++++++++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 2 + + target/i386/sev.c | 8 ++++ + target/i386/sev.h | 1 + + target/i386/trace-events | 1 + + 5 files changed, 99 insertions(+) + +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 22e709a95c..ac080b3766 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -38,11 +38,14 @@ bool csv_kvm_cpu_reset_inhibit; + struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops = { + .save_setup = sev_save_setup, + .save_outgoing_page = NULL, ++ .load_incoming_page = csv3_load_incoming_page, + .is_gfn_in_unshared_region = NULL, + .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list, + .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, + .queue_outgoing_page = csv3_queue_outgoing_page, + .save_queued_outgoing_pages = csv3_save_queued_outgoing_pages, ++ .queue_incoming_page = NULL, ++ .load_queued_incoming_pages = NULL, + }; + + #define CSV3_OUTGOING_PAGE_NUM \ +@@ -89,6 +92,7 @@ csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) + QTAILQ_INIT(&csv3_guest.dma_map_regions_list); + qemu_mutex_init(&csv3_guest.dma_map_regions_list_mutex); + csv3_guest.sev_send_start = ops->sev_send_start; ++ csv3_guest.sev_receive_start = ops->sev_receive_start; + } + return 0; + } +@@ 
-483,3 +487,86 @@ csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) + + return csv3_send_encrypt_data(s, f, NULL, 0, bytes_sent); + } ++ ++static int ++csv3_receive_start(QEMUFile *f) ++{ ++ if (csv3_guest.sev_receive_start) ++ return csv3_guest.sev_receive_start(f); ++ else ++ return -1; ++} ++ ++static int csv3_receive_encrypt_data(QEMUFile *f, uint8_t *ptr) ++{ ++ int ret = 1, fw_error = 0; ++ uint32_t i, guest_addr_entry_num; ++ gchar *hdr = NULL, *trans = NULL; ++ struct guest_addr_entry *guest_addr_data; ++ struct kvm_csv3_receive_encrypt_data update = {}; ++ void *hva = NULL; ++ MemoryRegion *mr = NULL; ++ ++ /* get packet header */ ++ update.hdr_len = qemu_get_be32(f); ++ ++ hdr = g_new(gchar, update.hdr_len); ++ qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len); ++ update.hdr_uaddr = (uintptr_t)hdr; ++ ++ /* get guest addr data */ ++ update.guest_addr_len = qemu_get_be32(f); ++ ++ guest_addr_data = (struct guest_addr_entry *)g_new(gchar, update.guest_addr_len); ++ qemu_get_buffer(f, (uint8_t *)guest_addr_data, update.guest_addr_len); ++ update.guest_addr_data = (uintptr_t)guest_addr_data; ++ ++ /* get transport buffer */ ++ update.trans_len = qemu_get_be32(f); ++ ++ trans = g_new(gchar, update.trans_len); ++ update.trans_uaddr = (uintptr_t)trans; ++ qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ ++ /* update share memory. 
*/ ++ guest_addr_entry_num = update.guest_addr_len / sizeof(struct guest_addr_entry); ++ for (i = 0; i < guest_addr_entry_num; i++) { ++ if (guest_addr_data[i].share) { ++ hva = gpa2hva(&mr, ++ ((uint64_t)guest_addr_data[i].gfn << TARGET_PAGE_BITS), ++ TARGET_PAGE_SIZE, ++ NULL); ++ if (hva) ++ memcpy(hva, trans + i * TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); ++ } ++ } ++ ++ trace_kvm_csv3_receive_encrypt_data(trans, update.trans_len, hdr, update.hdr_len); ++ ++ ret = csv3_ioctl(KVM_CSV3_RECEIVE_ENCRYPT_DATA, &update, &fw_error); ++ if (ret) { ++ error_report("Error RECEIVE_ENCRYPT_DATA ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++err: ++ g_free(trans); ++ g_free(guest_addr_data); ++ g_free(hdr); ++ return ret; ++} ++ ++int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr) ++{ ++ /* ++ * If this is first buffer and SEV is not in recieiving state then ++ * use RECEIVE_START command to create a encryption context. ++ */ ++ if (!csv3_check_state(SEV_STATE_RECEIVE_UPDATE) && ++ csv3_receive_start(f)) { ++ return 1; ++ } ++ ++ return csv3_receive_encrypt_data(f, ptr); ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 12c1b22659..afcd59180c 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -108,6 +108,7 @@ struct Csv3GuestState { + size_t guest_addr_len; + + int (*sev_send_start)(QEMUFile *f, uint64_t *bytes_sent); ++ int (*sev_receive_start)(QEMUFile *f); + }; + + typedef struct Csv3GuestState Csv3GuestState; +@@ -121,6 +122,7 @@ int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + + int csv3_shared_region_dma_map(uint64_t start, uint64_t end); + void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); ++int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr); + int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); + int csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 
5a96b0b452..5124bf3dee 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2665,10 +2665,18 @@ static int _sev_send_start(QEMUFile *f, uint64_t *bytes_sent) + return sev_send_start(s, f, bytes_sent); + } + ++static int _sev_receive_start(QEMUFile *f) ++{ ++ SevGuestState *s = sev_guest; ++ ++ return sev_receive_start(s, f); ++} ++ + struct sev_ops sev_ops = { + .sev_ioctl = sev_ioctl, + .fw_error_to_str = fw_error_to_str, + .sev_send_start = _sev_send_start, ++ .sev_receive_start = _sev_receive_start, + }; + + static void +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 8ccef22a95..647b426b16 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -84,6 +84,7 @@ struct sev_ops { + int (*sev_ioctl)(int fd, int cmd, void *data, int *error); + const char *(*fw_error_to_str)(int code); + int (*sev_send_start)(QEMUFile *f, uint64_t *bytes_sent); ++ int (*sev_receive_start)(QEMUFile *f); + }; + + extern struct sev_ops sev_ops; +diff --git a/target/i386/trace-events b/target/i386/trace-events +index a4a58b12a1..b3cb9aaf71 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -23,3 +23,4 @@ kvm_sev_receive_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *src, int + # csv.c + kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" PRIx64 "addr %p len 0x%" PRIx64 + kvm_csv3_send_encrypt_data(void *dst, int len) "trans %p len %d" ++kvm_csv3_receive_encrypt_data(void *dst, int len, void *hdr, int hdr_len) "trans %p len %d hdr %p hdr_len %d" +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-support-to-migrate-the-outgoing--new.patch b/target-i386-csv-Add-support-to-migrate-the-outgoing--new.patch new file mode 100644 index 0000000000000000000000000000000000000000..bef06efc36eb4d6d34518e5b738617b5f601a1ff --- /dev/null +++ b/target-i386-csv-Add-support-to-migrate-the-outgoing--new.patch @@ -0,0 +1,139 @@ +From 0ebf32463e858c5f9cbd98e3f2fe494d0fbea259 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: 
Fri, 17 Jun 2022 09:52:31 +0800 +Subject: [PATCH] target/i386: csv: Add support to migrate the outgoing context + for CSV3 guest + +CSV3 needs to migrate guest cpu's context pages. Prior to migration +of the context, it should query transfer buffer length and header +data length by SEND ENCRYPT CONTEXT command. New migration flag +RAM_SAVE_ENCRYPTED_CSV3_CONTEXT is defined for CSV3. + +Signed-off-by: Jiang Xin +Signed-off-by: hanliyang +--- + target/i386/csv.c | 81 ++++++++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 1 + + target/i386/trace-events | 1 + + 3 files changed, 83 insertions(+) + +diff --git a/target/i386/csv.c b/target/i386/csv.c +index ac080b3766..cc90b57e5b 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -46,6 +46,7 @@ struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops = { + .save_queued_outgoing_pages = csv3_save_queued_outgoing_pages, + .queue_incoming_page = NULL, + .load_queued_incoming_pages = NULL, ++ .save_outgoing_cpu_state = csv3_save_outgoing_context, + }; + + #define CSV3_OUTGOING_PAGE_NUM \ +@@ -570,3 +571,83 @@ int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr) + + return csv3_receive_encrypt_data(f, ptr); + } ++ ++static int ++csv3_send_get_context_len(int *fw_err, int *context_len, int *hdr_len) ++{ ++ int ret = 0; ++ struct kvm_csv3_send_encrypt_context update = { 0 }; ++ ++ ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_CONTEXT, &update, fw_err); ++ if (*fw_err != SEV_RET_INVALID_LEN) { ++ error_report("%s: failed to get context length ret=%d fw_error=%d '%s'", ++ __func__, ret, *fw_err, fw_error_to_str(*fw_err)); ++ ret = -1; ++ goto err; ++ } ++ ++ if (update.trans_len <= INT_MAX && update.hdr_len <= INT_MAX) { ++ *context_len = update.trans_len; ++ *hdr_len = update.hdr_len; ++ } ++ ret = 0; ++err: ++ return ret; ++} ++ ++static int ++csv3_send_encrypt_context(Csv3GuestState *s, QEMUFile *f, uint64_t *bytes_sent) ++{ ++ int ret, fw_error = 0; ++ int context_len = 0; ++ int hdr_len = 0; ++ 
guchar *trans; ++ guchar *hdr; ++ struct kvm_csv3_send_encrypt_context update = { }; ++ ++ ret = csv3_send_get_context_len(&fw_error, &context_len, &hdr_len); ++ if (context_len < 1 || hdr_len < 1) { ++ error_report("%s: fail to get context length fw_error=%d '%s'", ++ __func__, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ /* allocate transport buffer */ ++ trans = g_new(guchar, context_len); ++ hdr = g_new(guchar, hdr_len); ++ ++ update.hdr_uaddr = (uintptr_t)hdr; ++ update.hdr_len = hdr_len; ++ update.trans_uaddr = (uintptr_t)trans; ++ update.trans_len = context_len; ++ ++ trace_kvm_csv3_send_encrypt_context(trans, update.trans_len); ++ ++ ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_CONTEXT, &update, &fw_error); ++ if (ret) { ++ error_report("%s: SEND_ENCRYPT_CONTEXT ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++ qemu_put_be32(f, update.hdr_len); ++ qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len); ++ *bytes_sent += 4 + update.hdr_len; ++ ++ qemu_put_be32(f, update.trans_len); ++ qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ *bytes_sent += 4 + update.trans_len; ++ ++err: ++ g_free(trans); ++ g_free(hdr); ++ return ret; ++} ++ ++int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent) ++{ ++ Csv3GuestState *s = &csv3_guest; ++ ++ /* send csv3 context. 
*/ ++ return csv3_send_encrypt_context(s, f, bytes_sent); ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index afcd59180c..9f83a271fd 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -125,5 +125,6 @@ void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); + int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr); + int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); + int csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); ++int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent); + + #endif +diff --git a/target/i386/trace-events b/target/i386/trace-events +index b3cb9aaf71..043412c569 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -23,4 +23,5 @@ kvm_sev_receive_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *src, int + # csv.c + kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" PRIx64 "addr %p len 0x%" PRIx64 + kvm_csv3_send_encrypt_data(void *dst, int len) "trans %p len %d" ++kvm_csv3_send_encrypt_context(void *dst, int len) "trans %p len %d" + kvm_csv3_receive_encrypt_data(void *dst, int len, void *hdr, int hdr_len) "trans %p len %d hdr %p hdr_len %d" +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Add-support-to-migrate-the-outgoing-.patch b/target-i386-csv-Add-support-to-migrate-the-outgoing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..886f065a8d588e03e8e97b830cc7a73bf508f0ca --- /dev/null +++ b/target-i386-csv-Add-support-to-migrate-the-outgoing-.patch @@ -0,0 +1,452 @@ +From 13bd2629b78f528b0b4684a643f59d30b7274aa8 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Fri, 17 Jun 2022 09:37:56 +0800 +Subject: [PATCH] target/i386: csv: Add support to migrate the outgoing page + for CSV3 guest + +The csv3_send_encrypt_data() provides the method to encrypt the +guest's private pages during migration. The routine is similar to +CSV2's. 
Usually, it starts with a SEND_START command to create the +migration context. Then SEND_ENCRYPT_DATA command is performed to +encrypt guest pages. After migration is completed, a SEND_FINISH +command is performed to the firmware. + +Signed-off-by: Jiang Xin +Signed-off-by: hanliyang +--- + migration/ram.c | 87 +++++++++++++++++++ + target/i386/csv.c | 182 +++++++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 22 +++++ + target/i386/sev.c | 14 ++- + target/i386/sev.h | 1 + + target/i386/trace-events | 1 + + 6 files changed, 306 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 1377b9eb37..1f9348fd06 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2480,6 +2480,90 @@ ram_save_encrypted_pages_in_batch(RAMState *rs, PageSearchStatus *pss) + } + #endif + ++/** ++ * ram_save_csv3_pages - send the given csv3 VM pages to the stream ++ */ ++static int ram_save_csv3_pages(RAMState *rs, PageSearchStatus *pss) ++{ ++ bool page_dirty; ++ int ret; ++ int tmppages, pages = 0; ++ uint8_t *p; ++ uint32_t host_len = 0; ++ uint64_t bytes_xmit = 0; ++ RAMBlock *block = pss->block; ++ ram_addr_t offset = 0; ++ hwaddr paddr = RAM_ADDR_INVALID; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); ++ struct ConfidentialGuestMemoryEncryptionOps *ops = ++ cgs_class->memory_encryption_ops; ++ ++ if (!kvm_csv3_enabled()) ++ return 0; ++ ++ do { ++ page_dirty = migration_bitmap_clear_dirty(rs, block, pss->page); ++ ++ /* Check the pages is dirty and if it is send it */ ++ if (page_dirty) { ++ ret = kvm_physical_memory_addr_from_host(kvm_state, ++ block->host + (pss->page << TARGET_PAGE_BITS), &paddr); ++ /* Process ROM or MMIO */ ++ if (paddr == RAM_ADDR_INVALID || ++ memory_region_is_rom(block->mr)) { ++ tmppages = migration_ops->ram_save_target_page(rs, pss); ++ } else { ++ /* Caculate the offset and host virtual address of 
the page */ ++ offset = pss->page << TARGET_PAGE_BITS; ++ p = block->host + offset; ++ ++ if (ops->queue_outgoing_page(p, TARGET_PAGE_SIZE, offset)) ++ return -1; ++ ++ tmppages = 1; ++ host_len += TARGET_PAGE_SIZE; ++ ++ stat64_add(&mig_stats.normal_pages, 1); ++ } ++ } else { ++ tmppages = 0; ++ } ++ ++ if (tmppages >= 0) { ++ pages += tmppages; ++ } else { ++ return tmppages; ++ } ++ ++ pss_find_next_dirty(pss); ++ } while (offset_in_ramblock(block, ++ ((ram_addr_t)pss->page) << TARGET_PAGE_BITS) && ++ host_len < CSV3_OUTGOING_PAGE_WINDOW_SIZE); ++ ++ /* Check if there are any queued pages */ ++ if (host_len != 0) { ++ /* Always set offset as 0 for csv3. */ ++ ram_transferred_add(save_page_header(pss, pss->pss_channel, ++ block, 0 | RAM_SAVE_FLAG_ENCRYPTED_DATA)); ++ ++ qemu_put_be32(pss->pss_channel, RAM_SAVE_ENCRYPTED_PAGE); ++ ram_transferred_add(4); ++ /* Process the queued pages in batch */ ++ ret = ops->save_queued_outgoing_pages(pss->pss_channel, &bytes_xmit); ++ if (ret) { ++ return -1; ++ } ++ ram_transferred_add(bytes_xmit); ++ } ++ ++ /* The offset we leave with is the last one we looked at */ ++ pss->page--; ++ ++ return pages; ++} ++ + /** + * ram_save_host_page: save a whole host page + * +@@ -2515,6 +2599,9 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) + return 0; + } + ++ if (kvm_csv3_enabled()) ++ return ram_save_csv3_pages(rs, pss); ++ + #ifdef CONFIG_HYGON_CSV_MIG_ACCEL + /* + * If command_batch function is enabled and memory encryption is enabled +diff --git a/target/i386/csv.c b/target/i386/csv.c +index e4706efa27..22e709a95c 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -16,8 +16,13 @@ + #include "qapi/error.h" + #include "sysemu/kvm.h" + #include "exec/address-spaces.h" ++#include "migration/blocker.h" ++#include "migration/qemu-file.h" ++#include "migration/misc.h" ++#include "monitor/monitor.h" + + #include ++#include + + #ifdef CONFIG_NUMA + #include +@@ -30,6 +35,19 @@ + + bool 
csv_kvm_cpu_reset_inhibit; + ++struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops = { ++ .save_setup = sev_save_setup, ++ .save_outgoing_page = NULL, ++ .is_gfn_in_unshared_region = NULL, ++ .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list, ++ .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, ++ .queue_outgoing_page = csv3_queue_outgoing_page, ++ .save_queued_outgoing_pages = csv3_save_queued_outgoing_pages, ++}; ++ ++#define CSV3_OUTGOING_PAGE_NUM \ ++ (CSV3_OUTGOING_PAGE_WINDOW_SIZE / TARGET_PAGE_SIZE) ++ + Csv3GuestState csv3_guest = { 0 }; + + int +@@ -70,6 +88,7 @@ csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) + csv3_guest.fw_error_to_str = ops->fw_error_to_str; + QTAILQ_INIT(&csv3_guest.dma_map_regions_list); + qemu_mutex_init(&csv3_guest.dma_map_regions_list_mutex); ++ csv3_guest.sev_send_start = ops->sev_send_start; + } + return 0; + } +@@ -301,3 +320,166 @@ end: + qemu_mutex_unlock(&s->dma_map_regions_list_mutex); + return; + } ++ ++static inline hwaddr csv3_hva_to_gfn(uint8_t *ptr) ++{ ++ ram_addr_t offset = RAM_ADDR_INVALID; ++ ++ kvm_physical_memory_addr_from_host(kvm_state, ptr, &offset); ++ ++ return offset >> TARGET_PAGE_BITS; ++} ++ ++static int ++csv3_send_start(QEMUFile *f, uint64_t *bytes_sent) ++{ ++ if (csv3_guest.sev_send_start) ++ return csv3_guest.sev_send_start(f, bytes_sent); ++ else ++ return -1; ++} ++ ++static int ++csv3_send_get_packet_len(int *fw_err) ++{ ++ int ret; ++ struct kvm_csv3_send_encrypt_data update = {0}; ++ ++ update.hdr_len = 0; ++ update.trans_len = 0; ++ ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_DATA, &update, fw_err); ++ if (*fw_err != SEV_RET_INVALID_LEN) { ++ error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", ++ __func__, ret, *fw_err, fw_error_to_str(*fw_err)); ++ ret = 0; ++ goto err; ++ } ++ ++ if (update.hdr_len <= INT_MAX) ++ ret = update.hdr_len; ++ else ++ ret = 0; ++ ++err: ++ return ret; ++} ++ 
++static int ++csv3_send_encrypt_data(Csv3GuestState *s, QEMUFile *f, ++ uint8_t *ptr, uint32_t size, uint64_t *bytes_sent) ++{ ++ int ret, fw_error = 0; ++ guchar *trans; ++ uint32_t guest_addr_entry_num; ++ uint32_t i; ++ struct kvm_csv3_send_encrypt_data update = { }; ++ ++ /* ++ * If this is first call then query the packet header bytes and allocate ++ * the packet buffer. ++ */ ++ if (!s->send_packet_hdr) { ++ s->send_packet_hdr_len = csv3_send_get_packet_len(&fw_error); ++ if (s->send_packet_hdr_len < 1) { ++ error_report("%s: SEND_UPDATE fw_error=%d '%s'", ++ __func__, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ s->send_packet_hdr = g_new(gchar, s->send_packet_hdr_len); ++ } ++ ++ if (!s->guest_addr_len || !s->guest_addr_data) { ++ error_report("%s: invalid host address or size", __func__); ++ return 1; ++ } else { ++ guest_addr_entry_num = s->guest_addr_len / sizeof(struct guest_addr_entry); ++ } ++ ++ /* allocate transport buffer */ ++ trans = g_new(guchar, guest_addr_entry_num * TARGET_PAGE_SIZE); ++ ++ update.hdr_uaddr = (uintptr_t)s->send_packet_hdr; ++ update.hdr_len = s->send_packet_hdr_len; ++ update.guest_addr_data = (uintptr_t)s->guest_addr_data; ++ update.guest_addr_len = s->guest_addr_len; ++ update.trans_uaddr = (uintptr_t)trans; ++ update.trans_len = guest_addr_entry_num * TARGET_PAGE_SIZE; ++ ++ trace_kvm_csv3_send_encrypt_data(trans, update.trans_len); ++ ++ ret = csv3_ioctl(KVM_CSV3_SEND_ENCRYPT_DATA, &update, &fw_error); ++ if (ret) { ++ error_report("%s: SEND_ENCRYPT_DATA ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++ for (i = 0; i < guest_addr_entry_num; i++) { ++ if (s->guest_addr_data[i].share) ++ memcpy(trans + i * TARGET_PAGE_SIZE, (guchar *)s->guest_hva_data[i].hva, ++ TARGET_PAGE_SIZE); ++ } ++ ++ qemu_put_be32(f, update.hdr_len); ++ qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len); ++ *bytes_sent += 4 + update.hdr_len; ++ ++ qemu_put_be32(f, 
update.guest_addr_len); ++ qemu_put_buffer(f, (uint8_t *)update.guest_addr_data, update.guest_addr_len); ++ *bytes_sent += 4 + update.guest_addr_len; ++ ++ qemu_put_be32(f, update.trans_len); ++ qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ *bytes_sent += (4 + update.trans_len); ++ ++err: ++ s->guest_addr_len = 0; ++ g_free(trans); ++ return ret; ++} ++ ++int ++csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) ++{ ++ Csv3GuestState *s = &csv3_guest; ++ uint32_t i = 0; ++ ++ if (!s->guest_addr_data) { ++ s->guest_hva_data = g_new0(struct guest_hva_entry, CSV3_OUTGOING_PAGE_NUM); ++ s->guest_addr_data = g_new0(struct guest_addr_entry, CSV3_OUTGOING_PAGE_NUM); ++ s->guest_addr_len = 0; ++ } ++ ++ if (s->guest_addr_len >= sizeof(struct guest_addr_entry) * CSV3_OUTGOING_PAGE_NUM) { ++ error_report("Failed to queue outgoing page"); ++ return 1; ++ } ++ ++ i = s->guest_addr_len / sizeof(struct guest_addr_entry); ++ s->guest_hva_data[i].hva = (uintptr_t)ptr; ++ s->guest_addr_data[i].share = 0; ++ s->guest_addr_data[i].reserved = 0; ++ s->guest_addr_data[i].gfn = csv3_hva_to_gfn(ptr); ++ s->guest_addr_len += sizeof(struct guest_addr_entry); ++ ++ return 0; ++} ++ ++int ++csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) ++{ ++ Csv3GuestState *s = &csv3_guest; ++ ++ /* ++ * If this is a first buffer then create outgoing encryption context ++ * and write our PDH, policy and session data. 
++ */ ++ if (!csv3_check_state(SEV_STATE_SEND_UPDATE) && ++ csv3_send_start(f, bytes_sent)) { ++ error_report("Failed to create outgoing context"); ++ return 1; ++ } ++ ++ return csv3_send_encrypt_data(s, f, NULL, 0, bytes_sent); ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 12733341b3..12c1b22659 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -81,6 +81,18 @@ struct dma_map_region { + QTAILQ_ENTRY(dma_map_region) list; + }; + ++#define CSV3_OUTGOING_PAGE_WINDOW_SIZE (512 * TARGET_PAGE_SIZE) ++ ++struct guest_addr_entry { ++ uint64_t share: 1; ++ uint64_t reserved: 11; ++ uint64_t gfn: 52; ++}; ++ ++struct guest_hva_entry { ++ uint64_t hva; ++}; ++ + struct Csv3GuestState { + uint32_t policy; + int sev_fd; +@@ -89,11 +101,19 @@ struct Csv3GuestState { + const char *(*fw_error_to_str)(int code); + QTAILQ_HEAD(, dma_map_region) dma_map_regions_list; + QemuMutex dma_map_regions_list_mutex; ++ gchar *send_packet_hdr; ++ size_t send_packet_hdr_len; ++ struct guest_hva_entry *guest_hva_data; ++ struct guest_addr_entry *guest_addr_data; ++ size_t guest_addr_len; ++ ++ int (*sev_send_start)(QEMUFile *f, uint64_t *bytes_sent); + }; + + typedef struct Csv3GuestState Csv3GuestState; + + extern struct Csv3GuestState csv3_guest; ++extern struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops; + extern int csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops); + extern int csv3_launch_encrypt_vmcb(void); + +@@ -101,5 +121,7 @@ int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + + int csv3_shared_region_dma_map(uint64_t start, uint64_t end); + void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); ++int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); ++int csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); + + #endif +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 0012a5efb0..5a96b0b452 100644 +--- a/target/i386/sev.c ++++ 
b/target/i386/sev.c +@@ -1270,7 +1270,11 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + qemu_add_vm_change_state_handler(sev_vm_state_change, sev); + migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); + +- cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; ++ if (csv3_enabled()) { ++ cgs_class->memory_encryption_ops = &csv3_memory_encryption_ops; ++ } else { ++ cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; ++ } + QTAILQ_INIT(&sev->shared_regions_list); + + /* Determine whether support MSR_AMD64_SEV_ES_GHCB */ +@@ -2654,9 +2658,17 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static int _sev_send_start(QEMUFile *f, uint64_t *bytes_sent) ++{ ++ SevGuestState *s = sev_guest; ++ ++ return sev_send_start(s, f, bytes_sent); ++} ++ + struct sev_ops sev_ops = { + .sev_ioctl = sev_ioctl, + .fw_error_to_str = fw_error_to_str, ++ .sev_send_start = _sev_send_start, + }; + + static void +diff --git a/target/i386/sev.h b/target/i386/sev.h +index e91431e0f7..8ccef22a95 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -83,6 +83,7 @@ extern bool sev_kvm_has_msr_ghcb; + struct sev_ops { + int (*sev_ioctl)(int fd, int cmd, void *data, int *error); + const char *(*fw_error_to_str)(int code); ++ int (*sev_send_start)(QEMUFile *f, uint64_t *bytes_sent); + }; + + extern struct sev_ops sev_ops; +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 34c205ffda..a4a58b12a1 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -22,3 +22,4 @@ kvm_sev_receive_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *src, int + + # csv.c + kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" PRIx64 "addr %p len 0x%" PRIx64 ++kvm_csv3_send_encrypt_data(void *dst, int len) "trans %p len %d" +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Do-not-register-unregister-guest-sec.patch 
b/target-i386-csv-Do-not-register-unregister-guest-sec.patch new file mode 100644 index 0000000000000000000000000000000000000000..419aa8bc7352cc29c77745824fa28171efddb6e2 --- /dev/null +++ b/target-i386-csv-Do-not-register-unregister-guest-sec.patch @@ -0,0 +1,35 @@ +From a3e8267b93d1e77dc547fff6fb9af6f8d48a674f Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Wed, 25 Aug 2021 12:36:00 +0800 +Subject: [PATCH] target/i386: csv: Do not register/unregister guest secure + memory for CSV3 guest + +CSV3's guest memory is allocated by firmware in secure processor +from dedicated memory reserved upon system boot up, consequently +it is not necessary to add notifier to pin/unpin memory. + +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + target/i386/sev.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 6ff8891678..0012a5efb0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1262,7 +1262,10 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + } + +- ram_block_notifier_add(&sev_ram_notifier); ++ /* CSV3 guest do not need notifier to reg/unreg memory */ ++ if (!csv3_enabled()) { ++ ram_block_notifier_add(&sev_ram_notifier); ++ } + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev); + migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Load-initial-image-to-private-memory.patch b/target-i386-csv-Load-initial-image-to-private-memory.patch new file mode 100644 index 0000000000000000000000000000000000000000..011ce59be4132b566b400141c5496c6e7f9dad63 --- /dev/null +++ b/target-i386-csv-Load-initial-image-to-private-memory.patch @@ -0,0 +1,52 @@ +From ed3c233cc00d4c30718fc64b3afc48a51b4eb438 Mon Sep 17 00:00:00 2001 +From: jiangxin +Date: Wed, 25 Aug 2021 14:29:40 +0800 +Subject: [PATCH] target/i386: csv: Load initial image to 
private memory for + CSV3 guest + +The initial image of CSV3 guest should be loaded into private memory +before boot the guest. + +Add APIs to implement the image load. + +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + hw/i386/pc_sysfw.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index c8d9e71b88..2bbcbb8d35 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -37,6 +37,7 @@ + #include "hw/block/flash.h" + #include "sysemu/kvm.h" + #include "sev.h" ++#include "csv.h" + + #define FLASH_SECTOR_SIZE 4096 + +@@ -263,7 +264,18 @@ void x86_firmware_configure(void *ptr, int size) + error_report("failed to locate and/or save reset vector"); + exit(1); + } ++ if (csv3_enabled()) { ++ ram_addr_t offset = 0; ++ MemoryRegion *mr; + +- sev_encrypt_flash(ptr, size, &error_fatal); ++ mr = memory_region_from_host(ptr, &offset); ++ if (!mr) { ++ error_report("failed to get memory region of flash"); ++ exit(1); ++ } ++ csv3_load_data(mr->addr + offset, ptr, size, &error_fatal); ++ } else { ++ sev_encrypt_flash(ptr, size, &error_fatal); ++ } + } + } +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Read-cert-chain-from-file-when-prepa.patch b/target-i386-csv-Read-cert-chain-from-file-when-prepa.patch new file mode 100644 index 0000000000000000000000000000000000000000..e9440f46f8d6936157c839bdee0c27095ab3e0d4 --- /dev/null +++ b/target-i386-csv-Read-cert-chain-from-file-when-prepa.patch @@ -0,0 +1,140 @@ +From d23c6a2bcc836587620bd35726ca4d5f71c0a844 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Mon, 13 Nov 2023 21:55:33 +0000 +Subject: [PATCH] target/i386: csv: Read cert chain from file when prepared for + CSV live migration + +The cert chain is too long when encoded with base64, use the filename +of cert chain instead of the encoded string when prepared for CSV live +migration. + +[ Fix conflicts. 
] +Signed-off-by: hanliyang +--- + qapi/migration.json | 24 +++++++++++++++--------- + target/i386/sev.c | 30 ++++++++++++++++++++++++++---- + 2 files changed, 41 insertions(+), 13 deletions(-) + +diff --git a/qapi/migration.json b/qapi/migration.json +index 038e99cba3..3aed216c3b 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -891,14 +891,16 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # +-# @sev-pdh: The target host platform diffie-hellman key encoded in base64 ++# @sev-pdh: The target host platform diffie-hellman key encoded in base64, or ++# pdh filename for hygon + # (Since 4.2) + # +-# @sev-plat-cert: The target host platform certificate chain encoded in base64 ++# @sev-plat-cert: The target host platform certificate chain encoded in base64, ++# or plat cert filename for hygon + # (Since 4.2) + # + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in +-# base64 (Since 4.2) ++# base64, or vendor cert filename for hygon (Since 4.2) + # + # Features: + # +@@ -1093,14 +1095,16 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # +-# @sev-pdh: The target host platform diffie-hellman key encoded in base64 ++# @sev-pdh: The target host platform diffie-hellman key encoded in base64, or ++# pdh filename for hygon + # (Since 4.2) + # +-# @sev-plat-cert: The target host platform certificate chain encoded in base64 ++# @sev-plat-cert: The target host platform certificate chain encoded in base64, ++# or plat cert filename for hygon + # (Since 4.2) + # + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in +-# base64 (Since 4.2) ++# base64, or vendor cert filename for hygon (Since 4.2) + # + # Features: + # +@@ -1340,14 +1344,16 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. 
+ # (Since 8.2) + # +-# @sev-pdh: The target host platform diffie-hellman key encoded in base64 ++# @sev-pdh: The target host platform diffie-hellman key encoded in base64, or ++# pdh filename for hygon + # (Since 4.2) + # +-# @sev-plat-cert: The target host platform certificate chain encoded in base64 ++# @sev-plat-cert: The target host platform certificate chain encoded in base64, ++# or plat cert filename for hygon + # (Since 4.2) + # + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in +-# base64 (Since 4.2) ++# base64, or vendor cert filename for hygon (Since 4.2) + # + # Features: + # +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 0b0f589aee..331dfa4516 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -27,6 +27,7 @@ + #include "crypto/hash.h" + #include "sysemu/kvm.h" + #include "sev.h" ++#include "csv.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" + #include "trace.h" +@@ -979,18 +980,39 @@ int sev_save_setup(const char *pdh, const char *plat_cert, + { + SevGuestState *s = sev_guest; + +- s->remote_pdh = g_base64_decode(pdh, &s->remote_pdh_len); ++ if (is_hygon_cpu()) { ++ if (sev_read_file_base64(pdh, &s->remote_pdh, ++ &s->remote_pdh_len) < 0) { ++ goto error; ++ } ++ } else { ++ s->remote_pdh = g_base64_decode(pdh, &s->remote_pdh_len); ++ } + if (!check_blob_length(s->remote_pdh_len)) { + goto error; + } + +- s->remote_plat_cert = g_base64_decode(plat_cert, +- &s->remote_plat_cert_len); ++ if (is_hygon_cpu()) { ++ if (sev_read_file_base64(plat_cert, &s->remote_plat_cert, ++ &s->remote_plat_cert_len) < 0) { ++ goto error; ++ } ++ } else { ++ s->remote_plat_cert = g_base64_decode(plat_cert, ++ &s->remote_plat_cert_len); ++ } + if (!check_blob_length(s->remote_plat_cert_len)) { + goto error; + } + +- s->amd_cert = g_base64_decode(amd_cert, &s->amd_cert_len); ++ if (is_hygon_cpu()) { ++ if (sev_read_file_base64(amd_cert, &s->amd_cert, ++ &s->amd_cert_len) < 0) { ++ goto error; ++ } ++ } else { 
++ s->amd_cert = g_base64_decode(amd_cert, &s->amd_cert_len); ++ } + if (!check_blob_length(s->amd_cert_len)) { + goto error; + } +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Release-CSV3-shared-pages-after-unma.patch b/target-i386-csv-Release-CSV3-shared-pages-after-unma.patch new file mode 100644 index 0000000000000000000000000000000000000000..51ed89e887e308c78cb4c57ee8991a62135f7501 --- /dev/null +++ b/target-i386-csv-Release-CSV3-shared-pages-after-unma.patch @@ -0,0 +1,134 @@ +From ee97f42ea46a2527d19a3e87f33994d350959a90 Mon Sep 17 00:00:00 2001 +From: eastmoutain <14304864+eastmoutain@user.noreply.gitee.com> +Date: Mon, 20 May 2024 21:12:23 +0800 +Subject: [PATCH] target/i386: csv: Release CSV3 shared pages after unmapping + DMA + +The shared pages are created for Device DMA access, release them +once DMA mapping is removed. + +Signed-off-by: yangwencheng +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 9 +++++++++ + target/i386/csv-sysemu-stub.c | 5 +++++ + target/i386/csv.c | 34 ++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 1 + + target/i386/kvm/kvm.c | 1 + + 5 files changed, 50 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 44a326fddc..a19683f1e9 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2142,6 +2142,7 @@ enum csv3_cmd_id { + KVM_CSV3_SEND_ENCRYPT_CONTEXT, + KVM_CSV3_RECEIVE_ENCRYPT_DATA, + KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT, ++ KVM_CSV3_HANDLE_MEMORY, + + KVM_CSV3_SET_GUEST_PRIVATE_MEMORY = 0xc8, + +@@ -2190,6 +2191,14 @@ struct kvm_csv3_receive_encrypt_context { + __u32 trans_len; + }; + ++#define KVM_CSV3_RELEASE_SHARED_MEMORY (0x0001) ++ ++struct kvm_csv3_handle_memory { ++ __u64 gpa; ++ __u32 num_pages; ++ __u32 opcode; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +index 
e49755da5c..735cce0e4b 100644 +--- a/target/i386/csv-sysemu-stub.c ++++ b/target/i386/csv-sysemu-stub.c +@@ -40,6 +40,11 @@ void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end) + + } + ++void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages) ++{ ++ ++} ++ + int csv3_set_guest_private_memory(Error **errp) + { + g_assert_not_reached(); +diff --git a/target/i386/csv.c b/target/i386/csv.c +index d9b50040a3..b229f7c317 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -270,6 +270,40 @@ end: + return ret; + } + ++void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages) ++{ ++ struct kvm_csv3_handle_memory mem = { 0 }; ++ MemoryRegion *mr = NULL; ++ void *hva; ++ int ret; ++ ++ if (!csv3_enabled()) ++ return; ++ ++ if (!gpa || !num_pages) ++ return; ++ ++ mem.gpa = (__u64)gpa; ++ mem.num_pages = (__u32)num_pages; ++ mem.opcode = (__u32)KVM_CSV3_RELEASE_SHARED_MEMORY; ++ ++ /* unpin the pages */ ++ ret = csv3_ioctl(KVM_CSV3_HANDLE_MEMORY, &mem, NULL); ++ if (ret <= 0) { ++ if (ret < 0) ++ error_report("%s: CSV3 unpin failed ret %d", __func__, ret); ++ return; ++ } ++ ++ /* drop the pages */ ++ hva = gpa2hva(&mr, gpa, num_pages << TARGET_PAGE_BITS, NULL); ++ if (hva) { ++ ret = madvise(hva, num_pages << TARGET_PAGE_BITS, MADV_DONTNEED); ++ if (ret) ++ error_report("%s: madvise failed %d", __func__, ret); ++ } ++} ++ + void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end) + { + MemoryRegionSection section; +diff --git a/target/i386/csv.h b/target/i386/csv.h +index fb669279a8..70f9933d3b 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -124,6 +124,7 @@ int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + + int csv3_shared_region_dma_map(uint64_t start, uint64_t end); + void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); ++void csv3_shared_region_release(uint64_t gpa, uint32_t num_pages); + int csv3_load_incoming_page(QEMUFile *f, uint8_t *ptr); + int 
csv3_load_incoming_context(QEMUFile *f); + int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a867512822..2df3ff99c3 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5099,6 +5099,7 @@ static int kvm_handle_exit_hypercall(X86CPU *cpu, struct kvm_run *run) + if (enc) { + sev_remove_shared_regions_list(gfn_start, gfn_end); + csv3_shared_region_dma_unmap(gpa, gfn_end << TARGET_PAGE_BITS); ++ csv3_shared_region_release(gpa, npages); + } else { + sev_add_shared_regions_list(gfn_start, gfn_end); + csv3_shared_region_dma_map(gpa, gfn_end << TARGET_PAGE_BITS); +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Request-to-set-private-memory-of-CSV.patch b/target-i386-csv-Request-to-set-private-memory-of-CSV.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7210f3fe5ec47df4f04b84beec21ec90acf0d2d --- /dev/null +++ b/target-i386-csv-Request-to-set-private-memory-of-CSV.patch @@ -0,0 +1,149 @@ +From ded4216fbfe740196a3ace80f5cb162b73f676b2 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Sat, 28 Sep 2024 17:37:17 +0800 +Subject: [PATCH] target/i386: csv: Request to set private memory of CSV3 guest + if the extension is enabled + +If Qemu negotiates with Linux KVM to enable the +KVM_CAP_HYGON_COCO_EXT_CSV3_SET_PRIV_MEM capability, then Qemu should +explicitly request the issuance of the CSV3_CMD_SET_GUEST_PRIVATE_MEMORY +command. 
+ +Signed-off-by: hanliyang +--- + hw/i386/pc_sysfw.c | 3 +++ + include/sysemu/kvm.h | 9 +++++++++ + linux-headers/linux/kvm.h | 2 ++ + target/i386/csv-sysemu-stub.c | 5 +++++ + target/i386/csv.c | 23 +++++++++++++++++++++++ + target/i386/csv.h | 2 ++ + target/i386/trace-events | 3 ++- + 7 files changed, 46 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 2bbcbb8d35..7c6a910250 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -268,6 +268,9 @@ void x86_firmware_configure(void *ptr, int size) + ram_addr_t offset = 0; + MemoryRegion *mr; + ++ if (kvm_csv3_should_set_priv_mem()) ++ csv3_set_guest_private_memory(&error_fatal); ++ + mr = memory_region_from_host(ptr, &offset); + if (!mr) { + error_report("failed to get memory region of flash"); +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 438b4e9183..176aa53cbe 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -154,6 +154,14 @@ extern bool kvm_csv3_allowed; + */ + #define kvm_csv3_enabled() (kvm_csv3_allowed) + ++/** ++ * kvm_csv3_should_set_priv_mem: ++ * Returns: true if we should explicitly request ++ * KVM_CSV3_SET_GUEST_PRIVATE_MEMORY. 
++ */ ++#define kvm_csv3_should_set_priv_mem() \ ++ (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_SET_PRIV_MEM) ++ + #else + + #define kvm_enabled() (0) +@@ -171,6 +179,7 @@ extern bool kvm_csv3_allowed; + #define kvm_readonly_mem_enabled() (false) + #define kvm_msi_devid_required() (false) + #define kvm_csv3_enabled() (false) ++#define kvm_csv3_should_set_priv_mem() (false) + + #endif /* CONFIG_KVM_IS_POSSIBLE */ + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index ab28e9af5e..84cec64b88 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2133,6 +2133,8 @@ enum csv3_cmd_id { + KVM_CSV3_RECEIVE_ENCRYPT_DATA, + KVM_CSV3_RECEIVE_ENCRYPT_CONTEXT, + ++ KVM_CSV3_SET_GUEST_PRIVATE_MEMORY = 0xc8, ++ + KVM_CSV3_NR_MAX, + }; + +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +index db22c299a6..e49755da5c 100644 +--- a/target/i386/csv-sysemu-stub.c ++++ b/target/i386/csv-sysemu-stub.c +@@ -39,3 +39,8 @@ void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end) + { + + } ++ ++int csv3_set_guest_private_memory(Error **errp) ++{ ++ g_assert_not_reached(); ++} +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 4aed225763..d9b50040a3 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -698,3 +698,26 @@ int csv3_load_incoming_context(QEMUFile *f) + /* receive csv3 context. 
*/ + return csv3_receive_encrypt_context(s, f); + } ++ ++int csv3_set_guest_private_memory(Error **errp) ++{ ++ int fw_error; ++ int ret = 0; ++ ++ if (!csv3_enabled()) { ++ error_setg(errp, "%s: CSV3 is not enabled", __func__); ++ return -1; ++ } ++ ++ /* if CSV3 is in update state then load the data to secure memory */ ++ if (csv3_check_state(SEV_STATE_LAUNCH_UPDATE)) { ++ trace_kvm_csv3_set_guest_private_memory(); ++ ret = csv3_ioctl(KVM_CSV3_SET_GUEST_PRIVATE_MEMORY, NULL, &fw_error); ++ if (ret) ++ error_setg(errp, "%s: CSV3 fail set private memory, ret=%d" ++ " fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ } ++ ++ return ret; ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index c1d4cec3e0..fb669279a8 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -130,4 +130,6 @@ int csv3_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); + int csv3_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); + int csv3_save_outgoing_context(QEMUFile *f, uint64_t *bytes_sent); + ++int csv3_set_guest_private_memory(Error **errp); ++ + #endif +diff --git a/target/i386/trace-events b/target/i386/trace-events +index ad3cfb9612..5d4a709a39 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -21,8 +21,9 @@ kvm_sev_send_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *dst, int len + kvm_sev_receive_update_vmsa(uint32_t cpu_id, uint32_t cpu_index, void *src, int len, void *hdr, int hdr_len) "cpu_id %d cpu_index %d trans %p len %d hdr %p hdr_len %d" + + # csv.c +-kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" PRIx64 "addr %p len 0x%" PRIx64 ++kvm_csv3_launch_encrypt_data(uint64_t gpa, void *addr, uint64_t len) "gpa 0x%" PRIx64 " addr %p len 0x%" PRIx64 + kvm_csv3_send_encrypt_data(void *dst, int len) "trans %p len %d" + kvm_csv3_send_encrypt_context(void *dst, int len) "trans %p len %d" + kvm_csv3_receive_encrypt_data(void *dst, int len, void *hdr, int 
hdr_len) "trans %p len %d hdr %p hdr_len %d" + kvm_csv3_receive_encrypt_context(void *dst, int len, void *hdr, int hdr_len) "trans %p len %d hdr %p hdr_len %d" ++kvm_csv3_set_guest_private_memory(void) "" +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Support-inject-secret-for-CSV3-guest.patch b/target-i386-csv-Support-inject-secret-for-CSV3-guest.patch new file mode 100644 index 0000000000000000000000000000000000000000..dbb1d97944b0abe7af905c3a652a8294d2df8b80 --- /dev/null +++ b/target-i386-csv-Support-inject-secret-for-CSV3-guest.patch @@ -0,0 +1,43 @@ +From b74c6b8971610ffc9c901a9b22c92b40084a74bf Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Sun, 29 Sep 2024 15:03:47 +0800 +Subject: [PATCH] target/i386: csv: Support inject secret for CSV3 guest only + if the extension is enabled + +The CSV3 guest can only inject secrets when the +KVM_CAP_HYGON_COCO_EXT_CSV3_INJ_SECRET capability is enabled. + +Additionally, if the guest is a CSV3 guest, the guest_uaddr field of the +KVM ioctl's input should be set to the value of the GPA. 
+ +Signed-off-by: hanliyang +--- + target/i386/sev.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 3a9c9ceec7..b4b42fd716 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1416,7 +1416,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + input.trans_uaddr = (uint64_t)(unsigned long)data; + input.trans_len = data_sz; + +- input.guest_uaddr = (uint64_t)(unsigned long)hva; ++ /* For Hygon CSV3 guest, the guest_uaddr should be the gpa */ ++ if (csv3_enabled()) { ++ if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_INJ_SECRET) { ++ input.guest_uaddr = gpa; ++ } else { ++ error_setg(errp, "CSV3 inject secret unsupported!"); ++ return 1; ++ } ++ } else { ++ input.guest_uaddr = (uint64_t)(unsigned long)hva; ++ } + input.guest_len = data_sz; + + trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-Support-load-kernel-hashes-for-CSV3-.patch b/target-i386-csv-Support-load-kernel-hashes-for-CSV3-.patch new file mode 100644 index 0000000000000000000000000000000000000000..a0930a66c415f339414f3b55f6e17967e7ee2afb --- /dev/null +++ b/target-i386-csv-Support-load-kernel-hashes-for-CSV3-.patch @@ -0,0 +1,40 @@ +From ca6d5f032ab4c93d78c90a83beefcfb05bf1ad79 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Sat, 28 Sep 2024 17:55:13 +0800 +Subject: [PATCH] target/i386: csv: Support load kernel hashes for CSV3 guest + only if the extension is enabled + +The CSV3 guest can only update kernel hashes when the +KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA capability is enabled. 
+ +Signed-off-by: hanliyang +--- + target/i386/sev.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 721eca2150..3a9c9ceec7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2748,7 +2748,17 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { ++ if (csv3_enabled()) { ++ if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA) { ++ if (csv3_load_data(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { ++ error_report("%s: CSV3 load kernel hashes unsupported!", __func__); ++ ret = false; ++ } ++ } else if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { + ret = false; + } + +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-add-support-to-encrypt-the-outgoing-.patch b/target-i386-csv-add-support-to-encrypt-the-outgoing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b5023f8af6f8a20ce22a3d1ff9344771e632308 --- /dev/null +++ b/target-i386-csv-add-support-to-encrypt-the-outgoing-.patch @@ -0,0 +1,207 @@ +From b2091d245563f4bd2974c8d8e6ef186de614f8e2 Mon Sep 17 00:00:00 2001 +From: fangbaoshun +Date: Mon, 2 Aug 2021 11:41:58 +0800 +Subject: [PATCH] target/i386: csv: add support to encrypt the outgoing pages + in the list queued before. + +The csv_save_queued_outgoing_pages() provide the implementation to encrypt +the guest private pages during transmission. The routines uses SEND_START +command to create the outgoing encryption context on the first call then +uses COMMAND_BATCH command to send the SEND_UPDATE_DATA commands queued +in the list to encrypt the data before writing it to the socket. 
While +encrypting the data SEND_UPDATE_DATA produces some metadata (e.g MAC, IV). +The metadata is also sent to the target machine. After migration is completed, +we issue the SEND_FINISH command to transition the SEV guest state from sending +to unrunnable state. + +Signed-off-by: hanliyang +--- + include/exec/confidential-guest-support.h | 4 ++ + linux-headers/linux/kvm.h | 8 +++ + target/i386/csv.h | 1 + + target/i386/sev.c | 88 +++++++++++++++++++++++ + target/i386/sev.h | 3 + + 5 files changed, 104 insertions(+) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index 8949568acc..c84f8c1efc 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -80,6 +80,10 @@ struct ConfidentialGuestMemoryEncryptionOps { + + /* Queue the encrypted page and metadata associated with it into a list */ + int (*queue_outgoing_page)(uint8_t *ptr, uint32_t size, uint64_t addr); ++ ++ /* Write the list queued with encrypted pages and metadata associated ++ * with them */ ++ int (*save_queued_outgoing_pages)(QEMUFile *f, uint64_t *bytes_sent); + }; + + typedef struct ConfidentialGuestSupportClass { +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index ca78fdc8b6..fcd09126a1 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1971,6 +1971,9 @@ enum sev_cmd_id { + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + ++ /* Hygon CSV batch command */ ++ KVM_CSV_COMMAND_BATCH = 0x18, ++ + KVM_SEV_NR_MAX, + }; + +@@ -2073,6 +2076,11 @@ struct kvm_csv_batch_list_node { + __u64 next_cmd_addr; + }; + ++struct kvm_csv_command_batch { ++ __u32 command_id; ++ __u64 csv_batch_list_uaddr; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 4c1ef20029..2a3a3119d9 100644 +--- a/target/i386/csv.h ++++ 
b/target/i386/csv.h +@@ -54,5 +54,6 @@ struct CsvBatchCmdList { + }; + + int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); ++int csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); + + #endif +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7dd35d64ee..1e2bbafe36 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -191,6 +191,7 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list, + .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, + .queue_outgoing_page = csv_queue_outgoing_page, ++ .save_queued_outgoing_pages = csv_save_queued_outgoing_pages, + }; + + static int +@@ -2012,6 +2013,69 @@ err: + return ret; + } + ++static int ++csv_command_batch(uint32_t cmd_id, uint64_t head_uaddr, int *fw_err) ++{ ++ int ret; ++ struct kvm_csv_command_batch command_batch = { }; ++ ++ command_batch.command_id = cmd_id; ++ command_batch.csv_batch_list_uaddr = head_uaddr; ++ ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_CSV_COMMAND_BATCH, ++ &command_batch, fw_err); ++ if (ret) { ++ error_report("%s: COMMAND_BATCH ret=%d fw_err=%d '%s'", ++ __func__, ret, *fw_err, fw_error_to_str(*fw_err)); ++ } ++ ++ return ret; ++} ++ ++static int ++csv_send_update_data_batch(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) ++{ ++ int ret, fw_error = 0; ++ struct kvm_sev_send_update_data *update; ++ struct kvm_csv_batch_list_node *node; ++ ++ ret = csv_command_batch(KVM_SEV_SEND_UPDATE_DATA, ++ (uint64_t)s->csv_batch_cmd_list->head, &fw_error); ++ if (ret) { ++ error_report("%s: csv_command_batch ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++ for (node = s->csv_batch_cmd_list->head; ++ node != NULL; ++ node = (struct kvm_csv_batch_list_node *)node->next_cmd_addr) { ++ if (node != s->csv_batch_cmd_list->head) { ++ /* head's page header is saved before 
send_update_data */ ++ qemu_put_be64(f, node->addr); ++ *bytes_sent += 8; ++ if (node->next_cmd_addr != 0) ++ qemu_put_be32(f, RAM_SAVE_ENCRYPTED_PAGE_BATCH); ++ else ++ qemu_put_be32(f, RAM_SAVE_ENCRYPTED_PAGE_BATCH_END); ++ *bytes_sent += 4; ++ } ++ update = (struct kvm_sev_send_update_data *)node->cmd_data_addr; ++ qemu_put_be32(f, update->hdr_len); ++ qemu_put_buffer(f, (uint8_t *)update->hdr_uaddr, update->hdr_len); ++ *bytes_sent += (4 + update->hdr_len); ++ ++ qemu_put_be32(f, update->trans_len); ++ qemu_put_buffer(f, (uint8_t *)update->trans_uaddr, update->trans_len); ++ *bytes_sent += (4 + update->trans_len); ++ } ++ ++err: ++ csv_batch_cmd_list_destroy(s->csv_batch_cmd_list); ++ s->csv_batch_cmd_list = NULL; ++ return ret; ++} ++ + int + csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) + { +@@ -2026,6 +2090,30 @@ csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) + return csv_send_queue_data(s, ptr, sz, addr); + } + ++int ++csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) ++{ ++ SevGuestState *s = sev_guest; ++ ++ /* Only support for HYGON CSV */ ++ if (!is_hygon_cpu()) { ++ error_report("Only support transfer queued pages for HYGON CSV"); ++ return -EINVAL; ++ } ++ ++ /* ++ * If this is a first buffer then create outgoing encryption context ++ * and write our PDH, policy and session data. 
++ */ ++ if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) && ++ sev_send_start(s, f, bytes_sent)) { ++ error_report("Failed to create outgoing context"); ++ return 1; ++ } ++ ++ return csv_send_update_data_batch(s, f, bytes_sent); ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 84e3bdf2df..f7886116e7 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -41,6 +41,9 @@ typedef struct SevKernelLoaderContext { + #define RAM_SAVE_ENCRYPTED_PAGE 0x1 + #define RAM_SAVE_SHARED_REGIONS_LIST 0x2 + ++#define RAM_SAVE_ENCRYPTED_PAGE_BATCH 0x4 ++#define RAM_SAVE_ENCRYPTED_PAGE_BATCH_END 0x5 ++ + #ifdef CONFIG_SEV + bool sev_enabled(void); + bool sev_es_enabled(void); +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-add-support-to-load-incoming-encrypt.patch b/target-i386-csv-add-support-to-load-incoming-encrypt.patch new file mode 100644 index 0000000000000000000000000000000000000000..7af0a4e0d9c94d19a44234d6f31724c8f19ee6a7 --- /dev/null +++ b/target-i386-csv-add-support-to-load-incoming-encrypt.patch @@ -0,0 +1,107 @@ +From cb5c1c9c70110639eda0ff50c8dfcf24b0be561d Mon Sep 17 00:00:00 2001 +From: fangbaoshun +Date: Mon, 2 Aug 2021 14:11:43 +0800 +Subject: [PATCH] target/i386: csv: add support to load incoming encrypted + pages queued in the CMD list + +The csv_load_queued_incoming_pages() provides the implementation to read the +incoming guest private pages from the socket queued in the CMD list and load +them into the guest memory. The routine uses the RECEIVE_START command to +create the incoming encryption context on the first call then uses the +COMMAND_BATCH carried with RECEIVE_UPDATE_DATA commands to load the encrypted +pages into the guest memory. After migration is completed, we issue the +RECEIVE_FINISH command to transition the SEV guest to the runnable state +so that it can be executed.
+ +Signed-off-by: hanliyang +--- + include/exec/confidential-guest-support.h | 3 +++ + target/i386/csv.h | 1 + + target/i386/sev.c | 32 +++++++++++++++++++++++ + 3 files changed, 36 insertions(+) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index 101cc5220a..cb14b815cb 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -87,6 +87,9 @@ struct ConfidentialGuestMemoryEncryptionOps { + + /* Queue the incoming encrypted page into a list */ + int (*queue_incoming_page)(QEMUFile *f, uint8_t *ptr); ++ ++ /* Load the incoming encrypted pages queued in list into guest memory */ ++ int (*load_queued_incoming_pages)(QEMUFile *f); + }; + + typedef struct ConfidentialGuestSupportClass { +diff --git a/target/i386/csv.h b/target/i386/csv.h +index d1bcc8bc16..977f08b982 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -56,5 +56,6 @@ struct CsvBatchCmdList { + int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); + int csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); + int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr); ++int csv_load_queued_incoming_pages(QEMUFile *f); + + #endif +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 606aaad328..2dee46d852 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -193,6 +193,7 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .queue_outgoing_page = csv_queue_outgoing_page, + .save_queued_outgoing_pages = csv_save_queued_outgoing_pages, + .queue_incoming_page = csv_queue_incoming_page, ++ .load_queued_incoming_pages = csv_load_queued_incoming_pages, + }; + + static int +@@ -2146,6 +2147,24 @@ err: + return ret; + } + ++static int ++csv_receive_update_data_batch(SevGuestState *s) ++{ ++ int ret; ++ int fw_error; ++ ++ ret = csv_command_batch(KVM_SEV_RECEIVE_UPDATE_DATA, ++ (uint64_t)s->csv_batch_cmd_list->head, &fw_error); ++ if 
(ret) { ++ error_report("%s: csv_command_batch ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ } ++ ++ csv_batch_cmd_list_destroy(s->csv_batch_cmd_list); ++ s->csv_batch_cmd_list = NULL; ++ return ret; ++} ++ + int + csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) + { +@@ -2206,6 +2225,19 @@ csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) + return csv_send_update_data_batch(s, f, bytes_sent); + } + ++int csv_load_queued_incoming_pages(QEMUFile *f) ++{ ++ SevGuestState *s = sev_guest; ++ ++ /* Only support for HYGON CSV */ ++ if (!is_hygon_cpu()) { ++ error_report("Only support load queued pages for HYGON CSV"); ++ return -EINVAL; ++ } ++ ++ return csv_receive_update_data_batch(s); ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-add-support-to-queue-the-incoming-pa.patch b/target-i386-csv-add-support-to-queue-the-incoming-pa.patch new file mode 100644 index 0000000000000000000000000000000000000000..8382ae20bbdd0fbfead55de07f1f21a67660f952 --- /dev/null +++ b/target-i386-csv-add-support-to-queue-the-incoming-pa.patch @@ -0,0 +1,170 @@ +From 8125145bcd3b8348e69686e26f482cf16b16ec98 Mon Sep 17 00:00:00 2001 +From: fangbaoshun +Date: Mon, 2 Aug 2021 13:49:48 +0800 +Subject: [PATCH] target/i386: csv: add support to queue the incoming page into + a list + +The csv_queue_incoming_page() provide the implementation to queue the +guest private pages during transmission. The routines queues the incoming +socket which contains the guest private pages into a list then uses the +COMMAND_BATCH command to load the encrypted pages into the guest memory. 
+ +Signed-off-by: hanliyang +--- + include/exec/confidential-guest-support.h | 3 + + target/i386/csv.h | 1 + + target/i386/sev.c | 92 +++++++++++++++++++++++ + 3 files changed, 96 insertions(+) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index c84f8c1efc..101cc5220a 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -84,6 +84,9 @@ struct ConfidentialGuestMemoryEncryptionOps { + /* Write the list queued with encrypted pages and metadata associated + * with them */ + int (*save_queued_outgoing_pages)(QEMUFile *f, uint64_t *bytes_sent); ++ ++ /* Queue the incoming encrypted page into a list */ ++ int (*queue_incoming_page)(QEMUFile *f, uint8_t *ptr); + }; + + typedef struct ConfidentialGuestSupportClass { +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 2a3a3119d9..d1bcc8bc16 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -55,5 +55,6 @@ struct CsvBatchCmdList { + + int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); + int csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent); ++int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr); + + #endif +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 1e2bbafe36..606aaad328 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -192,6 +192,7 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, + .queue_outgoing_page = csv_queue_outgoing_page, + .save_queued_outgoing_pages = csv_save_queued_outgoing_pages, ++ .queue_incoming_page = csv_queue_incoming_page, + }; + + static int +@@ -1941,6 +1942,15 @@ static void send_update_data_free(void *data) + g_free(update); + } + ++static void receive_update_data_free(void *data) ++{ ++ struct kvm_sev_receive_update_data *update = ++ (struct kvm_sev_receive_update_data *)data; ++ g_free((guchar 
*)update->hdr_uaddr); ++ g_free((guchar *)update->trans_uaddr); ++ g_free(update); ++} ++ + static int + csv_send_queue_data(SevGuestState *s, uint8_t *ptr, + uint32_t size, uint64_t addr) +@@ -2013,6 +2023,66 @@ err: + return ret; + } + ++static int ++csv_receive_queue_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr) ++{ ++ int ret = 0; ++ gchar *hdr = NULL, *trans = NULL; ++ struct kvm_sev_receive_update_data *update; ++ struct kvm_csv_batch_list_node *new_node = NULL; ++ ++ update = g_new0(struct kvm_sev_receive_update_data, 1); ++ /* get packet header */ ++ update->hdr_len = qemu_get_be32(f); ++ hdr = g_new(gchar, update->hdr_len); ++ qemu_get_buffer(f, (uint8_t *)hdr, update->hdr_len); ++ update->hdr_uaddr = (unsigned long)hdr; ++ ++ /* get transport buffer */ ++ update->trans_len = qemu_get_be32(f); ++ trans = g_new(gchar, update->trans_len); ++ update->trans_uaddr = (unsigned long)trans; ++ qemu_get_buffer(f, (uint8_t *)update->trans_uaddr, update->trans_len); ++ ++ /* set guest address,guest len is page_size */ ++ update->guest_uaddr = (uint64_t)ptr; ++ update->guest_len = TARGET_PAGE_SIZE; ++ ++ new_node = csv_batch_cmd_list_node_create((uint64_t)update, 0); ++ if (!new_node) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ if (s->csv_batch_cmd_list == NULL) { ++ s->csv_batch_cmd_list = csv_batch_cmd_list_create(new_node, ++ receive_update_data_free); ++ if (s->csv_batch_cmd_list == NULL) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ } else { ++ /* Add new_node's command address to the last_node */ ++ csv_batch_cmd_list_add_after(s->csv_batch_cmd_list, new_node); ++ } ++ ++ trace_kvm_sev_receive_update_data(trans, (void *)ptr, update->guest_len, ++ (void *)hdr, update->hdr_len); ++ ++ return ret; ++ ++err: ++ g_free(trans); ++ g_free(update); ++ g_free(hdr); ++ g_free(new_node); ++ if (s->csv_batch_cmd_list) { ++ csv_batch_cmd_list_destroy(s->csv_batch_cmd_list); ++ s->csv_batch_cmd_list = NULL; ++ } ++ return ret; ++} ++ + static int + csv_command_batch(uint32_t 
cmd_id, uint64_t head_uaddr, int *fw_err) + { +@@ -2090,6 +2160,28 @@ csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) + return csv_send_queue_data(s, ptr, sz, addr); + } + ++int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr) ++{ ++ SevGuestState *s = sev_guest; ++ ++ /* Only support for HYGON CSV */ ++ if (!is_hygon_cpu()) { ++ error_report("Only support enqueue received pages for HYGON CSV"); ++ return -EINVAL; ++ } ++ ++ /* ++ * If this is first buffer and SEV is not in recieiving state then ++ * use RECEIVE_START command to create a encryption context. ++ */ ++ if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) && ++ sev_receive_start(s, f)) { ++ return 1; ++ } ++ ++ return csv_receive_queue_data(s, f, ptr); ++} ++ + int + csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) + { +-- +2.41.0.windows.1 + diff --git a/target-i386-csv-add-support-to-queue-the-outgoing-pa.patch b/target-i386-csv-add-support-to-queue-the-outgoing-pa.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a6da4fc9825080d5f694002f9d20d371f566202 --- /dev/null +++ b/target-i386-csv-add-support-to-queue-the-outgoing-pa.patch @@ -0,0 +1,259 @@ +From e6d587b63c3950f5d5af9002a8ae14e0904d62c3 Mon Sep 17 00:00:00 2001 +From: fangbaoshun +Date: Mon, 2 Aug 2021 11:00:07 +0800 +Subject: [PATCH] target/i386: csv: add support to queue the outgoing page into + a list + +The csv_queue_outgoing_page() provides the implementation to queue the +guest private pages during transmission. The routine queues the outgoing +pages into a list, and then issues the KVM_CSV_COMMAND_BATCH command to +encrypt the pages together before writing them to the socket. 
+ +Signed-off-by: hanliyang +--- + include/exec/confidential-guest-support.h | 3 + + linux-headers/linux/kvm.h | 6 + + target/i386/csv.h | 11 ++ + target/i386/sev.c | 161 ++++++++++++++++++++++ + 4 files changed, 181 insertions(+) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index dd4887f65f..8949568acc 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -77,6 +77,9 @@ struct ConfidentialGuestMemoryEncryptionOps { + + /* Load the shared regions list */ + int (*load_incoming_shared_regions_list)(QEMUFile *f); ++ ++ /* Queue the encrypted page and metadata associated with it into a list */ ++ int (*queue_outgoing_page)(uint8_t *ptr, uint32_t size, uint64_t addr); + }; + + typedef struct ConfidentialGuestSupportClass { +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 9489a20835..ca78fdc8b6 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2067,6 +2067,12 @@ struct kvm_sev_receive_update_data { + __u32 trans_len; + }; + ++struct kvm_csv_batch_list_node { ++ __u64 cmd_data_addr; ++ __u64 addr; ++ __u64 next_cmd_addr; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +diff --git a/target/i386/csv.h b/target/i386/csv.h +index f935babe97..4c1ef20029 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -44,4 +44,15 @@ static bool __attribute__((unused)) is_hygon_cpu(void) + + #endif + ++typedef struct CsvBatchCmdList CsvBatchCmdList; ++typedef void (*CsvDestroyCmdNodeFn) (void *data); ++ ++struct CsvBatchCmdList { ++ struct kvm_csv_batch_list_node *head; ++ struct kvm_csv_batch_list_node *tail; ++ CsvDestroyCmdNodeFn destroy_fn; ++}; ++ ++int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr); ++ + #endif +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 331dfa4516..7dd35d64ee 100644 
+--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -95,6 +95,9 @@ struct SevGuestState { + bool reset_data_valid; + + QTAILQ_HEAD(, shared_region) shared_regions_list; ++ ++ /* link list used for HYGON CSV */ ++ CsvBatchCmdList *csv_batch_cmd_list; + }; + + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ +@@ -187,6 +190,7 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .is_gfn_in_unshared_region = sev_is_gfn_in_unshared_region, + .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list, + .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list, ++ .queue_outgoing_page = csv_queue_outgoing_page, + }; + + static int +@@ -1865,6 +1869,163 @@ bool sev_is_gfn_in_unshared_region(unsigned long gfn) + return true; + } + ++static CsvBatchCmdList * ++csv_batch_cmd_list_create(struct kvm_csv_batch_list_node *head, ++ CsvDestroyCmdNodeFn func) ++{ ++ CsvBatchCmdList *csv_batch_cmd_list = ++ g_malloc0(sizeof(*csv_batch_cmd_list)); ++ ++ if (!csv_batch_cmd_list) { ++ return NULL; ++ } ++ ++ csv_batch_cmd_list->head = head; ++ csv_batch_cmd_list->tail = head; ++ csv_batch_cmd_list->destroy_fn = func; ++ ++ return csv_batch_cmd_list; ++} ++ ++static int ++csv_batch_cmd_list_add_after(CsvBatchCmdList *list, ++ struct kvm_csv_batch_list_node *new_node) ++{ ++ list->tail->next_cmd_addr = (__u64)new_node; ++ list->tail = new_node; ++ ++ return 0; ++} ++ ++static struct kvm_csv_batch_list_node * ++csv_batch_cmd_list_node_create(uint64_t cmd_data_addr, uint64_t addr) ++{ ++ struct kvm_csv_batch_list_node *new_node = ++ g_malloc0(sizeof(struct kvm_csv_batch_list_node)); ++ ++ if (!new_node) { ++ return NULL; ++ } ++ ++ new_node->cmd_data_addr = cmd_data_addr; ++ new_node->addr = addr; ++ new_node->next_cmd_addr = 0; ++ ++ return new_node; ++} ++ ++static int csv_batch_cmd_list_destroy(CsvBatchCmdList *list) ++{ ++ struct kvm_csv_batch_list_node *node = list->head; ++ ++ while (node != NULL) { ++ if 
(list->destroy_fn != NULL) ++ list->destroy_fn((void *)node->cmd_data_addr); ++ ++ list->head = (struct kvm_csv_batch_list_node *)node->next_cmd_addr; ++ g_free(node); ++ node = list->head; ++ } ++ ++ g_free(list); ++ return 0; ++} ++ ++static void send_update_data_free(void *data) ++{ ++ struct kvm_sev_send_update_data *update = ++ (struct kvm_sev_send_update_data *)data; ++ g_free((guchar *)update->hdr_uaddr); ++ g_free((guchar *)update->trans_uaddr); ++ g_free(update); ++} ++ ++static int ++csv_send_queue_data(SevGuestState *s, uint8_t *ptr, ++ uint32_t size, uint64_t addr) ++{ ++ int ret = 0; ++ int fw_error; ++ guchar *trans; ++ guchar *packet_hdr; ++ struct kvm_sev_send_update_data *update; ++ struct kvm_csv_batch_list_node *new_node = NULL; ++ ++ /* If this is first call then query the packet header bytes and allocate ++ * the packet buffer. ++ */ ++ if (s->send_packet_hdr_len < 1) { ++ s->send_packet_hdr_len = sev_send_get_packet_len(&fw_error); ++ if (s->send_packet_hdr_len < 1) { ++ error_report("%s: SEND_UPDATE fw_error=%d '%s'", ++ __func__, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ } ++ ++ packet_hdr = g_new(guchar, s->send_packet_hdr_len); ++ memset(packet_hdr, 0, s->send_packet_hdr_len); ++ ++ update = g_new0(struct kvm_sev_send_update_data, 1); ++ ++ /* allocate transport buffer */ ++ trans = g_new(guchar, size); ++ ++ update->hdr_uaddr = (unsigned long)packet_hdr; ++ update->hdr_len = s->send_packet_hdr_len; ++ update->guest_uaddr = (unsigned long)ptr; ++ update->guest_len = size; ++ update->trans_uaddr = (unsigned long)trans; ++ update->trans_len = size; ++ ++ new_node = csv_batch_cmd_list_node_create((uint64_t)update, addr); ++ if (!new_node) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ if (s->csv_batch_cmd_list == NULL) { ++ s->csv_batch_cmd_list = csv_batch_cmd_list_create(new_node, ++ send_update_data_free); ++ if (s->csv_batch_cmd_list == NULL) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ } else { ++ /* Add new_node's command 
address to the last_node */ ++ csv_batch_cmd_list_add_after(s->csv_batch_cmd_list, new_node); ++ } ++ ++ trace_kvm_sev_send_update_data(ptr, trans, size); ++ ++ return ret; ++ ++err: ++ g_free(trans); ++ g_free(update); ++ g_free(packet_hdr); ++ g_free(new_node); ++ if (s->csv_batch_cmd_list) { ++ csv_batch_cmd_list_destroy(s->csv_batch_cmd_list); ++ s->csv_batch_cmd_list = NULL; ++ } ++ return ret; ++} ++ ++int ++csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) ++{ ++ SevGuestState *s = sev_guest; ++ ++ /* Only support for HYGON CSV */ ++ if (!is_hygon_cpu()) { ++ error_report("Only support enqueue pages for HYGON CSV"); ++ return -EINVAL; ++ } ++ ++ return csv_send_queue_data(s, ptr, sz, addr); ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +-- +2.41.0.windows.1 + diff --git a/target-i386-disable-VMX-features-if-nested-0.patch b/target-i386-disable-VMX-features-if-nested-0.patch deleted file mode 100644 index fa7edfdb2a7296a0039a44e2c4c1af9b5b324951..0000000000000000000000000000000000000000 --- a/target-i386-disable-VMX-features-if-nested-0.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 26f01427d155510edcab07e312a72f5bacddafb2 Mon Sep 17 00:00:00 2001 -From: Yang Zhong -Date: Fri, 6 Dec 2019 15:11:11 +0800 -Subject: [PATCH] target/i386: disable VMX features if nested=0 - -If kvm does not support VMX feature by nested=0, the kvm_vmx_basic -can't get the right value from MSR_IA32_VMX_BASIC register, which -make qemu coredump when qemu do KVM_SET_MSRS. - -The coredump info: -error: failed to set MSR 0x480 to 0x0 -kvm_put_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. 
- -Signed-off-by: Yang Zhong -Message-Id: <20191206071111.12128-1-yang.zhong@intel.com> -Reported-by: Catherine Ho -Signed-off-by: Paolo Bonzini ---- - target/i386/kvm.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index b97f40df6b..5ee0c50d7c 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -2493,6 +2493,14 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) - uint64_t kvm_vmx_basic = - kvm_arch_get_supported_msr_feature(kvm_state, - MSR_IA32_VMX_BASIC); -+ -+ if (!kvm_vmx_basic) { -+ /* If the kernel doesn't support VMX feature (kvm_intel.nested=0), -+ * then kvm_vmx_basic will be 0 and KVM_SET_MSR will fail. -+ */ -+ return; -+ } -+ - uint64_t kvm_vmx_misc = - kvm_arch_get_supported_msr_feature(kvm_state, - MSR_IA32_VMX_MISC); --- -2.27.0 - diff --git a/target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/target-i386-do-not-set-unsupported-VMX-secondary-exe.patch deleted file mode 100644 index 8eda458156b202ba7c5405bf1e261e56a7aa1771..0000000000000000000000000000000000000000 --- a/target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 472ccc3e48cab962ec9acf3f31e4467544b51705 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Tue, 31 Mar 2020 18:27:52 +0200 -Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution - controls - -Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for -secondary execution controls") added a workaround for KVM pre-dating -commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm -KVM_GET_MSRS") which wasn't setting certain available controls. The -workaround uses generic CPUID feature bits to set missing VMX controls. - -It was found that in some cases it is possible to observe hosts which -have certain CPUID features but lack the corresponding VMX control. 
- -In particular, it was reported that Azure VMs have RDSEED but lack -VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature -bit result in QEMU abort. - -Resolve the issue but not applying the workaround when we don't have -to. As there is no good way to find out if KVM has the fix itself, use -95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead -as these [are supposed to] come together. - -Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") -Suggested-by: Paolo Bonzini -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini ---- - target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- - 1 file changed, 26 insertions(+), 15 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 5ee0c50d7c..7328746d92 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -97,6 +97,7 @@ static bool has_msr_smi_count; - static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; -+static bool has_msr_vmx_procbased_ctls2; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -474,21 +475,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - value = msr_data.entries[0].data; - switch (index) { - case MSR_IA32_VMX_PROCBASED_CTLS2: -- /* KVM forgot to add these bits for some time, do this ourselves. 
*/ -- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ if (!has_msr_vmx_procbased_ctls2) { -+ /* KVM forgot to add these bits for some time, do this ourselves. */ -+ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & -+ CPUID_XSAVE_XSAVES) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & -+ CPUID_EXT_RDRAND) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_INVPCID) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_RDSEED) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & -+ CPUID_EXT2_RDTSCP) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ } - } - /* fall through */ - case MSR_IA32_VMX_TRUE_PINBASED_CTLS: -@@ -1973,6 +1981,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_VMX_VMFUNC: - has_msr_vmx_vmfunc = true; - break; -+ case MSR_IA32_VMX_PROCBASED_CTLS2: -+ has_msr_vmx_procbased_ctls2 = true; -+ break; - } - } - } --- -2.27.0 - diff --git a/target-i386-enable-monitor-and-ucode-revision-with-c.patch 
b/target-i386-enable-monitor-and-ucode-revision-with-c.patch deleted file mode 100644 index 398a79d1648aa2d595ad39098a49c00b7b8ab95a..0000000000000000000000000000000000000000 --- a/target-i386-enable-monitor-and-ucode-revision-with-c.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 8470399d9508b3b56d625866ea235c2a5b4cb39a Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:16 +0000 -Subject: [PATCH] target/i386: enable monitor and ucode revision with -cpu max -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-7-pbonzini@redhat.com> -Patchwork-id: 93910 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -These two features were incorrectly tied to host_cpuid_required rather than -cpu->max_features. As a result, -cpu max was not enabling either MONITOR -features or ucode revision. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) - -[RHEL7: context, upstream uses g_autofree] - -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 22e0e89718..6147cd419a 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - g_free(name); - goto out; - } -+ } - -+ if (cpu->max_features && accel_uses_host_cpuid()) { - if (enable_cpu_pm) { - host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, - &cpu->mwait.ecx, &cpu->mwait.edx); --- -2.27.0 - diff --git a/target-i386-enumerate-VMX-nested-exception-support.patch b/target-i386-enumerate-VMX-nested-exception-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..92cb7464ebe90b4f683202a2045094237b1c7c0a --- /dev/null +++ b/target-i386-enumerate-VMX-nested-exception-support.patch @@ -0,0 +1,62 @@ +From 5f828613ba69ce640512a900f630515d980208dd Mon Sep 17 00:00:00 2001 +From: Xin Li +Date: Wed, 8 Nov 2023 23:20:11 -0800 +Subject: [PATCH] target/i386: enumerate VMX nested-exception support + +commit ef202d64c3020f3df03c39d3ad688732d81aaae8 upstream. + +Allow VMX nested-exception support to be exposed in KVM guests, thus +nested KVM guests can enumerate it. 
+ +Intel-SIG: commit ef202d64c302 target/i386: enumerate VMX nested-exception support + +Tested-by: Shan Kang +Signed-off-by: Xin Li +Message-ID: <20231109072012.8078-6-xin3.li@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + scripts/kvm/vmxcap | 1 + + target/i386/cpu.c | 1 + + target/i386/cpu.h | 1 + + 3 files changed, 3 insertions(+) + +diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap +index 44898d73c2..508be19c75 100755 +--- a/scripts/kvm/vmxcap ++++ b/scripts/kvm/vmxcap +@@ -117,6 +117,7 @@ controls = [ + 54: 'INS/OUTS instruction information', + 55: 'IA32_VMX_TRUE_*_CTLS support', + 56: 'Skip checks on event error code', ++ 58: 'VMX nested exception support', + }, + msr = MSR_IA32_VMX_BASIC, + ), +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 47f00392be..00e636e61c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1344,6 +1344,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + [54] = "vmx-ins-outs", + [55] = "vmx-true-ctls", + [56] = "vmx-any-errcode", ++ [58] = "vmx-nested-exception", + }, + .msr = { + .index = MSR_IA32_VMX_BASIC, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 418daeab04..b03237c305 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1065,6 +1065,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define MSR_VMX_BASIC_INS_OUTS (1ULL << 54) + #define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55) + #define MSR_VMX_BASIC_ANY_ERRCODE (1ULL << 56) ++#define MSR_VMX_BASIC_NESTED_EXCEPTION (1ULL << 58) + + #define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full + #define MSR_VMX_MISC_STORE_LMA (1ULL << 5) +-- +2.41.0.windows.1 + diff --git a/target-i386-expand-feature-words-to-64-bits.patch b/target-i386-expand-feature-words-to-64-bits.patch deleted file mode 100644 index e4a06e5954aec3ec8d30e29c61234612ed36d0c7..0000000000000000000000000000000000000000 --- a/target-i386-expand-feature-words-to-64-bits.patch +++ /dev/null @@ -1,295 +0,0 @@ -From 
bec2d75a3d3c6405d0afe59c343d23199b009666 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 1 Jul 2019 17:38:54 +0200 -Subject: [PATCH] target/i386: expand feature words to 64 bits - -VMX requires 64-bit feature words for the IA32_VMX_EPT_VPID_CAP -and IA32_VMX_BASIC MSRs. (The VMX control MSRs are 64-bit wide but -actually have only 32 bits of information). - -Signed-off-by: Paolo Bonzini ---- - include/sysemu/kvm.h | 2 +- - target/i386/cpu.c | 71 +++++++++++++++++++++++--------------------- - target/i386/cpu.h | 2 +- - target/i386/kvm.c | 2 +- - 4 files changed, 40 insertions(+), 37 deletions(-) - -diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h -index 565adb4e2c..875b2bf10d 100644 ---- a/include/sysemu/kvm.h -+++ b/include/sysemu/kvm.h -@@ -464,7 +464,7 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension); - - uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, - uint32_t index, int reg); --uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); -+uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); - - - void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index d4a435ba96..3d6541c4a8 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -789,7 +789,7 @@ typedef struct FeatureWordInfo { - * In cases of disagreement between feature naming conventions, - * aliases may be added. 
- */ -- const char *feat_names[32]; -+ const char *feat_names[64]; - union { - /* If type==CPUID_FEATURE_WORD */ - struct { -@@ -803,11 +803,11 @@ typedef struct FeatureWordInfo { - uint32_t index; - } msr; - }; -- uint32_t tcg_features; /* Feature flags supported by TCG */ -- uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */ -- uint32_t migratable_flags; /* Feature flags known to be migratable */ -+ uint64_t tcg_features; /* Feature flags supported by TCG */ -+ uint64_t unmigratable_flags; /* Feature flags known to be unmigratable */ -+ uint64_t migratable_flags; /* Feature flags known to be migratable */ - /* Features that shouldn't be auto-enabled by "-cpu host" */ -- uint32_t no_autoenable_flags; -+ uint64_t no_autoenable_flags; - } FeatureWordInfo; - - static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { -@@ -1236,7 +1236,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - - typedef struct FeatureMask { - FeatureWord index; -- uint32_t mask; -+ uint64_t mask; - } FeatureMask; - - typedef struct FeatureDep { -@@ -1246,11 +1246,11 @@ typedef struct FeatureDep { - static FeatureDep feature_dependencies[] = { - { - .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, -- .to = { FEAT_ARCH_CAPABILITIES, ~0u }, -+ .to = { FEAT_ARCH_CAPABILITIES, ~0ull }, - }, - { - .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, -- .to = { FEAT_CORE_CAPABILITY, ~0u }, -+ .to = { FEAT_CORE_CAPABILITY, ~0ull }, - }, - }; - -@@ -1362,14 +1362,14 @@ const char *get_register_name_32(unsigned int reg) - * Returns the set of feature flags that are supported and migratable by - * QEMU, for a given FeatureWord. 
- */ --static uint32_t x86_cpu_get_migratable_flags(FeatureWord w) -+static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) - { - FeatureWordInfo *wi = &feature_word_info[w]; -- uint32_t r = 0; -+ uint64_t r = 0; - int i; - -- for (i = 0; i < 32; i++) { -- uint32_t f = 1U << i; -+ for (i = 0; i < 64; i++) { -+ uint64_t f = 1ULL << i; - - /* If the feature name is known, it is implicitly considered migratable, - * unless it is explicitly set in unmigratable_flags */ -@@ -3051,7 +3051,7 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value) - assert(pv->prop); - } - --static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, -+static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - bool migratable_only); - - static bool lmce_supported(void) -@@ -3237,7 +3237,7 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) - return false; - } - --static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, -+static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, - const char *verbose_prefix) - { - CPUX86State *env = &cpu->env; -@@ -3254,8 +3254,8 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, - return; - } - -- for (i = 0; i < 32; ++i) { -- if ((1UL << i) & mask) { -+ for (i = 0; i < 64; ++i) { -+ if ((1ULL << i) & mask) { - feat_word_str = feature_word_description(f, i); - warn_report("%s: %s%s%s [bit %d]", - verbose_prefix, -@@ -3498,7 +3498,7 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) - { -- uint32_t *array = (uint32_t *)opaque; -+ uint64_t *array = (uint64_t *)opaque; - FeatureWord w; - X86CPUFeatureWordInfo word_infos[FEATURE_WORDS] = { }; - X86CPUFeatureWordInfoList list_entries[FEATURE_WORDS] = { }; -@@ -3542,6 +3542,7 @@ static inline void feat2prop(char *s) - /* Return the feature property name for a feature flag bit */ - static const char 
*x86_cpu_feature_name(FeatureWord w, int bitnr) - { -+ const char *name; - /* XSAVE components are automatically enabled by other features, - * so return the original feature name instead - */ -@@ -3555,9 +3556,11 @@ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) - } - } - -- assert(bitnr < 32); -+ assert(bitnr < 64); - assert(w < FEATURE_WORDS); -- return feature_word_info[w].feat_names[bitnr]; -+ name = feature_word_info[w].feat_names[bitnr]; -+ assert(bitnr < 32 || !(name && feature_word_info[w].type == CPUID_FEATURE_WORD)); -+ return name; - } - - /* Compatibily hack to maintain legacy +-feat semantic, -@@ -3673,10 +3676,10 @@ static void x86_cpu_list_feature_names(FeatureWordArray features, - strList **next = feat_names; - - for (w = 0; w < FEATURE_WORDS; w++) { -- uint32_t filtered = features[w]; -+ uint64_t filtered = features[w]; - int i; -- for (i = 0; i < 32; i++) { -- if (filtered & (1UL << i)) { -+ for (i = 0; i < 64; i++) { -+ if (filtered & (1ULL << i)) { - strList *new = g_new0(strList, 1); - new->value = g_strdup(x86_cpu_feature_name(w, i)); - *next = new; -@@ -3845,7 +3848,7 @@ void x86_cpu_list(void) - names = NULL; - for (i = 0; i < ARRAY_SIZE(feature_word_info); i++) { - FeatureWordInfo *fw = &feature_word_info[i]; -- for (j = 0; j < 32; j++) { -+ for (j = 0; j < 64; j++) { - if (fw->feat_names[j]) { - names = g_list_append(names, (gpointer)fw->feat_names[j]); - } -@@ -3900,11 +3903,11 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) - return cpu_list; - } - --static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, -+static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - bool migratable_only) - { - FeatureWordInfo *wi = &feature_word_info[w]; -- uint32_t r = 0; -+ uint64_t r = 0; - - if (kvm_enabled()) { - switch (wi->type) { -@@ -4075,7 +4078,7 @@ static QDict *x86_cpu_static_props(void) - for (w = 0; w < FEATURE_WORDS; w++) { - FeatureWordInfo *fi = &feature_word_info[w]; - int bit; -- 
for (bit = 0; bit < 32; bit++) { -+ for (bit = 0; bit < 64; bit++) { - if (!fi->feat_names[bit]) { - continue; - } -@@ -5231,7 +5234,7 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { - FeatureDep *d = &feature_dependencies[i]; - if (!(env->features[d->from.index] & d->from.mask)) { -- uint32_t unavailable_features = env->features[d->to.index] & d->to.mask; -+ uint64_t unavailable_features = env->features[d->to.index] & d->to.mask; - - /* Not an error unless the dependent feature was added explicitly. */ - mark_unavailable_features(cpu, d->to.index, -@@ -5326,10 +5329,10 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) - } - - for (w = 0; w < FEATURE_WORDS; w++) { -- uint32_t host_feat = -+ uint64_t host_feat = - x86_cpu_get_supported_feature_word(w, false); -- uint32_t requested_features = env->features[w]; -- uint32_t unavailable_features = requested_features & ~host_feat; -+ uint64_t requested_features = env->features[w]; -+ uint64_t unavailable_features = requested_features & ~host_feat; - mark_unavailable_features(cpu, w, unavailable_features, prefix); - } - -@@ -5626,7 +5629,7 @@ static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) - - typedef struct BitProperty { - FeatureWord w; -- uint32_t mask; -+ uint64_t mask; - } BitProperty; - - static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, -@@ -5634,7 +5637,7 @@ static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, - { - X86CPU *cpu = X86_CPU(obj); - BitProperty *fp = opaque; -- uint32_t f = cpu->env.features[fp->w]; -+ uint64_t f = cpu->env.features[fp->w]; - bool value = (f & fp->mask) == fp->mask; - visit_type_bool(v, name, &value, errp); - } -@@ -5687,7 +5690,7 @@ static void x86_cpu_register_bit_prop(X86CPU *cpu, - { - BitProperty *fp; - ObjectProperty *op; -- uint32_t mask = (1UL << bitnr); -+ uint64_t mask = (1ULL << bitnr); - - op = 
object_property_find(OBJECT(cpu), prop_name, NULL); - if (op) { -@@ -5821,7 +5824,7 @@ static void x86_cpu_initfn(Object *obj) - for (w = 0; w < FEATURE_WORDS; w++) { - int bitnr; - -- for (bitnr = 0; bitnr < 32; bitnr++) { -+ for (bitnr = 0; bitnr < 64; bitnr++) { - x86_cpu_register_feature_bit_props(cpu, w, bitnr); - } - } -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 24d489db0f..9a105b2251 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -502,7 +502,7 @@ typedef enum FeatureWord { - FEATURE_WORDS, - } FeatureWord; - --typedef uint32_t FeatureWordArray[FEATURE_WORDS]; -+typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - - /* cpuid_features bits */ - #define CPUID_FP87 (1U << 0) -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index f55d4b4b97..e9a6293ab2 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -437,7 +437,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, - return ret; - } - --uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) -+uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - { - struct { - struct kvm_msrs info; --- -2.27.0 - diff --git a/target-i386-fix-feature-dependency-for-WAITPKG.patch b/target-i386-fix-feature-dependency-for-WAITPKG.patch new file mode 100644 index 0000000000000000000000000000000000000000..0cea61cff54b0ecfa751efeadc88ca04853c2ad2 --- /dev/null +++ b/target-i386-fix-feature-dependency-for-WAITPKG.patch @@ -0,0 +1,39 @@ +From bce44f92530fed18cac1e51f81217a6addf992bd Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 8 May 2024 11:10:54 +0200 +Subject: [PATCH] target/i386: fix feature dependency for WAITPKG + +commit fe01af5d47d4cf7fdf90c54d43f784e5068c8d72 upstream. + +The VMX feature bit depends on general availability of WAITPKG, +not the other way round. 
+ +Intel-SIG: commit fe01af5d47d4 target/i386: fix feature dependency for WAITPKG + +Fixes: 33cc88261c3 ("target/i386: add support for VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE", 2023-08-28) +Cc: qemu-stable@nongnu.org +Reviewed-by: Zhao Liu +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f3df62127c..860934b39f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1550,8 +1550,8 @@ static FeatureDep feature_dependencies[] = { + .to = { FEAT_SVM, ~0ull }, + }, + { +- .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE }, +- .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG }, ++ .from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE }, + }, + }; + +-- +2.41.0.windows.1 + diff --git a/target-i386-fix-hang-when-using-slow-path-for-ptw_se.patch b/target-i386-fix-hang-when-using-slow-path-for-ptw_se.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c50e9d3b0bad6d95350926ba05926ac22ac3a0d --- /dev/null +++ b/target-i386-fix-hang-when-using-slow-path-for-ptw_se.patch @@ -0,0 +1,59 @@ +From ddb2cb652db80b24ba5ddf0b00dd3ba3f9224eba Mon Sep 17 00:00:00 2001 +From: Pierrick Bouvier +Date: Fri, 25 Oct 2024 10:58:56 -0700 +Subject: [PATCH] target/i386: fix hang when using slow path for ptw_setl + +When instrumenting memory accesses for plugin, we force memory accesses +to use the slow path for mmu [1]. This create a situation where we end +up calling ptw_setl_slow. This was fixed recently in [2] but the issue +still could appear out of plugins use case. + +Since this function gets called during a cpu_exec, start_exclusive then +hangs. This exclusive section was introduced initially for security +reasons [3]. 
+ +I suspect this code path was never triggered, because ptw_setl_slow +would always be called transitively from cpu_exec, resulting in a hang. + +[1] https://gitlab.com/qemu-project/qemu/-/commit/6d03226b42247b68ab2f0b3663e0f624335a4055 +[2] https://gitlab.com/qemu-project/qemu/-/commit/115ade42d50144c15b74368d32dc734ea277d853 +[2] https://gitlab.com/qemu-project/qemu/-/commit/9a96406787afcc9960fbe8791892c78311d6971f in 8.2.x series +[3] https://gitlab.com/qemu-project/qemu/-/issues/279 + +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/2566 +Signed-off-by: Pierrick Bouvier +Reviewed-by: Richard Henderson +Message-ID: <20241025175857.2554252-2-pierrick.bouvier@linaro.org> +Signed-off-by: Richard Henderson +(cherry picked from commit 7ba055b49b74c4d2f4a338c5198485bdff373fb1) +Signed-off-by: zhujun2 +--- + target/i386/tcg/sysemu/excp_helper.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c +index 5b86f439ad..294dbc50e2 100644 +--- a/target/i386/tcg/sysemu/excp_helper.c ++++ b/target/i386/tcg/sysemu/excp_helper.c +@@ -107,6 +107,10 @@ static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new) + { + uint32_t cmp; + ++ CPUState *cpu = env_cpu(in->env); ++ /* We are in cpu_exec, and start_exclusive can't be called directly.*/ ++ g_assert(cpu->running); ++ cpu_exec_end(cpu); + /* Does x86 really perform a rmw cycle on mmio for ptw? 
*/ + start_exclusive(); + cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0); +@@ -114,6 +118,7 @@ static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new) + cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0); + } + end_exclusive(); ++ cpu_exec_start(cpu); + return cmp == old; + } + +-- +2.41.0.windows.1 + diff --git a/target-i386-fix-size-of-EBP-writeback-in-gen_enter.patch b/target-i386-fix-size-of-EBP-writeback-in-gen_enter.patch new file mode 100644 index 0000000000000000000000000000000000000000..9690299f4d3e2ecd54c41c8ad1382ab53c9b12ec --- /dev/null +++ b/target-i386-fix-size-of-EBP-writeback-in-gen_enter.patch @@ -0,0 +1,43 @@ +From 6d0eefdf70135a01476b787df50f34da77ae5529 Mon Sep 17 00:00:00 2001 +From: Mark Cave-Ayland +Date: Thu, 6 Jun 2024 10:53:19 +0100 +Subject: [PATCH] target/i386: fix size of EBP writeback in gen_enter() + +The calculation of FrameTemp is done using the size indicated by mo_pushpop() +before being written back to EBP, but the final writeback to EBP is done using +the size indicated by mo_stacksize(). + +In the case where mo_pushpop() is MO_32 and mo_stacksize() is MO_16 then the +final writeback to EBP is done using MO_16 which can leave junk in the top +16-bits of EBP after executing ENTER. + +Change the writeback of EBP to use the same size indicated by mo_pushpop() to +ensure that the full value is written back. 
+ +Signed-off-by: Mark Cave-Ayland +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2198 +Message-ID: <20240606095319.229650-5-mark.cave-ayland@ilande.co.uk> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3973615e7fbaeef1deeaa067577e373781ced70a) +Signed-off-by: zhujun2 +--- + target/i386/tcg/translate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c +index dc672d7995..19b8250452 100644 +--- a/target/i386/tcg/translate.c ++++ b/target/i386/tcg/translate.c +@@ -2661,7 +2661,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) + } + + /* Copy the FrameTemp value to EBP. */ +- gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1); ++ gen_op_mov_reg_v(s, d_ot, R_EBP, s->T1); + + /* Compute the final value of ESP. */ + tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level); +-- +2.41.0.windows.1 + diff --git a/target-i386-get-set-migrate-GHCB-state.patch b/target-i386-get-set-migrate-GHCB-state.patch new file mode 100644 index 0000000000000000000000000000000000000000..4e09ad530f33c90e9f447a4a9c5e659f41aeb26e --- /dev/null +++ b/target-i386-get-set-migrate-GHCB-state.patch @@ -0,0 +1,190 @@ +From 6a8b58a3ce6dc162cae4b74ca8f39392672e6cba Mon Sep 17 00:00:00 2001 +From: panpingsheng +Date: Sat, 12 Jun 2021 15:15:29 +0800 +Subject: [PATCH] target/i386: get/set/migrate GHCB state + +GHCB state is necessary to CSV2 guest when migrating to target. + +Add GHCB related definition, it also adds corresponding part +to kvm_get/put, and vmstate. 
+ +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 2 ++ + target/i386/cpu.h | 5 +++++ + target/i386/kvm/kvm.c | 11 +++++++++++ + target/i386/kvm/sev-stub.c | 2 ++ + target/i386/machine.c | 24 ++++++++++++++++++++++++ + target/i386/sev.c | 10 ++++++++++ + target/i386/sev.h | 2 ++ + 7 files changed, 56 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index e9cd0ebaf1..e796105b76 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1203,6 +1203,8 @@ struct kvm_ppc_resize_hpt { + + #define KVM_CAP_ARM_TMM 300 + ++#define KVM_CAP_SEV_ES_GHCB 500 ++ + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 + + #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 6993552cd9..a9a646bba2 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -520,6 +520,8 @@ typedef enum X86Seg { + + #define MSR_VM_HSAVE_PA 0xc0010117 + ++#define MSR_AMD64_SEV_ES_GHCB 0xc0010130 ++ + #define MSR_IA32_XFD 0x000001c4 + #define MSR_IA32_XFD_ERR 0x000001c5 + +@@ -1885,6 +1887,9 @@ typedef struct CPUArchState { + + /* Number of dies within this CPU package. 
*/ + unsigned nr_dies; ++ ++ /* GHCB guest physical address info */ ++ uint64_t ghcb_gpa; + } CPUX86State; + + struct kvm_msrs; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5730d0e0c0..9e65242739 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3625,6 +3625,10 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + } + ++ if (sev_kvm_has_msr_ghcb) { ++ kvm_msr_entry_add(cpu, MSR_AMD64_SEV_ES_GHCB, env->ghcb_gpa); ++ } ++ + return kvm_buf_set_msrs(cpu); + } + +@@ -3999,6 +4003,10 @@ static int kvm_get_msrs(X86CPU *cpu) + } + } + ++ if (sev_kvm_has_msr_ghcb) { ++ kvm_msr_entry_add(cpu, MSR_AMD64_SEV_ES_GHCB, 0); ++ } ++ + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); + if (ret < 0) { + return ret; +@@ -4319,6 +4327,9 @@ static int kvm_get_msrs(X86CPU *cpu) + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31: + env->lbr_records[index - MSR_ARCH_LBR_INFO_0].info = msrs[i].data; + break; ++ case MSR_AMD64_SEV_ES_GHCB: ++ env->ghcb_gpa = msrs[i].data; ++ break; + } + } + +diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c +index 99899688e4..a0aac1117f 100644 +--- a/target/i386/kvm/sev-stub.c ++++ b/target/i386/kvm/sev-stub.c +@@ -14,6 +14,8 @@ + #include "qemu/osdep.h" + #include "sev.h" + ++bool sev_kvm_has_msr_ghcb; ++ + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + /* If we get here, cgs must be some non-SEV thing */ +diff --git a/target/i386/machine.c b/target/i386/machine.c +index a1041ef828..9a1cb8f3b8 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -1605,6 +1605,27 @@ static const VMStateDescription vmstate_triple_fault = { + } + }; + ++#if defined(CONFIG_KVM) && defined(TARGET_X86_64) ++static bool msr_ghcb_gpa_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return env->ghcb_gpa != 0; ++} ++ ++static const VMStateDescription vmstate_msr_ghcb_gpa = { ++ .name = "cpu/svm_msr_ghcb_gpa", ++ .version_id 
= 1, ++ .minimum_version_id = 1, ++ .needed = msr_ghcb_gpa_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.ghcb_gpa, X86CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++#endif ++ + const VMStateDescription vmstate_x86_cpu = { + .name = "cpu", + .version_id = 12, +@@ -1751,6 +1772,9 @@ const VMStateDescription vmstate_x86_cpu = { + #endif + &vmstate_arch_lbr, + &vmstate_triple_fault, ++#if defined(CONFIG_KVM) && defined(TARGET_X86_64) ++ &vmstate_msr_ghcb_gpa, ++#endif + NULL + } + }; +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 6ba71c91d7..7744378112 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -152,6 +152,8 @@ QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + static SevGuestState *sev_guest; + static Error *sev_mig_blocker; + ++bool sev_kvm_has_msr_ghcb; ++ + static const char *const sev_fw_errlist[] = { + [SEV_RET_SUCCESS] = "", + [SEV_RET_INVALID_PLATFORM_STATE] = "Platform state is invalid", +@@ -1198,6 +1200,14 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; + QTAILQ_INIT(&sev->shared_regions_list); + ++ /* Determine whether support MSR_AMD64_SEV_ES_GHCB */ ++ if (sev_es_enabled()) { ++ sev_kvm_has_msr_ghcb = ++ kvm_vm_check_extension(kvm_state, KVM_CAP_SEV_ES_GHCB); ++ } else { ++ sev_kvm_has_msr_ghcb = false; ++ } ++ + cgs->ready = true; + + return 0; +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 209c92fd6f..0bfe3879ef 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -78,4 +78,6 @@ void sev_del_migrate_blocker(void); + + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); + ++extern bool sev_kvm_has_msr_ghcb; ++ + #endif +-- +2.41.0.windows.1 + diff --git a/target-i386-handle-filtered_features-in-a-new-functi.patch b/target-i386-handle-filtered_features-in-a-new-functi.patch deleted file mode 100644 index ba35948dd1e1c10f566c327750026c22b626a5f2..0000000000000000000000000000000000000000 --- 
a/target-i386-handle-filtered_features-in-a-new-functi.patch +++ /dev/null @@ -1,176 +0,0 @@ -From b9d29966103ca671718ef1eb5b68067b05fad340 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 2 Jul 2019 15:32:41 +0200 -Subject: [PATCH] target/i386: handle filtered_features in a new function - mark_unavailable_features - -The next patch will add a different reason for filtering features, unrelated -to host feature support. Extract a new function that takes care of disabling -the features and optionally reporting them. - -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.c | 87 ++++++++++++++++++++++++++--------------------- - 1 file changed, 48 insertions(+), 39 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index e65f372f25..8798cafc7a 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3216,17 +3216,41 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) - return NULL; - } - --static void report_unavailable_features(FeatureWord w, uint32_t mask) -+static bool x86_cpu_have_filtered_features(X86CPU *cpu) - { -+ FeatureWord w; -+ -+ for (w = 0; w < FEATURE_WORDS; w++) { -+ if (cpu->filtered_features[w]) { -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, -+ const char *verbose_prefix) -+{ -+ CPUX86State *env = &cpu->env; - FeatureWordInfo *f = &feature_word_info[w]; - int i; - char *feat_word_str; - -+ if (!cpu->force_features) { -+ env->features[w] &= ~mask; -+ } -+ cpu->filtered_features[w] |= mask; -+ -+ if (!verbose_prefix) { -+ return; -+ } -+ - for (i = 0; i < 32; ++i) { - if ((1UL << i) & mask) { - feat_word_str = feature_word_description(f, i); -- warn_report("%s doesn't support requested feature: %s%s%s [bit %d]", -- accel_uses_host_cpuid() ? "host" : "TCG", -+ warn_report("%s: %s%s%s [bit %d]", -+ verbose_prefix, - feat_word_str, - f->feat_names[i] ? "." : "", - f->feat_names[i] ? 
f->feat_names[i] : "", i); -@@ -3631,7 +3655,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, - } - - static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); --static int x86_cpu_filter_features(X86CPU *cpu); -+static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); - - /* Build a list with the name of all features on a feature word array */ - static void x86_cpu_list_feature_names(FeatureWordArray features, -@@ -3696,7 +3720,7 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, - next = &new->next; - } - -- x86_cpu_filter_features(xc); -+ x86_cpu_filter_features(xc, false); - - x86_cpu_list_feature_names(xc->filtered_features, next); - -@@ -3904,15 +3928,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, - return r; - } - --static void x86_cpu_report_filtered_features(X86CPU *cpu) --{ -- FeatureWord w; -- -- for (w = 0; w < FEATURE_WORDS; w++) { -- report_unavailable_features(w, cpu->filtered_features[w]); -- } --} -- - static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) - { - PropValue *pv; -@@ -5274,24 +5289,24 @@ out: - * - * Returns: 0 if all flags are supported by the host, non-zero otherwise. - */ --static int x86_cpu_filter_features(X86CPU *cpu) -+static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) - { - CPUX86State *env = &cpu->env; - FeatureWord w; -- int rv = 0; -+ const char *prefix = NULL; -+ -+ if (verbose) { -+ prefix = accel_uses_host_cpuid() -+ ? 
"host doesn't support requested feature" -+ : "TCG doesn't support requested feature"; -+ } - - for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t host_feat = - x86_cpu_get_supported_feature_word(w, false); - uint32_t requested_features = env->features[w]; -- uint32_t available_features = requested_features & host_feat; -- if (!cpu->force_features) { -- env->features[w] = available_features; -- } -- cpu->filtered_features[w] = requested_features & ~available_features; -- if (cpu->filtered_features[w]) { -- rv = 1; -- } -+ uint32_t unavailable_features = requested_features & ~host_feat; -+ mark_unavailable_features(cpu, w, unavailable_features, prefix); - } - - if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && -@@ -5317,13 +5332,9 @@ static int x86_cpu_filter_features(X86CPU *cpu) - * host can't emulate the capabilities we report on - * cpu_x86_cpuid(), intel-pt can't be enabled on the current host. - */ -- env->features[FEAT_7_0_EBX] &= ~CPUID_7_0_EBX_INTEL_PT; -- cpu->filtered_features[FEAT_7_0_EBX] |= CPUID_7_0_EBX_INTEL_PT; -- rv = 1; -+ mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); - } - } -- -- return rv; - } - - static void x86_cpu_realizefn(DeviceState *dev, Error **errp) -@@ -5364,16 +5375,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - goto out; - } - -- if (x86_cpu_filter_features(cpu) && -- (cpu->check_cpuid || cpu->enforce_cpuid)) { -- x86_cpu_report_filtered_features(cpu); -- if (cpu->enforce_cpuid) { -- error_setg(&local_err, -- accel_uses_host_cpuid() ? -- "Host doesn't support requested features" : -- "TCG doesn't support requested features"); -- goto out; -- } -+ x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); -+ -+ if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { -+ error_setg(&local_err, -+ accel_uses_host_cpuid() ? 
-+ "Host doesn't support requested features" : -+ "TCG doesn't support requested features"); -+ goto out; - } - - /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on --- -2.27.0 - diff --git a/target-i386-introduce-generic-feature-dependency-mec.patch b/target-i386-introduce-generic-feature-dependency-mec.patch deleted file mode 100644 index da374c58652d5559993c9a584d7c83377d6669cd..0000000000000000000000000000000000000000 --- a/target-i386-introduce-generic-feature-dependency-mec.patch +++ /dev/null @@ -1,146 +0,0 @@ -From ed8fa9d895a0e06434b4163405aeaacbe65bcf44 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 1 Jul 2019 17:26:45 +0200 -Subject: [PATCH] target/i386: introduce generic feature dependency mechanism - -Sometimes a CPU feature does not make sense unless another is -present. In the case of VMX features, KVM does not even allow -setting the VMX controls to some invalid combinations. - -Therefore, this patch adds a generic mechanism that looks for bits -that the user explicitly cleared, and uses them to remove other bits -from the expanded CPU definition. If these dependent bits were also -explicitly *set* by the user, this will be a warning for "-cpu check" -and an error for "-cpu enforce". If not, then the dependent bits are -cleared silently, for convenience. - -With VMX features, this will be used so that for example -"-cpu host,-rdrand" will also hide support for RDRAND exiting. 
- -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.c | 72 +++++++++++++++++++++++++++++++---------------- - 1 file changed, 48 insertions(+), 24 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 8798cafc7a..d4a435ba96 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -801,10 +801,6 @@ typedef struct FeatureWordInfo { - /* If type==MSR_FEATURE_WORD */ - struct { - uint32_t index; -- struct { /*CPUID that enumerate this MSR*/ -- FeatureWord cpuid_class; -- uint32_t cpuid_flag; -- } cpuid_dep; - } msr; - }; - uint32_t tcg_features; /* Feature flags supported by TCG */ -@@ -1218,10 +1214,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - }, - .msr = { - .index = MSR_IA32_ARCH_CAPABILITIES, -- .cpuid_dep = { -- FEAT_7_0_EDX, -- CPUID_7_0_EDX_ARCH_CAPABILITIES -- } - }, - }, - [FEAT_CORE_CAPABILITY] = { -@@ -1238,14 +1230,30 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - }, - .msr = { - .index = MSR_IA32_CORE_CAPABILITY, -- .cpuid_dep = { -- FEAT_7_0_EDX, -- CPUID_7_0_EDX_CORE_CAPABILITY, -- }, - }, - }, - }; - -+typedef struct FeatureMask { -+ FeatureWord index; -+ uint32_t mask; -+} FeatureMask; -+ -+typedef struct FeatureDep { -+ FeatureMask from, to; -+} FeatureDep; -+ -+static FeatureDep feature_dependencies[] = { -+ { -+ .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, -+ .to = { FEAT_ARCH_CAPABILITIES, ~0u }, -+ }, -+ { -+ .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, -+ .to = { FEAT_CORE_CAPABILITY, ~0u }, -+ }, -+}; -+ - typedef struct X86RegisterInfo32 { - /* Name of register */ - const char *name; -@@ -5183,9 +5191,26 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - { - CPUX86State *env = &cpu->env; - FeatureWord w; -+ int i; - GList *l; - Error *local_err = NULL; - -+ for (l = plus_features; l; l = l->next) { -+ const char *prop = l->data; -+ object_property_set_bool(OBJECT(cpu), true, prop, &local_err); -+ if (local_err) { -+ goto out; -+ } -+ } 
-+ -+ for (l = minus_features; l; l = l->next) { -+ const char *prop = l->data; -+ object_property_set_bool(OBJECT(cpu), false, prop, &local_err); -+ if (local_err) { -+ goto out; -+ } -+ } -+ - /*TODO: Now cpu->max_features doesn't overwrite features - * set using QOM properties, and we can convert - * plus_features & minus_features to global properties -@@ -5203,19 +5228,18 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - } - } - -- for (l = plus_features; l; l = l->next) { -- const char *prop = l->data; -- object_property_set_bool(OBJECT(cpu), true, prop, &local_err); -- if (local_err) { -- goto out; -- } -- } -+ for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { -+ FeatureDep *d = &feature_dependencies[i]; -+ if (!(env->features[d->from.index] & d->from.mask)) { -+ uint32_t unavailable_features = env->features[d->to.index] & d->to.mask; - -- for (l = minus_features; l; l = l->next) { -- const char *prop = l->data; -- object_property_set_bool(OBJECT(cpu), false, prop, &local_err); -- if (local_err) { -- goto out; -+ /* Not an error unless the dependent feature was added explicitly. 
*/ -+ mark_unavailable_features(cpu, d->to.index, -+ unavailable_features & env->user_features[d->to.index], -+ "This feature depends on other features that were not requested"); -+ -+ env->user_features[d->to.index] |= unavailable_features; -+ env->features[d->to.index] &= ~unavailable_features; - } - } - --- -2.27.0 - diff --git a/target-i386-kvm-Fix-the-resettable-info-when-emulate.patch b/target-i386-kvm-Fix-the-resettable-info-when-emulate.patch new file mode 100644 index 0000000000000000000000000000000000000000..171bde129a285f1627ce50f4158834460199bb6d --- /dev/null +++ b/target-i386-kvm-Fix-the-resettable-info-when-emulate.patch @@ -0,0 +1,179 @@ +From 366c11c56875ae053043c48c8b93349c6e3125cc Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Sun, 19 Jun 2022 16:49:45 +0800 +Subject: [PATCH] target/i386/kvm: Fix the resettable info when emulate Hygon + CSV2 guest + +SEV-ES guest will be terminated by QEMU when receive reboot request. +In order to support reboot for CSV2 guest, report resettable in +kvm_arch_cpu_check_are_resettable(). But the CSV2 guest is still not +resettable if it was migrated to target machine. + +Signed-off-by: hanliyang +--- + target/i386/csv-sysemu-stub.c | 16 ++++++++++++++++ + target/i386/csv.c | 20 ++++++++++++++++++++ + target/i386/csv.h | 2 ++ + target/i386/kvm/csv-stub.c | 17 +++++++++++++++++ + target/i386/kvm/kvm.c | 4 ++++ + target/i386/kvm/meson.build | 1 + + target/i386/meson.build | 1 + + target/i386/sev.c | 9 +++++++++ + 8 files changed, 70 insertions(+) + create mode 100644 target/i386/csv-sysemu-stub.c + create mode 100644 target/i386/csv.c + create mode 100644 target/i386/kvm/csv-stub.c + +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +new file mode 100644 +index 0000000000..5874e4cc1d +--- /dev/null ++++ b/target/i386/csv-sysemu-stub.c +@@ -0,0 +1,16 @@ ++/* ++ * QEMU CSV system stub ++ * ++ * Copyright: Hygon Info Technologies Ltd. 
2022 ++ * ++ * Author: ++ * Jiang Xin ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "sev.h" ++#include "csv.h" +diff --git a/target/i386/csv.c b/target/i386/csv.c +new file mode 100644 +index 0000000000..88fb05ac37 +--- /dev/null ++++ b/target/i386/csv.c +@@ -0,0 +1,20 @@ ++/* ++ * QEMU CSV support ++ * ++ * Copyright: Hygon Info Technologies Ltd. 2022 ++ * ++ * Author: ++ * Jiang Xin ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "cpu.h" ++#include "sev.h" ++#include "csv.h" ++ ++bool csv_kvm_cpu_reset_inhibit; +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 47741a0a4f..ac4bb5bee1 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -46,6 +46,8 @@ static bool __attribute__((unused)) is_hygon_cpu(void) + + #define CSV_OUTGOING_PAGE_WINDOW_SIZE (4094 * TARGET_PAGE_SIZE) + ++extern bool csv_kvm_cpu_reset_inhibit; ++ + typedef struct CsvBatchCmdList CsvBatchCmdList; + typedef void (*CsvDestroyCmdNodeFn) (void *data); + +diff --git a/target/i386/kvm/csv-stub.c b/target/i386/kvm/csv-stub.c +new file mode 100644 +index 0000000000..4d1376f268 +--- /dev/null ++++ b/target/i386/kvm/csv-stub.c +@@ -0,0 +1,17 @@ ++/* ++ * QEMU CSV stub ++ * ++ * Copyright Hygon Info Technologies Ltd. 2024 ++ * ++ * Authors: ++ * Han Liyang ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "csv.h" ++ ++bool csv_kvm_cpu_reset_inhibit; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 9e65242739..2866a6d0ec 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -32,6 +32,7 @@ + #include "sysemu/runstate.h" + #include "kvm_i386.h" + #include "sev.h" ++#include "csv.h" + #include "xen-emu.h" + #include "hyperv.h" + #include "hyperv-proto.h" +@@ -5710,6 +5711,9 @@ bool kvm_has_waitpkg(void) + + bool kvm_arch_cpu_check_are_resettable(void) + { ++ if (is_hygon_cpu()) ++ return !csv_kvm_cpu_reset_inhibit; ++ + return !sev_es_enabled(); + } + +diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build +index 84d9143e60..3c3f8cf93c 100644 +--- a/target/i386/kvm/meson.build ++++ b/target/i386/kvm/meson.build +@@ -8,6 +8,7 @@ i386_kvm_ss.add(files( + i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) + + i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) ++i386_kvm_ss.add(when: 'CONFIG_CSV', if_false: files('csv-stub.c')) + + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) + +diff --git a/target/i386/meson.build b/target/i386/meson.build +index 7c74bfa859..594a0a6abf 100644 +--- a/target/i386/meson.build ++++ b/target/i386/meson.build +@@ -21,6 +21,7 @@ i386_system_ss.add(files( + 'cpu-sysemu.c', + )) + i386_system_ss.add(when: 'CONFIG_SEV', if_true: files('sev.c'), if_false: files('sev-sysemu-stub.c')) ++i386_system_ss.add(when: 'CONFIG_CSV', if_true: files('csv.c'), if_false: files('csv-sysemu-stub.c')) + + i386_user_ss = ss.source_set() + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7744378112..2c6aecd1a3 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1190,6 +1190,15 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; + } ++ } else { ++ /* ++ * The CSV2 
guest is not resettable after migrated to target machine, ++ * set csv_kvm_cpu_reset_inhibit to true to indicate the CSV2 guest is ++ * not resettable. ++ */ ++ if (is_hygon_cpu() && sev_es_enabled()) { ++ csv_kvm_cpu_reset_inhibit = true; ++ } + } + + ram_block_notifier_add(&sev_ram_notifier); +-- +2.41.0.windows.1 + diff --git a/target-i386-kvm-Refine-VMX-controls-setting-for-back.patch b/target-i386-kvm-Refine-VMX-controls-setting-for-back.patch new file mode 100644 index 0000000000000000000000000000000000000000..47b7268724ed5105288bbacb8f6544a887c6b3a0 --- /dev/null +++ b/target-i386-kvm-Refine-VMX-controls-setting-for-back.patch @@ -0,0 +1,68 @@ +From ef76ea9ce99c8158a09f243a9a7f5c9819a2b97b Mon Sep 17 00:00:00 2001 +From: EwanHai +Date: Thu, 5 Jun 2025 03:44:26 -0400 +Subject: [PATCH] target/i386/kvm: Refine VMX controls setting for backward + compatibility + +Upstream qemu commit 4a910e1 ("target/i386: do not set unsupported VMX secondary +execution controls") implemented a workaround for hosts that have +specific CPUID features but do not support the corresponding VMX +controls, e.g., hosts support RDSEED but do not support RDSEED-Exiting. + +In detail, commit 4a910e1 introduced a flag `has_msr_vmx_procbased_clts2`. +If KVM has `MSR_IA32_VMX_PROCBASED_CTLS2` in its msr list, QEMU would +use KVM's settings, avoiding any modifications to this MSR. + +However, this commit (4a910e1) didn't account for cases in older Linux +kernels(4.17~5.2) where `MSR_IA32_VMX_PROCBASED_CTLS2` is in +`kvm_feature_msrs`-obtained by ioctl(KVM_GET_MSR_FEATURE_INDEX_LIST), +but not in `kvm_msr_list`-obtained by ioctl(KVM_GET_MSR_INDEX_LIST). +As a result,it did not set the `has_msr_vmx_procbased_clts2` flag based +on `kvm_msr_list` alone, even though KVM does maintain the value of +this MSR. + +This patch supplements the above logic, ensuring that +`has_msr_vmx_procbased_clts2` is correctly set by checking both MSR +lists, thus maintaining compatibility with older kernels. 
+ +Signed-off-by: EwanHai +--- + target/i386/kvm/kvm.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 3a88e65635..2f379876e6 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2314,6 +2314,7 @@ void kvm_arch_do_init_vcpu(X86CPU *cpu) + static int kvm_get_supported_feature_msrs(KVMState *s) + { + int ret = 0; ++ int i; + + if (kvm_feature_msrs != NULL) { + return 0; +@@ -2348,6 +2349,20 @@ static int kvm_get_supported_feature_msrs(KVMState *s) + return ret; + } + ++ /* ++ * Compatibility fix: ++ * Older Linux kernels (4.17~5.2) report MSR_IA32_VMX_PROCBASED_CTLS2 ++ * in KVM_GET_MSR_FEATURE_INDEX_LIST but not in KVM_GET_MSR_INDEX_LIST. ++ * This leads to an issue in older kernel versions where QEMU, ++ * through the KVM_GET_MSR_INDEX_LIST check, assumes the kernel ++ * doesn't maintain MSR_IA32_VMX_PROCBASED_CTLS2, resulting in ++ * incorrect settings by QEMU for this MSR. ++ */ ++ for (i = 0; i < kvm_feature_msrs->nmsrs; i++) { ++ if (kvm_feature_msrs->indices[i] == MSR_IA32_VMX_PROCBASED_CTLS2) { ++ has_msr_vmx_procbased_ctls2 = true; ++ } ++ } + return 0; + } + +-- +2.33.0 + diff --git a/target-i386-kvm-Support-to-get-and-enable-extensions.patch b/target-i386-kvm-Support-to-get-and-enable-extensions.patch new file mode 100644 index 0000000000000000000000000000000000000000..0686e1d4d1ea64b0a7cabd91808ccfe2a0fbfdb9 --- /dev/null +++ b/target-i386-kvm-Support-to-get-and-enable-extensions.patch @@ -0,0 +1,105 @@ +From 9eb75830e70638d12efa0ec15a2f8b55e7c905da Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Sat, 28 Sep 2024 14:46:28 +0800 +Subject: [PATCH] target/i386: kvm: Support to get and enable extensions for + Hygon CoCo guest + +To enable advanced Hygon CoCo features, we should detect these features +during the initialization of VMs in the KVM accelerator. 
It is +suggested to enable these features if they are detected, allowing the +guest VM to run with additional functionalities. + +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 7 +++++++ + target/i386/csv.c | 2 ++ + target/i386/csv.h | 2 ++ + target/i386/kvm/csv-stub.c | 2 ++ + target/i386/kvm/kvm.c | 17 +++++++++++++++++ + 5 files changed, 30 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 05e499b45b..ab28e9af5e 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1204,6 +1204,13 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_TMM 300 + + #define KVM_CAP_SEV_ES_GHCB 500 ++#define KVM_CAP_HYGON_COCO_EXT 501 ++/* support userspace to request firmware to build CSV3 guest's memory space */ ++#define KVM_CAP_HYGON_COCO_EXT_CSV3_SET_PRIV_MEM (1 << 0) ++/* support request to update CSV3 guest's memory region multiple times */ ++#define KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA (1 << 1) ++/* support request to inject secret to CSV3 guest */ ++#define KVM_CAP_HYGON_COCO_EXT_CSV3_INJ_SECRET (1 << 2) + + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 + +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 571beeb61f..4aed225763 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -34,6 +34,8 @@ + #include "csv.h" + + bool csv_kvm_cpu_reset_inhibit; ++uint32_t kvm_hygon_coco_ext; ++uint32_t kvm_hygon_coco_ext_inuse; + + struct ConfidentialGuestMemoryEncryptionOps csv3_memory_encryption_ops = { + .save_setup = sev_save_setup, +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 8621f0b6fd..c1d4cec3e0 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -58,6 +58,8 @@ bool csv3_enabled(void); + #define CSV_OUTGOING_PAGE_WINDOW_SIZE (4094 * TARGET_PAGE_SIZE) + + extern bool csv_kvm_cpu_reset_inhibit; ++extern uint32_t kvm_hygon_coco_ext; ++extern uint32_t kvm_hygon_coco_ext_inuse; + + typedef struct CsvBatchCmdList CsvBatchCmdList; + typedef void (*CsvDestroyCmdNodeFn) (void 
*data); +diff --git a/target/i386/kvm/csv-stub.c b/target/i386/kvm/csv-stub.c +index 4d1376f268..8662d33206 100644 +--- a/target/i386/kvm/csv-stub.c ++++ b/target/i386/kvm/csv-stub.c +@@ -15,3 +15,5 @@ + #include "csv.h" + + bool csv_kvm_cpu_reset_inhibit; ++uint32_t kvm_hygon_coco_ext; ++uint32_t kvm_hygon_coco_ext_inuse; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 925f4f8040..12e920bbb4 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2639,6 +2639,23 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + } + } + ++ if (is_hygon_cpu()) { ++ /* check and enable Hygon coco extensions */ ++ kvm_hygon_coco_ext = (uint32_t)kvm_vm_check_extension(s, ++ KVM_CAP_HYGON_COCO_EXT); ++ if (kvm_hygon_coco_ext) { ++ ret = kvm_vm_enable_cap(s, KVM_CAP_HYGON_COCO_EXT, 0, ++ (uint64_t)kvm_hygon_coco_ext); ++ if (ret == -EINVAL) { ++ error_report("kvm: Failed to enable KVM_CAP_HYGON_COCO_EXT cap: %s", ++ strerror(-ret)); ++ kvm_hygon_coco_ext_inuse = 0; ++ } else { ++ kvm_hygon_coco_ext_inuse = (uint32_t)ret; ++ } ++ } ++ } ++ + ret = kvm_get_supported_msrs(s); + if (ret < 0) { + return ret; +-- +2.41.0.windows.1 + diff --git a/target-i386-kvm-initialize-feature-MSRs-very-early.patch b/target-i386-kvm-initialize-feature-MSRs-very-early.patch deleted file mode 100644 index 90b6f6fa4ba76fb4dde12567fcf3aee236236bde..0000000000000000000000000000000000000000 --- a/target-i386-kvm-initialize-feature-MSRs-very-early.patch +++ /dev/null @@ -1,178 +0,0 @@ -From c222711e37196e4be1776a084a1acb3c5a1f7283 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:11 +0000 -Subject: [PATCH] target/i386: kvm: initialize feature MSRs very early -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-2-pbonzini@redhat.com> -Patchwork-id: 93899 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early 
-Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Some read-only MSRs affect the behavior of ioctls such as -KVM_SET_NESTED_STATE. We can initialize them once and for all -right after the CPU is realized, since they will never be modified -by the guest. - -Reported-by: Qingua Cheng -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 81 +++++++++++++++++++++++++----------------- - target/i386/kvm_i386.h | 1 + - 2 files changed, 49 insertions(+), 33 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 7328746d92..60060087fd 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -63,6 +63,8 @@ - * 255 kvm_msr_entry structs */ - #define MSR_BUF_SIZE 4096 - -+static void kvm_init_msrs(X86CPU *cpu); -+ - const KVMCapabilityInfo kvm_arch_required_capabilities[] = { - KVM_CAP_INFO(SET_TSS_ADDR), - KVM_CAP_INFO(EXT_CPUID), -@@ -1777,6 +1779,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - has_msr_tsc_aux = false; - } - -+ kvm_init_msrs(cpu); -+ - r = hyperv_init_vcpu(cpu); - if (r) { - goto fail; -@@ -2592,11 +2596,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) - VMCS12_MAX_FIELD_INDEX << 1); - } - -+static int kvm_buf_set_msrs(X86CPU *cpu) -+{ -+ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if (ret < cpu->kvm_msr_buf->nmsrs) { -+ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -+ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -+ (uint32_t)e->index, (uint64_t)e->data); -+ } -+ -+ assert(ret == cpu->kvm_msr_buf->nmsrs); -+ return 0; -+} -+ -+static void kvm_init_msrs(X86CPU *cpu) -+{ -+ CPUX86State *env = &cpu->env; -+ -+ 
kvm_msr_buf_reset(cpu); -+ if (has_msr_arch_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -+ env->features[FEAT_ARCH_CAPABILITIES]); -+ } -+ -+ if (has_msr_core_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -+ env->features[FEAT_CORE_CAPABILITY]); -+ } -+ -+ /* -+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -+ * all kernels with MSR features should have them. -+ */ -+ if (kvm_feature_msrs && cpu_has_vmx(env)) { -+ kvm_msr_entry_add_vmx(cpu, env->features); -+ } -+ -+ assert(kvm_buf_set_msrs(cpu) == 0); -+} -+ - static int kvm_put_msrs(X86CPU *cpu, int level) - { - CPUX86State *env = &cpu->env; - int i; -- int ret; - - kvm_msr_buf_reset(cpu); - -@@ -2648,17 +2694,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - #endif - -- /* If host supports feature MSR, write down. */ -- if (has_msr_arch_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -- env->features[FEAT_ARCH_CAPABILITIES]); -- } -- -- if (has_msr_core_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -- env->features[FEAT_CORE_CAPABILITY]); -- } -- - /* - * The following MSRs have side effects on the guest or are too heavy - * for normal writeback. Limit them to reset or full state updates. -@@ -2831,14 +2866,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - - /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see - * kvm_put_msr_feature_control. */ -- -- /* -- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -- * all kernels with MSR features should have them. 
-- */ -- if (kvm_feature_msrs && cpu_has_vmx(env)) { -- kvm_msr_entry_add_vmx(cpu, env->features); -- } - } - - if (env->mcg_cap) { -@@ -2854,19 +2881,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - } - -- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -- if (ret < 0) { -- return ret; -- } -- -- if (ret < cpu->kvm_msr_buf->nmsrs) { -- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -- (uint32_t)e->index, (uint64_t)e->data); -- } -- -- assert(ret == cpu->kvm_msr_buf->nmsrs); -- return 0; -+ return kvm_buf_set_msrs(cpu); - } - - -diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h -index 06fe06bdb3..d98c6f69d0 100644 ---- a/target/i386/kvm_i386.h -+++ b/target/i386/kvm_i386.h -@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); - bool kvm_has_x2apic_api(void); - - bool kvm_hv_vpindex_settable(void); -+ - #endif --- -2.27.0 - diff --git a/target-i386-kvm-initialize-microcode-revision-from-K.patch b/target-i386-kvm-initialize-microcode-revision-from-K.patch deleted file mode 100644 index 5c15a47a5c025cc76e9f0fb2d9ade102a6cee294..0000000000000000000000000000000000000000 --- a/target-i386-kvm-initialize-microcode-revision-from-K.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 8664cd20e4cdb8594076a26dacef592a4b4816b2 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 20 Jan 2020 19:21:44 +0100 -Subject: [PATCH] target/i386: kvm: initialize microcode revision from KVM - -KVM can return the host microcode revision as a feature MSR. -Use it as the default value for -cpu host. 
- -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini ---- - target/i386/cpu.c | 4 ++++ - target/i386/kvm.c | 5 +++++ - 2 files changed, 9 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index ec8bc9957e..1962f00c77 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6330,6 +6330,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - &cpu->mwait.ecx, &cpu->mwait.edx); - env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; - } -+ if (kvm_enabled() && cpu->ucode_rev == 0) { -+ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV); -+ } - } - - if (cpu->ucode_rev == 0) { -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 60060087fd..7437f86130 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -2628,6 +2628,11 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -+ if (kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV)) { -+ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); -+ } -+ - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. --- -2.27.0 - diff --git a/target-i386-mark-CR4.FRED-not-reserved.patch b/target-i386-mark-CR4.FRED-not-reserved.patch new file mode 100644 index 0000000000000000000000000000000000000000..055447413f2192f51d7c7ba51fbfed648ea4263b --- /dev/null +++ b/target-i386-mark-CR4.FRED-not-reserved.patch @@ -0,0 +1,67 @@ +From 1a2ee56c173984212ba7b9970aa36e307094d460 Mon Sep 17 00:00:00 2001 +From: Xin Li +Date: Wed, 8 Nov 2023 23:20:08 -0800 +Subject: [PATCH] target/i386: mark CR4.FRED not reserved + +commit f88ddc40c6d8b591a357108feec52cea13796d2d upstream. + +The CR4.FRED bit, i.e., CR4[32], is no longer a reserved bit when FRED +is exposed to guests, otherwise it is still a reserved bit. 
+ +Intel-SIG: commit f88ddc40c6d8 target/i386: mark CR4.FRED not reserved + +Tested-by: Shan Kang +Signed-off-by: Xin Li +Reviewed-by: Zhao Liu +Message-ID: <20231109072012.8078-3-xin3.li@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.h | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index f392626f98..418daeab04 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -264,6 +264,18 @@ typedef enum X86Seg { + #define CR4_PKS_MASK (1U << 24) + #define CR4_LAM_SUP_MASK (1U << 28) + ++#ifdef TARGET_X86_64 ++#define CR4_FRED_MASK (1ULL << 32) ++#else ++#define CR4_FRED_MASK 0 ++#endif ++ ++#ifdef TARGET_X86_64 ++#define CR4_FRED_MASK (1ULL << 32) ++#else ++#define CR4_FRED_MASK 0 ++#endif ++ + #define CR4_RESERVED_MASK \ + (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \ + | CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \ +@@ -272,7 +284,7 @@ typedef enum X86Seg { + | CR4_LA57_MASK \ + | CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \ + | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \ +- | CR4_LAM_SUP_MASK)) ++ | CR4_LAM_SUP_MASK | CR4_FRED_MASK)) + + #define DR6_BD (1 << 13) + #define DR6_BS (1 << 14) +@@ -2551,6 +2563,9 @@ static inline uint64_t cr4_reserved_bits(CPUX86State *env) + if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) { + reserved_bits |= CR4_LAM_SUP_MASK; + } ++ if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED)) { ++ reserved_bits |= CR4_FRED_MASK; ++ } + return reserved_bits; + } + +-- +2.41.0.windows.1 + diff --git a/target-i386-no-single-step-exception-after-MOV-or-PO.patch b/target-i386-no-single-step-exception-after-MOV-or-PO.patch new file mode 100644 index 0000000000000000000000000000000000000000..994472363319107b7c033b993655b8ac706ab263 --- /dev/null +++ b/target-i386-no-single-step-exception-after-MOV-or-PO.patch @@ -0,0 +1,34 @@ +From 
004e0a984118380ff89ceaabb6ace1ebbfb1eb6d Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 11:08:13 +0800 +Subject: [PATCH] target/i386: no single-step exception after MOV or POP SS + +cherry picked from commit f0f0136abba688a6516647a79cc91e03fad6d5d7 + +Intel SDM 18.3.1.4 "If an occurrence of the MOV or POP instruction +loads the SS register executes with EFLAGS.TF = 1, no single-step debug +exception occurs following the MOV or POP instruction." + +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Gao Jiazhen +--- + target/i386/tcg/translate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c +index 037bc47e7c..dc672d7995 100644 +--- a/target/i386/tcg/translate.c ++++ b/target/i386/tcg/translate.c +@@ -2790,7 +2790,7 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) + if (recheck_tf) { + gen_helper_rechecking_single_step(tcg_env); + tcg_gen_exit_tb(NULL, 0); +- } else if (s->flags & HF_TF_MASK) { ++ } else if ((s->flags & HF_TF_MASK) && !inhibit) { + gen_helper_single_step(tcg_env); + } else if (jr) { + tcg_gen_lookup_and_goto_ptr(); +-- +2.41.0.windows.1 + diff --git a/target-i386-pass-X86CPU-to-x86_cpu_get_supported_fea.patch b/target-i386-pass-X86CPU-to-x86_cpu_get_supported_fea.patch new file mode 100644 index 0000000000000000000000000000000000000000..a69bdefb882ebceb2d8a4199570611b2763846fb --- /dev/null +++ b/target-i386-pass-X86CPU-to-x86_cpu_get_supported_fea.patch @@ -0,0 +1,108 @@ +From bd6fec2cb2bb811aa73a2a6e6da45c76ecded49c Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 27 Jun 2024 01:12:42 +0200 +Subject: [PATCH] target/i386: pass X86CPU to + x86_cpu_get_supported_feature_word + +commit 8dee38483274bd0fcf3f74dea024d719b958200d upstream. 
+ +This allows modifying the bits in "-cpu max"/"-cpu host" depending on +the guest CPU vendor (which, at least by default, is the host vendor in +the case of KVM). + +For example, machine check architecture differs between Intel and AMD, +and bits from AMD should be dropped when configuring the guest for +an Intel model. + +Intel-SIG: commit 8dee38483274 target/i386: pass X86CPU to x86_cpu_get_supported_feature_word + +Cc: Xiaoyao Li +Cc: John Allen +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 11 +++++------ + target/i386/cpu.h | 3 +-- + target/i386/kvm/kvm-cpu.c | 2 +- + 3 files changed, 7 insertions(+), 9 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f80570f4da..dfc0f7fd2d 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5959,8 +5959,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + + #endif /* !CONFIG_USER_ONLY */ + +-uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, +- bool migratable_only) ++uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w) + { + FeatureWordInfo *wi = &feature_word_info[w]; + uint64_t r = 0; +@@ -6002,7 +6001,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + r &= ~unavail; + } + #endif +- if (migratable_only) { ++ if (cpu && cpu->migratable) { + r &= x86_cpu_get_migratable_flags(w); + } + return r; +@@ -7324,7 +7323,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + * by the user. 
+ */ + env->features[w] |= +- x86_cpu_get_supported_feature_word(w, cpu->migratable) & ++ x86_cpu_get_supported_feature_word(cpu, w) & + ~env->user_features[w] & + ~feature_word_info[w].no_autoenable_flags; + } +@@ -7450,7 +7449,7 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) + + for (w = 0; w < FEATURE_WORDS; w++) { + uint64_t host_feat = +- x86_cpu_get_supported_feature_word(w, false); ++ x86_cpu_get_supported_feature_word(NULL, w); + uint64_t requested_features = env->features[w]; + uint64_t unavailable_features = requested_features & ~host_feat; + mark_unavailable_features(cpu, w, unavailable_features, prefix); +@@ -7566,7 +7565,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_LBR_FMT; + if (requested_lbr_fmt && kvm_enabled()) { + uint64_t host_perf_cap = +- x86_cpu_get_supported_feature_word(FEAT_PERF_CAPABILITIES, false); ++ x86_cpu_get_supported_feature_word(NULL, FEAT_PERF_CAPABILITIES); + unsigned host_lbr_fmt = host_perf_cap & PERF_CAP_LBR_FMT; + + if (!cpu->enable_pmu) { +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index fb6721f182..b90182582f 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -655,8 +655,7 @@ typedef enum FeatureWord { + } FeatureWord; + + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; +-uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, +- bool migratable_only); ++uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); + + /* cpuid_features bits */ + #define CPUID_FP87 (1U << 0) +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index f76972e47e..a3bc8d8f83 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -137,7 +137,7 @@ static void kvm_cpu_xsave_init(void) + if (!esa->size) { + continue; + } +- if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits) ++ if ((x86_cpu_get_supported_feature_word(NULL, esa->feature) & esa->bits) + != 
esa->bits) { + continue; + } +-- +2.41.0.windows.1 + diff --git a/target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch b/target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch deleted file mode 100644 index 462768e0318844720a338f553914e6bfbcdb0c8c..0000000000000000000000000000000000000000 --- a/target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 3b172cd5a6e62be725c778b8397310462fe0a890 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 7 May 2020 22:09:23 +0100 -Subject: [PATCH] target/i386: set the CPUID level to 0x14 on old machine-type - -RH-Author: plai@redhat.com -Message-id: <20200507220923.13723-1-plai@redhat.com> -Patchwork-id: 96347 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH RESEND] target/i386: set the CPUID level to 0x14 on old machine-type -Bugzilla: 1513681 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Igor Mammedov -RH-Acked-by: Danilo de Paula - -From: Luwei Kang - -BZ https://bugzilla.redhat.com/show_bug.cgi?id=1513681 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28146304 -Branch: rhel-av-8.2.1 - -Tested on intel-icelake-y-01.ml3.eng.bos.redhat.com. - -The CPUID level need to be set to 0x14 manually on old -machine-type if Intel PT is enabled in guest. E.g. the -CPUID[0].EAX(level)=7 and CPUID[7].EBX[25](intel-pt)=1 when the -Qemu with "-machine pc-i440fx-3.1 -cpu qemu64,+intel-pt" parameter. - -Some Intel PT capabilities are exposed by leaf 0x14 and the -missing capabilities will cause some MSRs access failed. -This patch add a warning message to inform the user to extend -the CPUID level. - -Suggested-by: Eduardo Habkost -Signed-off-by: Luwei Kang -Message-Id: <1584031686-16444-1-git-send-email-luwei.kang@intel.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit ddc2fc9e4e42ebce48b088963dc7fbd1c08d5f33) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6147cd419a..35a33db39a 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6206,9 +6206,14 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - x86_cpu_adjust_feat_level(cpu, FEAT_XSAVE); - - /* Intel Processor Trace requires CPUID[0x14] */ -- if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && -- kvm_enabled() && cpu->intel_pt_auto_level) { -- x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); -+ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT)) { -+ if (cpu->intel_pt_auto_level) { -+ x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); -+ } else if (cpu->env.cpuid_min_level < 0x14) { -+ mark_unavailable_features(cpu, FEAT_7_0_EBX, -+ CPUID_7_0_EBX_INTEL_PT, -+ "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,level=0x14\""); -+ } - } - - /* CPU topology with multi-dies support requires CPUID[0x1F] */ --- -2.27.0 - diff --git a/target-i386-sev-Add-support-for-reuse-ASID-for-diffe.patch b/target-i386-sev-Add-support-for-reuse-ASID-for-diffe.patch new file mode 100644 index 0000000000000000000000000000000000000000..f029a89c98d1bd67a16aaa11442cc419b6bdd191 --- /dev/null +++ b/target-i386-sev-Add-support-for-reuse-ASID-for-diffe.patch @@ -0,0 +1,177 @@ +From 8f4f8a2071e69130f0b9327ce8f9b92a5ae42c8d Mon Sep 17 00:00:00 2001 +From: appleLin +Date: Wed, 3 Aug 2022 21:02:41 +0800 +Subject: [PATCH] target/i386: sev: Add support for reuse ASID for different + CSV guests + +If you want to reuse one ASID for many CSV guests, you should provide a +label (i.e. userid) and the length of the label when launching a CSV guest. +The CSV guests which were provided the same userid will share the same +ASID. 
+ +Signed-off-by: hanliyang +--- + linux-headers/linux/kvm.h | 5 +++++ + qapi/qom.json | 6 ++++- + qemu-options.hx | 5 ++++- + target/i386/csv.h | 2 ++ + target/i386/sev.c | 46 ++++++++++++++++++++++++++++++++++++++- + 5 files changed, 61 insertions(+), 3 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index eb30402c2d..8dc00808ec 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2103,6 +2103,11 @@ struct kvm_csv_command_batch { + __u64 csv_batch_list_uaddr; + }; + ++struct kvm_csv_init { ++ __u64 userid_addr; ++ __u32 len; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +diff --git a/qapi/qom.json b/qapi/qom.json +index 213edd8db2..8c7461a113 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -866,6 +866,9 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# @user-id: the user id of the guest owner, only support on Hygon CPUs ++# (since 8.2) ++# + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +@@ -876,7 +879,8 @@ + '*handle': 'uint32', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool' } } ++ '*kernel-hashes': 'bool', ++ '*user-id': 'str' } } + + ## + # @ThreadContextProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 42fd09e4de..9829b1020a 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5637,7 +5637,7 @@ SRST + -object secret,id=sec0,keyid=secmaster0,format=base64,\\ + data=$SECRET,iv=$(dh_cert_file = g_strdup(value); + } + ++static char * ++sev_guest_get_user_id(Object *obj, Error **errp) ++{ ++ SevGuestState *s = SEV_GUEST(obj); ++ ++ return g_strdup(s->user_id); ++} ++ ++static void ++sev_guest_set_user_id(Object *obj, const char *value, Error **errp) ++{ ++ SevGuestState *s = SEV_GUEST(obj); ++ ++ s->user_id = g_strdup(value); ++} ++ + static char * + 
sev_guest_get_sev_device(Object *obj, Error **errp) + { +@@ -426,6 +443,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); ++ object_class_property_add_str(oc, "user-id", ++ sev_guest_get_user_id, ++ sev_guest_set_user_id); ++ object_class_property_set_description(oc, "user-id", ++ "user id of the guest owner"); + } + + static void +@@ -1174,7 +1196,29 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ ++ /* Only support reuse asid for CSV/CSV2 guest */ ++ if (is_hygon_cpu() && ++ (sev_guest->policy & GUEST_POLICY_REUSE_ASID)) { ++ char *user_id = NULL; ++ struct kvm_csv_init *init_cmd_buf = NULL; ++ ++ user_id = object_property_get_str(OBJECT(sev), "user-id", NULL); ++ if (user_id && strlen(user_id)) { ++ init_cmd_buf = g_new0(struct kvm_csv_init, 1); ++ init_cmd_buf->len = strlen(user_id); ++ init_cmd_buf->userid_addr = (__u64)user_id; ++ } ++ ret = sev_ioctl(sev->sev_fd, cmd, init_cmd_buf, &fw_error); ++ ++ if (user_id) { ++ g_free(user_id); ++ g_free(init_cmd_buf); ++ } ++ } else { ++ ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ } ++ + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +-- +2.41.0.windows.1 + diff --git a/target-i386-sev-Clear-shared_regions_list-when-reboo.patch b/target-i386-sev-Clear-shared_regions_list-when-reboo.patch new file mode 100644 index 0000000000000000000000000000000000000000..aef897233ccbf5299e75269299369f7682ae3d4d --- /dev/null +++ b/target-i386-sev-Clear-shared_regions_list-when-reboo.patch @@ -0,0 +1,57 @@ +From e98147762cb47645c590ee000dbc12c654a6cc2d Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Sun, 16 Jan 2022 19:57:58 -0500 +Subject: [PATCH] target/i386: sev: 
Clear shared_regions_list when reboot CSV + Guest + +Also fix memory leak in sev_remove_shared_regions_list(). + +Signed-off-by: hanliyang +--- + target/i386/kvm/kvm.c | 5 +++++ + target/i386/sev.c | 5 +++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a5a755db01..5730d0e0c0 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2270,6 +2270,11 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) + env->mp_state = KVM_MP_STATE_RUNNABLE; + } + ++ if (cpu_is_bsp(cpu) && ++ sev_enabled() && has_map_gpa_range) { ++ sev_remove_shared_regions_list(0, -1); ++ } ++ + /* enabled by default */ + env->poll_control_msr = 1; + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 6ccb22c00a..0b0f589aee 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1694,9 +1694,9 @@ int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr) + int sev_remove_shared_regions_list(unsigned long start, unsigned long end) + { + SevGuestState *s = sev_guest; +- struct shared_region *pos; ++ struct shared_region *pos, *next_pos; + +- QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { ++ QTAILQ_FOREACH_SAFE(pos, &s->shared_regions_list, list, next_pos) { + unsigned long l, r; + unsigned long curr_gfn_end = pos->gfn_end; + +@@ -1710,6 +1710,7 @@ int sev_remove_shared_regions_list(unsigned long start, unsigned long end) + if (l <= r) { + if (pos->gfn_start == l && pos->gfn_end == r) { + QTAILQ_REMOVE(&s->shared_regions_list, pos, list); ++ g_free(pos); + } else if (l == pos->gfn_start) { + pos->gfn_start = r; + } else if (r == pos->gfn_end) { +-- +2.41.0.windows.1 + diff --git a/target-i386-sev-Fix-incompatibility-between-SEV-and-.patch b/target-i386-sev-Fix-incompatibility-between-SEV-and-.patch new file mode 100644 index 0000000000000000000000000000000000000000..abc6d3af6a16b00de6872cd0798850ff8fc16966 --- /dev/null +++ b/target-i386-sev-Fix-incompatibility-between-SEV-and-.patch @@ -0,0 +1,36 @@ +From 
c357946fa7c1d45a09b40214b5113f689bf7bbd0 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Fri, 1 Mar 2024 14:12:44 +0800 +Subject: [PATCH] target/i386: sev: Fix incompatibility between SEV and CSV on + the GET_ID API + +If the length of GET_ID request is too small, Hygon CSV will return +SEV_RET_INVALID_PARAM. This return code doesn't comply with SEV API +Spec. + +Hygon will consider to fix the compatibility issue of return value +of the GET_ID API, so also check whether the return value is +SEV_RET_INVALID_LEN on Hygon CPUs. + +Signed-off-by: hanliyang +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 2c6aecd1a3..04888bc3a8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -589,7 +589,8 @@ static int sev_get_cpu0_id(int fd, guchar **id, size_t *id_len, Error **errp) + + /* query the ID length */ + r = sev_platform_ioctl(fd, SEV_GET_ID2, &get_id2, &err); +- if (r < 0 && err != SEV_RET_INVALID_LEN) { ++ if (r < 0 && err != SEV_RET_INVALID_LEN && ++ !(is_hygon_cpu() && err == SEV_RET_INVALID_PARAM)) { + error_setg(errp, "SEV: Failed to get ID ret=%d fw_err=%d (%s)", + r, err, fw_error_to_str(err)); + return 1; +-- +2.41.0.windows.1 + diff --git a/target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch b/target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch new file mode 100644 index 0000000000000000000000000000000000000000..c3cffaa94a06016242e9cd7e2147676da7622321 --- /dev/null +++ b/target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch @@ -0,0 +1,63 @@ +From 5a4e9ad98edc1ba5c1e93f0e24753c1a8355ffce Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Wed, 13 Mar 2024 13:49:37 +0800 +Subject: [PATCH] target/i386/sev: Fix missing ERRP_GUARD() for error_prepend() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from f55cceac8c03e639711490f08996c32861591435 +As the comment in qapi/error, passing @errp 
to error_prepend() requires ERRP_GUARD(): + +* = Why, when and how to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +... +* - It should not be passed to error_prepend(), error_vprepend() or +* error_append_hint(), because that doesn't work with &error_fatal. +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function. +* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. + +ERRP_GUARD() could avoid the case when @errp is the pointer of +error_fatal, the user can't see this additional information, because +exit() happens in error_setg earlier than information is added [1]. + +The sev_inject_launch_secret() passes @errp to error_prepend(), and as +an APIs defined in target/i386/sev.h, it is necessary to protect its +@errp with ERRP_GUARD(). + +To avoid the issue like [1] said, add missing ERRP_GUARD() at the +beginning of this function. + +[1]: Issue description in the commit message of commit ae7c80a7bd73 + ("error: New macro ERRP_GUARD()"). 
+ +Cc: Paolo Bonzini +Cc: Marcelo Tosatti +Signed-off-by: Zhao Liu +Reviewed-by: Thomas Huth +Message-ID: <20240229143914.1977550-17-zhao1.liu@linux.intel.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: dinglimin +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 9a71246682..1a9d1db7a8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1044,6 +1044,7 @@ sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + uint64_t gpa, Error **errp) + { ++ ERRP_GUARD(); + struct kvm_sev_launch_secret input; + g_autofree guchar *data = NULL, *hdr = NULL; + int error, ret = 1; +-- +2.27.0 + diff --git a/target-i386-sev-Return-0-if-sev_send_get_packet_len-.patch b/target-i386-sev-Return-0-if-sev_send_get_packet_len-.patch new file mode 100644 index 0000000000000000000000000000000000000000..ad0f2796dfe73b930a4890facb59d7d0738f76fd --- /dev/null +++ b/target-i386-sev-Return-0-if-sev_send_get_packet_len-.patch @@ -0,0 +1,59 @@ +From ccca5618025567c4168630459b90bf11bf96cca4 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Wed, 31 Jan 2024 07:26:57 +0800 +Subject: [PATCH] target/i386: sev: Return 0 if sev_send_get_packet_len() fails + +The send_packet_hdr_len of struct SEVState is of type size_t +which is an unsigned class type. If the send_packet_hdr_len +is assigned as -1, then it will be a huge number and the QEMU +process will crash when allocating packet buffer with the +huge size. + +For example, the following code could cause crash described +above. + + ``` + static int + sev_send_update_data(SEVState *s, QEMUFile *f, uint8_t *ptr, uint32_t size, + uint64_t *bytes_sent) + { + + ...... 
+ + if (!s->send_packet_hdr) { + s->send_packet_hdr_len = sev_send_get_packet_len(&fw_error); + if (s->send_packet_hdr_len < 1) { + error_report("%s: SEND_UPDATE fw_error=%d '%s'", + __func__, fw_error, fw_error_to_str(fw_error)); + return 1; + } + + s->send_packet_hdr = g_new(gchar, s->send_packet_hdr_len); + } + + ...... + + } + ``` + +Signed-off-by: hanliyang +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 98b0d3937a..6ccb22c00a 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1492,7 +1492,7 @@ sev_send_get_packet_len(int *fw_err) + ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_DATA, + &update, fw_err); + if (*fw_err != SEV_RET_INVALID_LEN) { +- ret = -1; ++ ret = 0; + error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", + __func__, ret, *fw_err, fw_error_to_str(*fw_err)); + goto err; +-- +2.41.0.windows.1 + diff --git a/target-i386-sev-add-support-to-encrypt-the-outgoing-.patch b/target-i386-sev-add-support-to-encrypt-the-outgoing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..44d5dcfb42fce2386fdbc4c7bacf8f1dd8814b8c --- /dev/null +++ b/target-i386-sev-add-support-to-encrypt-the-outgoing-.patch @@ -0,0 +1,319 @@ +From 0a7dde8450d9b6a6d0c75cef11e4bbff65e95edc Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 12:55:25 +0000 +Subject: [PATCH] target/i386: sev: add support to encrypt the outgoing page + +cherry-picked from https://github.com/AMDESE/qemu/commit/5187c6f86bd. + +The sev_save_outgoing_page() provide the implementation to encrypt the +guest private pages during the transit. The routines uses the SEND_START +command to create the outgoing encryption context on the first call then +uses the SEND_UPDATE_DATA command to encrypt the data before writing it +to the socket. While encrypting the data SEND_UPDATE_DATA produces some +metadata (e.g MAC, IV). 
The metadata is also sent to the target machine. +After migration is completed, we issue the SEND_FINISH command to transition +the SEV guest state from sending to unrunnable state. + +Signed-off-by: Brijesh Singh +Co-developed-by: Ashish Kalra +Signed-off-by: Ashish Kalra +[ Fix conflict. ] +Signed-off-by: hanliyang +--- + target/i386/sev.c | 219 +++++++++++++++++++++++++++++++++++++++ + target/i386/sev.h | 2 + + target/i386/trace-events | 3 + + 3 files changed, 224 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 65984f013a..e1fa0ec5e5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -31,6 +31,8 @@ + #include "sysemu/runstate.h" + #include "trace.h" + #include "migration/blocker.h" ++#include "migration/qemu-file.h" ++#include "migration/misc.h" + #include "qom/object.h" + #include "monitor/monitor.h" + #include "monitor/hmp-target.h" +@@ -79,6 +81,8 @@ struct SevGuestState { + size_t remote_plat_cert_len; + guchar *amd_cert; + size_t amd_cert_len; ++ gchar *send_packet_hdr; ++ size_t send_packet_hdr_len; + + uint32_t reset_cs; + uint32_t reset_ip; +@@ -167,6 +171,7 @@ static const char *const sev_fw_errlist[] = { + + static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_setup = sev_save_setup, ++ .save_outgoing_page = sev_save_outgoing_page, + }; + + static int +@@ -960,6 +965,38 @@ error: + return 1; + } + ++static void ++sev_send_finish(void) ++{ ++ int ret, error; ++ ++ trace_kvm_sev_send_finish(); ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_FINISH, 0, &error); ++ if (ret) { ++ error_report("%s: SEND_FINISH ret=%d fw_error=%d '%s'", ++ __func__, ret, error, fw_error_to_str(error)); ++ } ++ ++ g_free(sev_guest->send_packet_hdr); ++ sev_set_guest_state(sev_guest, SEV_STATE_RUNNING); ++} ++ ++static void ++sev_migration_state_notifier(Notifier *notifier, void *data) ++{ ++ MigrationState *s = data; ++ ++ if (migration_has_finished(s) || ++ migration_in_postcopy_after_devices(s) || ++ 
migration_has_failed(s)) { ++ if (sev_check_state(sev_guest, SEV_STATE_SEND_UPDATE)) { ++ sev_send_finish(); ++ } ++ } ++} ++ ++static Notifier sev_migration_state; ++ + int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevGuestState *sev +@@ -1075,6 +1112,7 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + ram_block_notifier_add(&sev_ram_notifier); + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev); ++ migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); + + cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; + +@@ -1317,6 +1355,187 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) + return 0; + } + ++static int ++sev_get_send_session_length(void) ++{ ++ int ret, fw_err = 0; ++ struct kvm_sev_send_start start = {}; ++ ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_START, &start, &fw_err); ++ if (fw_err != SEV_RET_INVALID_LEN) { ++ ret = -1; ++ error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_err, fw_error_to_str(fw_err)); ++ goto err; ++ } ++ ++ ret = start.session_len; ++err: ++ return ret; ++} ++ ++static int ++sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) ++{ ++ gsize pdh_len = 0, plat_cert_len; ++ int session_len, ret, fw_error; ++ struct kvm_sev_send_start start = { }; ++ guchar *pdh = NULL, *plat_cert = NULL, *session = NULL; ++ Error *local_err = NULL; ++ ++ if (!s->remote_pdh || !s->remote_plat_cert || !s->amd_cert_len) { ++ error_report("%s: missing remote PDH or PLAT_CERT", __func__); ++ return 1; ++ } ++ ++ start.pdh_cert_uaddr = (uintptr_t) s->remote_pdh; ++ start.pdh_cert_len = s->remote_pdh_len; ++ ++ start.plat_certs_uaddr = (uintptr_t)s->remote_plat_cert; ++ start.plat_certs_len = s->remote_plat_cert_len; ++ ++ start.amd_certs_uaddr = (uintptr_t)s->amd_cert; ++ start.amd_certs_len = s->amd_cert_len; ++ ++ /* get the 
session length */ ++ session_len = sev_get_send_session_length(); ++ if (session_len < 0) { ++ ret = 1; ++ goto err; ++ } ++ ++ session = g_new0(guchar, session_len); ++ start.session_uaddr = (unsigned long)session; ++ start.session_len = session_len; ++ ++ /* Get our PDH certificate */ ++ ret = sev_get_pdh_info(s->sev_fd, &pdh, &pdh_len, ++ &plat_cert, &plat_cert_len, &local_err); ++ if (ret) { ++ error_report("Failed to get our PDH cert"); ++ goto err; ++ } ++ ++ trace_kvm_sev_send_start(start.pdh_cert_uaddr, start.pdh_cert_len, ++ start.plat_certs_uaddr, start.plat_certs_len, ++ start.amd_certs_uaddr, start.amd_certs_len); ++ ++ ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_START, &start, &fw_error); ++ if (ret < 0) { ++ error_report("%s: SEND_START ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++ qemu_put_be32(f, start.policy); ++ qemu_put_be32(f, pdh_len); ++ qemu_put_buffer(f, (uint8_t *)pdh, pdh_len); ++ qemu_put_be32(f, start.session_len); ++ qemu_put_buffer(f, (uint8_t *)start.session_uaddr, start.session_len); ++ *bytes_sent = 12 + pdh_len + start.session_len; ++ ++ sev_set_guest_state(s, SEV_STATE_SEND_UPDATE); ++ ++err: ++ g_free(pdh); ++ g_free(plat_cert); ++ return ret; ++} ++ ++static int ++sev_send_get_packet_len(int *fw_err) ++{ ++ int ret; ++ struct kvm_sev_send_update_data update = { 0, }; ++ ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_DATA, ++ &update, fw_err); ++ if (*fw_err != SEV_RET_INVALID_LEN) { ++ ret = -1; ++ error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", ++ __func__, ret, *fw_err, fw_error_to_str(*fw_err)); ++ goto err; ++ } ++ ++ ret = update.hdr_len; ++ ++err: ++ return ret; ++} ++ ++static int ++sev_send_update_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr, uint32_t size, ++ uint64_t *bytes_sent) ++{ ++ int ret, fw_error; ++ guchar *trans; ++ struct kvm_sev_send_update_data update = { }; ++ ++ /* ++ * If this is first call then query the 
packet header bytes and allocate ++ * the packet buffer. ++ */ ++ if (!s->send_packet_hdr) { ++ s->send_packet_hdr_len = sev_send_get_packet_len(&fw_error); ++ if (s->send_packet_hdr_len < 1) { ++ error_report("%s: SEND_UPDATE fw_error=%d '%s'", ++ __func__, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ s->send_packet_hdr = g_new(gchar, s->send_packet_hdr_len); ++ } ++ ++ /* allocate transport buffer */ ++ trans = g_new(guchar, size); ++ ++ update.hdr_uaddr = (uintptr_t)s->send_packet_hdr; ++ update.hdr_len = s->send_packet_hdr_len; ++ update.guest_uaddr = (uintptr_t)ptr; ++ update.guest_len = size; ++ update.trans_uaddr = (uintptr_t)trans; ++ update.trans_len = size; ++ ++ trace_kvm_sev_send_update_data(ptr, trans, size); ++ ++ ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, &fw_error); ++ if (ret) { ++ error_report("%s: SEND_UPDATE_DATA ret=%d fw_error=%d '%s'", ++ __func__, ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++ qemu_put_be32(f, update.hdr_len); ++ qemu_put_buffer(f, (uint8_t *)update.hdr_uaddr, update.hdr_len); ++ *bytes_sent = 4 + update.hdr_len; ++ ++ qemu_put_be32(f, update.trans_len); ++ qemu_put_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ *bytes_sent += (4 + update.trans_len); ++ ++err: ++ g_free(trans); ++ return ret; ++} ++ ++int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, ++ uint32_t sz, uint64_t *bytes_sent) ++{ ++ SevGuestState *s = sev_guest; ++ ++ /* ++ * If this is a first buffer then create outgoing encryption context ++ * and write our PDH, policy and session data. 
++ */ ++ if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) && ++ sev_send_start(s, f, bytes_sent)) { ++ error_report("Failed to create outgoing context"); ++ return 1; ++ } ++ ++ return sev_send_update_data(s, f, ptr, sz, bytes_sent); ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +diff --git a/target/i386/sev.h b/target/i386/sev.h +index e96de021f5..463e94bb81 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -53,6 +53,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); + int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); + int sev_save_setup(const char *pdh, const char *plat_cert, + const char *amd_cert); ++int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, ++ uint32_t size, uint64_t *bytes_sent); + int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 2cd8726eeb..e8d4aec125 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -11,3 +11,6 @@ kvm_sev_launch_measurement(const char *value) "data %s" + kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" ++kvm_sev_send_start(uint64_t pdh, int l1, uint64_t plat, int l2, uint64_t amd, int l3) "pdh 0x%" PRIx64 " len %d plat 0x%" PRIx64 " len %d amd 0x%" PRIx64 " len %d" ++kvm_sev_send_update_data(void *src, void *dst, int len) "guest %p trans %p len %d" ++kvm_sev_send_finish(void) "" +-- +2.41.0.windows.1 + diff --git a/target-i386-sev-add-support-to-load-incoming-encrypt.patch b/target-i386-sev-add-support-to-load-incoming-encrypt.patch new file mode 100644 index 
0000000000000000000000000000000000000000..4407508001055b7469e45e32c7a1f8139016562d --- /dev/null +++ b/target-i386-sev-add-support-to-load-incoming-encrypt.patch @@ -0,0 +1,221 @@ +From 778457c2f0f91b6a52e5db02dd3dc1f35ae64526 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 13:00:50 +0000 +Subject: [PATCH] target/i386: sev: add support to load incoming encrypted page + +cherry-picked from https://github.com/AMDESE/qemu/commit/e86e5dccb045. + +The sev_load_incoming_page() provides the implementation to read the +incoming guest private pages from the socket and load them into the guest +memory. The routine uses the RECEIVE_START command to create the +incoming encryption context on the first call then uses the +RECEIVE_UPDATE_DATA command to load the encrypted pages into the guest +memory. After migration is completed, we issue the RECEIVE_FINISH command +to transition the SEV guest to the runnable state so that it can be +executed. + +Signed-off-by: Brijesh Singh +Co-developed-by: Ashish Kalra +Signed-off-by: Ashish Kalra +[ Fix conflicts.
] +Signed-off-by: hanliyang +--- + target/i386/sev.c | 137 ++++++++++++++++++++++++++++++++++++++- + target/i386/sev.h | 1 + + target/i386/trace-events | 3 + + 3 files changed, 140 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e1fa0ec5e5..de1a4b271e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -172,6 +172,7 @@ static const char *const sev_fw_errlist[] = { + static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { + .save_setup = sev_save_setup, + .save_outgoing_page = sev_save_outgoing_page, ++ .load_incoming_page = sev_load_incoming_page, + }; + + static int +@@ -911,13 +912,33 @@ sev_launch_finish(SevGuestState *sev) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static int ++sev_receive_finish(SevGuestState *s) ++{ ++ int error, ret = 1; ++ ++ trace_kvm_sev_receive_finish(); ++ ret = sev_ioctl(s->sev_fd, KVM_SEV_RECEIVE_FINISH, 0, &error); ++ if (ret) { ++ error_report("%s: RECEIVE_FINISH ret=%d fw_error=%d '%s'", ++ __func__, ret, error, fw_error_to_str(error)); ++ goto err; ++ } ++ ++ sev_set_guest_state(s, SEV_STATE_RUNNING); ++err: ++ return ret; ++} ++ + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { + SevGuestState *sev = opaque; + + if (running) { +- if (!sev_check_state(sev, SEV_STATE_RUNNING)) { ++ if (sev_check_state(sev, SEV_STATE_RECEIVE_UPDATE)) { ++ sev_receive_finish(sev); ++ } else if (!sev_check_state(sev, SEV_STATE_RUNNING)) { + sev_launch_finish(sev); + } + } +@@ -1536,6 +1557,120 @@ int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, + return sev_send_update_data(s, f, ptr, sz, bytes_sent); + } + ++static int ++sev_receive_start(SevGuestState *sev, QEMUFile *f) ++{ ++ int ret = 1; ++ int fw_error; ++ struct kvm_sev_receive_start start = { }; ++ gchar *session = NULL, *pdh_cert = NULL; ++ ++ /* get SEV guest handle */ ++ start.handle = object_property_get_int(OBJECT(sev), "handle", ++ &error_abort); ++ ++ /* 
get the source policy */ ++ start.policy = qemu_get_be32(f); ++ ++ /* get source PDH key */ ++ start.pdh_len = qemu_get_be32(f); ++ if (!check_blob_length(start.pdh_len)) { ++ return 1; ++ } ++ ++ pdh_cert = g_new(gchar, start.pdh_len); ++ qemu_get_buffer(f, (uint8_t *)pdh_cert, start.pdh_len); ++ start.pdh_uaddr = (uintptr_t)pdh_cert; ++ ++ /* get source session data */ ++ start.session_len = qemu_get_be32(f); ++ if (!check_blob_length(start.session_len)) { ++ return 1; ++ } ++ session = g_new(gchar, start.session_len); ++ qemu_get_buffer(f, (uint8_t *)session, start.session_len); ++ start.session_uaddr = (uintptr_t)session; ++ ++ trace_kvm_sev_receive_start(start.policy, session, pdh_cert); ++ ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_START, ++ &start, &fw_error); ++ if (ret < 0) { ++ error_report("Error RECEIVE_START ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++ ++ object_property_set_int(OBJECT(sev), "handle", start.handle, &error_abort); ++ sev_set_guest_state(sev, SEV_STATE_RECEIVE_UPDATE); ++err: ++ g_free(session); ++ g_free(pdh_cert); ++ ++ return ret; ++} ++ ++static int sev_receive_update_data(QEMUFile *f, uint8_t *ptr) ++{ ++ int ret = 1, fw_error = 0; ++ gchar *hdr = NULL, *trans = NULL; ++ struct kvm_sev_receive_update_data update = {}; ++ ++ /* get packet header */ ++ update.hdr_len = qemu_get_be32(f); ++ if (!check_blob_length(update.hdr_len)) { ++ return 1; ++ } ++ ++ hdr = g_new(gchar, update.hdr_len); ++ qemu_get_buffer(f, (uint8_t *)hdr, update.hdr_len); ++ update.hdr_uaddr = (uintptr_t)hdr; ++ ++ /* get transport buffer */ ++ update.trans_len = qemu_get_be32(f); ++ if (!check_blob_length(update.trans_len)) { ++ goto err; ++ } ++ ++ trans = g_new(gchar, update.trans_len); ++ update.trans_uaddr = (uintptr_t)trans; ++ qemu_get_buffer(f, (uint8_t *)update.trans_uaddr, update.trans_len); ++ ++ update.guest_uaddr = (uintptr_t) ptr; ++ update.guest_len = update.trans_len; ++ ++ 
trace_kvm_sev_receive_update_data(trans, ptr, update.guest_len, ++ hdr, update.hdr_len); ++ ++ ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_DATA, ++ &update, &fw_error); ++ if (ret) { ++ error_report("Error RECEIVE_UPDATE_DATA ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ goto err; ++ } ++err: ++ g_free(trans); ++ g_free(hdr); ++ return ret; ++} ++ ++int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr) ++{ ++ SevGuestState *s = sev_guest; ++ ++ /* ++ * If this is first buffer and SEV is not in recieiving state then ++ * use RECEIVE_START command to create a encryption context. ++ */ ++ if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) && ++ sev_receive_start(s, f)) { ++ return 1; ++ } ++ ++ return sev_receive_update_data(f, ptr); ++} ++ + static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 463e94bb81..d94da2956b 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -55,6 +55,7 @@ int sev_save_setup(const char *pdh, const char *plat_cert, + const char *amd_cert); + int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, + uint32_t size, uint64_t *bytes_sent); ++int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr); + int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +diff --git a/target/i386/trace-events b/target/i386/trace-events +index e8d4aec125..475de65ad4 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -14,3 +14,6 @@ kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data + kvm_sev_send_start(uint64_t pdh, int l1, uint64_t plat, int l2, uint64_t amd, int l3) "pdh 0x%" PRIx64 " len %d plat 0x%" PRIx64 " len %d amd 0x%" PRIx64 " len %d" + kvm_sev_send_update_data(void *src, void *dst, int len) "guest %p trans %p len %d" + kvm_sev_send_finish(void) "" 
++kvm_sev_receive_start(int policy, void *session, void *pdh) "policy 0x%x session %p pdh %p" ++kvm_sev_receive_update_data(void *src, void *dst, int len, void *hdr, int hdr_len) "guest %p trans %p len %d hdr %p hdr_len %d" ++kvm_sev_receive_finish(void) "" +-- +2.41.0.windows.1 + diff --git a/target-i386-sev-do-not-create-launch-context-for-an-.patch b/target-i386-sev-do-not-create-launch-context-for-an-.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e52e4634abeced5b869b4a27b53416d5cb1f296 --- /dev/null +++ b/target-i386-sev-do-not-create-launch-context-for-an-.patch @@ -0,0 +1,49 @@ +From c8a6d5f18c45079575b707db8f017cce22acc970 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 12:16:09 +0000 +Subject: [PATCH] target/i386: sev: do not create launch context for an + incoming guest + +cherry-picked from https://github.com/AMDESE/qemu/commit/b85694233495. + +The LAUNCH_START is used for creating an encryption context to encrypt +newly created guest, for an incoming guest the RECEIVE_START should be +used. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Brijesh Singh +Signed-off-by: Ashish Kalra +[ Fix conflict. ] +Signed-off-by: hanliyang +--- + target/i386/sev.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 10233511cf..65984f013a 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1060,10 +1060,16 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- ret = sev_launch_start(sev); +- if (ret) { +- error_setg(errp, "%s: failed to create encryption context", __func__); +- goto err; ++ /* ++ * The LAUNCH context is used for new guest, if its an incoming guest ++ * then RECEIVE context will be created after the connection is established. 
++ */ ++ if (!runstate_check(RUN_STATE_INMIGRATE)) { ++ ret = sev_launch_start(sev); ++ if (ret) { ++ error_setg(errp, "%s: failed to create encryption context", __func__); ++ goto err; ++ } + } + + ram_block_notifier_add(&sev_ram_notifier); +-- +2.41.0.windows.1 + diff --git a/target-i386-sev-provide-callback-to-setup-outgoing-c.patch b/target-i386-sev-provide-callback-to-setup-outgoing-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..76cf1bd7a10a973a7aba7250011c92964c2bc230 --- /dev/null +++ b/target-i386-sev-provide-callback-to-setup-outgoing-c.patch @@ -0,0 +1,135 @@ +From f6753191237118294d04193908db503bb87619f7 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Tue, 27 Jul 2021 12:10:23 +0000 +Subject: [PATCH] target/i386: sev: provide callback to setup outgoing context + +cherry-picked from https://github.com/AMDESE/qemu/commit/7521883afc0. + +The user provides the target machine's Platform Diffie-Hellman key (PDH) +and certificate chain before starting the SEV guest migration. Cache the +certificate chain as we need them while creating the outgoing context. + +Signed-off-by: Brijesh Singh +Co-developed-by: Ashish Kalra +Signed-off-by: Ashish Kalra +[ Fix conflict. 
] +Signed-off-by: hanliyang +--- + target/i386/sev.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ + target/i386/sev.h | 2 ++ + 2 files changed, 61 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 1a9d1db7a8..10233511cf 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -73,6 +73,12 @@ struct SevGuestState { + int sev_fd; + SevState state; + gchar *measurement; ++ guchar *remote_pdh; ++ size_t remote_pdh_len; ++ guchar *remote_plat_cert; ++ size_t remote_plat_cert_len; ++ guchar *amd_cert; ++ size_t amd_cert_len; + + uint32_t reset_cs; + uint32_t reset_ip; +@@ -157,6 +163,12 @@ static const char *const sev_fw_errlist[] = { + + #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) + ++#define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */ ++ ++static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { ++ .save_setup = sev_save_setup, ++}; ++ + static int + sev_ioctl(int fd, int cmd, void *data, int *error) + { +@@ -906,6 +918,48 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++static inline bool check_blob_length(size_t value) ++{ ++ if (value > SEV_FW_BLOB_MAX_SIZE) { ++ error_report("invalid length max=%d got=%ld", ++ SEV_FW_BLOB_MAX_SIZE, value); ++ return false; ++ } ++ ++ return true; ++} ++ ++int sev_save_setup(const char *pdh, const char *plat_cert, ++ const char *amd_cert) ++{ ++ SevGuestState *s = sev_guest; ++ ++ s->remote_pdh = g_base64_decode(pdh, &s->remote_pdh_len); ++ if (!check_blob_length(s->remote_pdh_len)) { ++ goto error; ++ } ++ ++ s->remote_plat_cert = g_base64_decode(plat_cert, ++ &s->remote_plat_cert_len); ++ if (!check_blob_length(s->remote_plat_cert_len)) { ++ goto error; ++ } ++ ++ s->amd_cert = g_base64_decode(amd_cert, &s->amd_cert_len); ++ if (!check_blob_length(s->amd_cert_len)) { ++ goto error; ++ } ++ ++ return 0; ++ ++error: ++ g_free(s->remote_pdh); ++ g_free(s->remote_plat_cert); ++ g_free(s->amd_cert); ++ ++ return 1; ++} ++ + int 
sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevGuestState *sev +@@ -920,6 +974,9 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return 0; + } + ++ ConfidentialGuestSupportClass *cgs_class = ++ (ConfidentialGuestSupportClass *) object_get_class(OBJECT(cgs)); ++ + ret = ram_block_discard_disable(true); + if (ret) { + error_report("%s: cannot disable RAM discard", __func__); +@@ -1013,6 +1070,8 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev); + ++ cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; ++ + cgs->ready = true; + + return 0; +diff --git a/target/i386/sev.h b/target/i386/sev.h +index e7499c95b1..e96de021f5 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -51,6 +51,8 @@ uint32_t sev_get_reduced_phys_bits(void); + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); + + int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); ++int sev_save_setup(const char *pdh, const char *plat_cert, ++ const char *amd_cert); + int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +-- +2.41.0.windows.1 + diff --git a/target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch b/target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch deleted file mode 100644 index b4156952cf7fca7359fb38c8db086837b8ba3651..0000000000000000000000000000000000000000 --- a/target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 70e4d278b89e04d7f9397ea25163feb6a7dbaa2d Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 2 Jul 2019 14:58:48 +0200 -Subject: [PATCH] target/i386: work around KVM_GET_MSRS bug for secondary - execution controls - -Some secondary controls are automatically enabled/disabled based on the CPUID -values that are set for the guest. 
However, they are still available at a -global level and therefore should be present when KVM_GET_MSRS is sent to -/dev/kvm. - -Unfortunately KVM forgot to include those, so fix that. - -Signed-off-by: Paolo Bonzini ---- - target/i386/kvm.c | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index fafb9fb26d..b97f40df6b 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -474,6 +474,23 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - value = msr_data.entries[0].data; - switch (index) { - case MSR_IA32_VMX_PROCBASED_CTLS2: -+ /* KVM forgot to add these bits for some time, do this ourselves. */ -+ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ } -+ /* fall through */ - case MSR_IA32_VMX_TRUE_PINBASED_CTLS: - case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: - case MSR_IA32_VMX_TRUE_ENTRY_CTLS: --- -2.27.0 - diff --git a/target-loongarch-Add-TCG-macro-in-structure-CPUArchS.patch b/target-loongarch-Add-TCG-macro-in-structure-CPUArchS.patch new file mode 100644 index 0000000000000000000000000000000000000000..4988ba9ad37b71c226dadf1a5d833e8526dd12c1 --- /dev/null +++ b/target-loongarch-Add-TCG-macro-in-structure-CPUArchS.patch @@ -0,0 +1,235 @@ +From 033e2a67885cf7347473e09454a6704074e05878 Mon Sep 17 00:00:00 2001 +From: Bibo Mao 
+Date: Mon, 6 May 2024 09:19:12 +0800 +Subject: [PATCH 42/78] target/loongarch: Add TCG macro in structure + CPUArchState + +In structure CPUArchState some struct elements are only used in TCG +mode, and it is not used in KVM mode. Macro CONFIG_TCG is added to +make it simpiler in KVM mode, also there is the same modification +in c code when these structure elements are used. + +When VM runs in KVM mode, TLB entries are not used and do not need +migrate. It is only useful when it runs in TCG mode. + +Signed-off-by: Bibo Mao +Reviewed-by: Richard Henderson +Message-Id: <20240506011912.2108842-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 7 +++-- + target/loongarch/cpu.h | 16 +++++++---- + target/loongarch/cpu_helper.c | 9 ++++++ + target/loongarch/machine.c | 52 ++++++++++++++++++++++++----------- + 4 files changed, 60 insertions(+), 24 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index f7b5dae7ed..220d40fb01 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -536,7 +536,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + lacc->parent_phases.hold(obj); + } + ++#ifdef CONFIG_TCG + env->fcsr0_mask = FCSR0_M1 | FCSR0_M2 | FCSR0_M3; ++#endif + env->fcsr0 = 0x0; + + int n; +@@ -581,7 +583,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + + #ifndef CONFIG_USER_ONLY + env->pc = 0x1c000000; ++#ifdef CONFIG_TCG + memset(env->tlb, 0, sizeof(env->tlb)); ++#endif + if (kvm_enabled()) { + kvm_arch_reset_vcpu(env); + } +@@ -778,8 +782,7 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) + int i; + + qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc); +- qemu_fprintf(f, " FCSR0 0x%08x fp_status 0x%02x\n", env->fcsr0, +- get_float_exception_flags(&env->fp_status)); ++ qemu_fprintf(f, " FCSR0 0x%08x\n", env->fcsr0); + + /* gpr */ + for (i = 0; i < 32; i++) { +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 
e3a15c593f..19bcad28de 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -275,6 +275,7 @@ union fpr_t { + VReg vreg; + }; + ++#ifdef CONFIG_TCG + struct LoongArchTLB { + uint64_t tlb_misc; + /* Fields corresponding to CSR_TLBELO0/1 */ +@@ -282,23 +283,18 @@ struct LoongArchTLB { + uint64_t tlb_entry1; + }; + typedef struct LoongArchTLB LoongArchTLB; ++#endif + + typedef struct CPUArchState { + uint64_t gpr[32]; + uint64_t pc; + + fpr_t fpr[32]; +- float_status fp_status; + bool cf[8]; +- + uint32_t fcsr0; +- uint32_t fcsr0_mask; + + uint32_t cpucfg[21]; + +- uint64_t lladdr; /* LL virtual address compared against SC */ +- uint64_t llval; +- + /* LoongArch CSRs */ + uint64_t CSR_CRMD; + uint64_t CSR_PRMD; +@@ -355,8 +351,16 @@ typedef struct CPUArchState { + uint64_t CSR_DERA; + uint64_t CSR_DSAVE; + ++#ifdef CONFIG_TCG ++ float_status fp_status; ++ uint32_t fcsr0_mask; ++ uint64_t lladdr; /* LL virtual address compared against SC */ ++ uint64_t llval; ++#endif + #ifndef CONFIG_USER_ONLY ++#ifdef CONFIG_TCG + LoongArchTLB tlb[LOONGARCH_TLB_MAX]; ++#endif + + AddressSpace *address_space_iocsr; + bool load_elf; +diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c +index f68d63f466..39037eecb4 100644 +--- a/target/loongarch/cpu_helper.c ++++ b/target/loongarch/cpu_helper.c +@@ -11,6 +11,7 @@ + #include "internals.h" + #include "cpu-csr.h" + ++#ifdef CONFIG_TCG + static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, + int *prot, target_ulong address, + int access_type, int index, int mmu_idx) +@@ -154,6 +155,14 @@ static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, + + return TLBRET_NOMATCH; + } ++#else ++static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ MMUAccessType access_type, int mmu_idx) ++{ ++ return TLBRET_NOMATCH; ++} ++#endif + + static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, + target_ulong 
dmw) +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index ec5abe56db..4bbf495d6b 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -8,6 +8,7 @@ + #include "qemu/osdep.h" + #include "cpu.h" + #include "migration/cpu.h" ++#include "sysemu/tcg.h" + #include "vec.h" + #include "kvm/kvm_loongarch.h" + #include "sysemu/kvm.h" +@@ -111,19 +112,6 @@ static const VMStateDescription vmstate_lasx = { + }, + }; + +-/* TLB state */ +-const VMStateDescription vmstate_tlb = { +- .name = "cpu/tlb", +- .version_id = 0, +- .minimum_version_id = 0, +- .fields = (VMStateField[]) { +- VMSTATE_UINT64(tlb_misc, LoongArchTLB), +- VMSTATE_UINT64(tlb_entry0, LoongArchTLB), +- VMSTATE_UINT64(tlb_entry1, LoongArchTLB), +- VMSTATE_END_OF_LIST() +- } +-}; +- + static int cpu_post_load(void *opaque, int version_id) + { + #ifdef CONFIG_KVM +@@ -142,6 +130,38 @@ static int cpu_pre_save(void *opaque) + return 0; + } + ++#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) ++static bool tlb_needed(void *opaque) ++{ ++ return tcg_enabled(); ++} ++ ++/* TLB state */ ++static const VMStateDescription vmstate_tlb_entry = { ++ .name = "cpu/tlb_entry", ++ .version_id = 0, ++ .minimum_version_id = 0, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(tlb_misc, LoongArchTLB), ++ VMSTATE_UINT64(tlb_entry0, LoongArchTLB), ++ VMSTATE_UINT64(tlb_entry1, LoongArchTLB), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static const VMStateDescription vmstate_tlb = { ++ .name = "cpu/tlb", ++ .version_id = 0, ++ .minimum_version_id = 0, ++ .needed = tlb_needed, ++ .fields = (const VMStateField[]) { ++ VMSTATE_STRUCT_ARRAY(env.tlb, LoongArchCPU, LOONGARCH_TLB_MAX, ++ 0, vmstate_tlb_entry, LoongArchTLB), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++#endif ++ + /* LoongArch CPU state */ + const VMStateDescription vmstate_loongarch_cpu = { + .name = "cpu", +@@ -212,9 +232,6 @@ const VMStateDescription vmstate_loongarch_cpu = { + VMSTATE_UINT64(env.CSR_DBG, LoongArchCPU), + 
VMSTATE_UINT64(env.CSR_DERA, LoongArchCPU), + VMSTATE_UINT64(env.CSR_DSAVE, LoongArchCPU), +- /* TLB */ +- VMSTATE_STRUCT_ARRAY(env.tlb, LoongArchCPU, LOONGARCH_TLB_MAX, +- 0, vmstate_tlb, LoongArchTLB), + + VMSTATE_UINT64(kvm_state_counter, LoongArchCPU), + +@@ -224,6 +241,9 @@ const VMStateDescription vmstate_loongarch_cpu = { + &vmstate_fpu, + &vmstate_lsx, + &vmstate_lasx, ++#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) ++ &vmstate_tlb, ++#endif + NULL + } + }; +-- +2.39.1 + diff --git a/target-loongarch-Add-compatible-support-about-VM-reb.patch b/target-loongarch-Add-compatible-support-about-VM-reb.patch new file mode 100644 index 0000000000000000000000000000000000000000..0354a6bd1f67f79b5400cc322e974f8ab1349366 --- /dev/null +++ b/target-loongarch-Add-compatible-support-about-VM-reb.patch @@ -0,0 +1,51 @@ +From 2c6cf54ea2f52774f2587e7e66eed9beba3a3dec Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Tue, 27 Aug 2024 11:58:07 +0800 +Subject: [PATCH 50/78] target/loongarch: Add compatible support about VM + reboot + +With edk2-stable202408 LoongArch UEFI bios, CSR PGD register is set only +if its value is equal to zero for boot cpu, it causes reboot issue. Since +CSR PGD register is changed with linux kernel, UEFI BIOS cannot use it. + +Add workaround to clear CSR registers relative with TLB in function +loongarch_cpu_reset_hold(), so that VM can reboot with edk2-stable202408 +UEFI bios. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240827035807.3326293-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index d8a31929b4..2038984d02 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -580,6 +580,20 @@ static void loongarch_cpu_reset_hold(Object *obj) + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0); + env->CSR_MERRCTL = FIELD_DP64(env->CSR_MERRCTL, CSR_MERRCTL, ISMERR, 0); + env->CSR_TID = cs->cpu_index; ++ /* ++ * Workaround for edk2-stable202408, CSR PGD register is set only if ++ * its value is equal to zero for boot cpu, it causes reboot issue. ++ * ++ * Here clear CSR registers relative with TLB. ++ */ ++ env->CSR_PGDH = 0; ++ env->CSR_PGDL = 0; ++ env->CSR_PWCL = 0; ++ env->CSR_PWCH = 0; ++ env->CSR_STLBPS = 0; ++ env->CSR_EENTRY = 0; ++ env->CSR_TLBRENTRY = 0; ++ env->CSR_MERRENTRY = 0; + + for (n = 0; n < 4; n++) { + env->CSR_DMW[n] = FIELD_DP64(env->CSR_DMW[n], CSR_DMW, PLV0, 0); +-- +2.39.1 + diff --git a/target-loongarch-Add-loongarch-kvm-into-meson-build.patch b/target-loongarch-Add-loongarch-kvm-into-meson-build.patch new file mode 100644 index 0000000000000000000000000000000000000000..7dd1112397927da908b455c142ab4857918fece2 --- /dev/null +++ b/target-loongarch-Add-loongarch-kvm-into-meson-build.patch @@ -0,0 +1,56 @@ +From 49a7ae85d6ac42f8ef556a0d42802508c28adfcc Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:04 +0800 +Subject: [PATCH] target/loongarch: Add loongarch kvm into meson build + +Add kvm.c into meson.build to compile it when kvm +is configed. Meanwhile in meson.build, we set the +kvm_targets to loongarch64-softmmu when the cpu is +loongarch. And fix the compiling error when config +is enable-kvm,disable-tcg. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-10-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + meson.build | 2 ++ + target/loongarch/kvm/meson.build | 1 + + target/loongarch/meson.build | 1 + + 3 files changed, 4 insertions(+) + create mode 100644 target/loongarch/kvm/meson.build + +diff --git a/meson.build b/meson.build +index 445f2b7c2b..0c62b4156d 100644 +--- a/meson.build ++++ b/meson.build +@@ -114,6 +114,8 @@ elif cpu in ['riscv32'] + kvm_targets = ['riscv32-softmmu'] + elif cpu in ['riscv64'] + kvm_targets = ['riscv64-softmmu'] ++elif cpu in ['loongarch64'] ++ kvm_targets = ['loongarch64-softmmu'] + else + kvm_targets = [] + endif +diff --git a/target/loongarch/kvm/meson.build b/target/loongarch/kvm/meson.build +new file mode 100644 +index 0000000000..2266de6ca9 +--- /dev/null ++++ b/target/loongarch/kvm/meson.build +@@ -0,0 +1 @@ ++loongarch_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c')) +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index e84e4c51f4..db310f6022 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -18,3 +18,4 @@ subdir('tcg') + + target_arch += {'loongarch': loongarch_ss} + target_system_arch += {'loongarch': loongarch_system_ss} ++subdir('kvm') +-- +2.27.0 + diff --git a/target-loongarch-Add-loongarch-vector-property-uncon.patch b/target-loongarch-Add-loongarch-vector-property-uncon.patch new file mode 100644 index 0000000000000000000000000000000000000000..4b370e12ff8dac2a442637d2e4fd8010432f42ca --- /dev/null +++ b/target-loongarch-Add-loongarch-vector-property-uncon.patch @@ -0,0 +1,46 @@ +From f572c385e0d368cbf12acf7d6f0b33b5f2efd7f0 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Tue, 21 May 2024 16:05:48 +0800 +Subject: [PATCH 45/78] target/loongarch: Add loongarch vector property + unconditionally + +Currently LSX/LASX vector property is decided by the 
default value. +Instead vector property should be added unconditionally, and it is +irrelative with its default value. If vector is disabled by default, +vector also can be enabled from command line. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240521080549.434197-2-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 220d40fb01..f89740a5aa 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -720,14 +720,10 @@ void loongarch_cpu_post_init(Object *obj) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + +- if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { +- object_property_add_bool(obj, "lsx", loongarch_get_lsx, +- loongarch_set_lsx); +- } +- if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { +- object_property_add_bool(obj, "lasx", loongarch_get_lasx, +- loongarch_set_lasx); +- } ++ object_property_add_bool(obj, "lsx", loongarch_get_lsx, ++ loongarch_set_lsx); ++ object_property_add_bool(obj, "lasx", loongarch_get_lasx, ++ loongarch_set_lasx); + + if (kvm_enabled()) { + object_property_add_bool(obj, "pmu", loongarch_get_pmu, +-- +2.39.1 + diff --git a/target-loongarch-Add-loongson-binary-translation-fea.patch b/target-loongarch-Add-loongson-binary-translation-fea.patch new file mode 100644 index 0000000000000000000000000000000000000000..159a1af87cfc12f5a26ad3c7155f7b21bcd9af13 --- /dev/null +++ b/target-loongarch-Add-loongson-binary-translation-fea.patch @@ -0,0 +1,200 @@ +From 962f649aa5a06169f0ac23f61e273f0860942ebb Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Sun, 29 Sep 2024 15:04:04 +0800 +Subject: [PATCH 57/78] target/loongarch: Add loongson binary translation + feature + +Loongson Binary Translation (LBT) is used to accelerate binary +translation, which contains 4 scratch registers (scr0 to scr3), x86/ARM +eflags (eflags) and 
x87 fpu stack pointer (ftop). + +Now LBT feature is added in kvm mode, not supported in TCG mode since +it is not emulated. Feature variable lbt is added with OnOffAuto type, +If lbt feature is not supported with KVM host, it reports error if there +is lbt=on command line. + +If there is no any command line about lbt parameter, it checks whether +KVM host supports lbt feature and set the corresponding value in cpucfg. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240929070405.235200-2-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 20 ++++++++++ + target/loongarch/cpu.h | 6 +++ + target/loongarch/kvm/kvm.c | 57 ++++++++++++++++++++++++++- + target/loongarch/loongarch-qmp-cmds.c | 2 +- + 4 files changed, 83 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index d6a13de901..a57067938d 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -737,6 +737,18 @@ static void loongarch_set_pmnum(Object *obj, Visitor *v, + } + } + ++static bool loongarch_get_lbt(Object *obj, Error **errp) ++{ ++ return LOONGARCH_CPU(obj)->lbt != ON_OFF_AUTO_OFF; ++} ++ ++static void loongarch_set_lbt(Object *obj, bool value, Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ ++ cpu->lbt = value ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; ++} ++ + void loongarch_cpu_post_init(Object *obj) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +@@ -756,6 +768,14 @@ void loongarch_cpu_post_init(Object *obj) + loongarch_set_pmnum, NULL, + (void *)&value); + } ++ ++ cpu->lbt = ON_OFF_AUTO_AUTO; ++ object_property_add_bool(obj, "lbt", loongarch_get_lbt, ++ loongarch_set_lbt); ++ object_property_set_description(obj, "lbt", ++ "Set off to disable Binary Tranlation."); ++ } else { ++ cpu->lbt = ON_OFF_AUTO_OFF; + } + } + +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 19bcad28de..3e2bcbf608 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -155,6 +155,7 @@ FIELD(CPUCFG2, LLFTP_VER, 15, 3) + FIELD(CPUCFG2, LBT_X86, 18, 1) + FIELD(CPUCFG2, LBT_ARM, 19, 1) + FIELD(CPUCFG2, LBT_MIPS, 20, 1) ++FIELD(CPUCFG2, LBT_ALL, 18, 3) + FIELD(CPUCFG2, LSPW, 21, 1) + FIELD(CPUCFG2, LAM, 22, 1) + +@@ -285,6 +286,10 @@ struct LoongArchTLB { + typedef struct LoongArchTLB LoongArchTLB; + #endif + ++enum loongarch_features { ++ LOONGARCH_FEATURE_LBT, /* loongson binary translation extension */ ++}; ++ + typedef struct CPUArchState { + uint64_t gpr[32]; + uint64_t pc; +@@ -388,6 +393,7 @@ struct ArchCPU { + CPULoongArchState env; + QEMUTimer timer; + uint32_t phy_id; ++ OnOffAuto lbt; + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 90c8379c46..567404bdb5 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -9,6 +9,7 @@ + #include + #include + ++#include "qapi/error.h" + #include "qemu/timer.h" + #include "qemu/error-report.h" + #include "qemu/main-loop.h" +@@ -786,17 +787,71 @@ static void kvm_loongarch_vm_stage_change(void *opaque, bool running, + } + } + ++static bool kvm_feature_supported(CPUState *cs, enum loongarch_features feature) ++{ ++ int ret; ++ struct kvm_device_attr attr; ++ ++ switch (feature) { ++ 
case LOONGARCH_FEATURE_LBT: ++ /* ++ * Return all if all the LBT features are supported such as: ++ * KVM_LOONGARCH_VM_FEAT_X86BT ++ * KVM_LOONGARCH_VM_FEAT_ARMBT ++ * KVM_LOONGARCH_VM_FEAT_MIPSBT ++ */ ++ attr.group = KVM_LOONGARCH_VM_FEAT_CTRL; ++ attr.attr = KVM_LOONGARCH_VM_FEAT_X86BT; ++ ret = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr); ++ attr.attr = KVM_LOONGARCH_VM_FEAT_ARMBT; ++ ret |= kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr); ++ attr.attr = KVM_LOONGARCH_VM_FEAT_MIPSBT; ++ ret |= kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr); ++ return (ret == 0); ++ default: ++ return false; ++ } ++} ++ ++static int kvm_cpu_check_lbt(CPUState *cs, Error **errp) ++{ ++ CPULoongArchState *env = cpu_env(cs); ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ bool kvm_supported; ++ ++ kvm_supported = kvm_feature_supported(cs, LOONGARCH_FEATURE_LBT); ++ if (cpu->lbt == ON_OFF_AUTO_ON) { ++ if (kvm_supported) { ++ env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, LBT_ALL, 7); ++ } else { ++ error_setg(errp, "'lbt' feature not supported by KVM on this host"); ++ return -ENOTSUP; ++ } ++ } else if ((cpu->lbt == ON_OFF_AUTO_AUTO) && kvm_supported) { ++ env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, LBT_ALL, 7); ++ } ++ ++ return 0; ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + uint64_t val; ++ int ret; ++ Error *local_err = NULL; + ++ ret = 0; + qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); + + if (!kvm_get_one_reg(cs, KVM_REG_LOONGARCH_DEBUG_INST, &val)) { + brk_insn = val; + } + +- return 0; ++ ret = kvm_cpu_check_lbt(cs, &local_err); ++ if (ret < 0) { ++ error_report_err(local_err); ++ } ++ return ret; + } + + int kvm_arch_destroy_vcpu(CPUState *cs) +diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c +index 2612f43de9..644b528824 100644 +--- a/target/loongarch/loongarch-qmp-cmds.c ++++ b/target/loongarch/loongarch-qmp-cmds.c +@@ -42,7 +42,7 @@ CpuDefinitionInfoList 
*qmp_query_cpu_definitions(Error **errp) + } + + static const char *cpu_model_advertised_features[] = { +- "lsx", "lasx", "pmu", "pmnum", NULL ++ "lsx", "lasx", "lbt", "pmu", "pmnum", NULL + }; + + CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, +-- +2.39.1 + diff --git a/target-loongarch-Add-steal-time-support-on-migration.patch b/target-loongarch-Add-steal-time-support-on-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..2bcfe1ad9d88db6be77f1d869bf32cda9e27375d --- /dev/null +++ b/target-loongarch-Add-steal-time-support-on-migration.patch @@ -0,0 +1,151 @@ +From 8febab6bcb01e3e10ca4ac0021bae2a812a4452b Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 30 Sep 2024 14:40:40 +0800 +Subject: [PATCH 61/78] target/loongarch: Add steal time support on migration + +With pv steal time supported, VM machine needs get physical address +of each vcpu and notify new host during migration. Here two +functions kvm_get_stealtime/kvm_set_stealtime, and guest steal time +physical address is only updated on KVM_PUT_FULL_STATE stage. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-ID: <20240930064040.753929-1-maobibo@loongson.cn> +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.h | 3 ++ + target/loongarch/kvm/kvm.c | 65 ++++++++++++++++++++++++++++++++++++++ + target/loongarch/machine.c | 6 ++-- + 3 files changed, 72 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 8ff00d17e1..4c90cf9ef3 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -369,6 +369,9 @@ typedef struct CPUArchState { + uint64_t CSR_DBG; + uint64_t CSR_DERA; + uint64_t CSR_DSAVE; ++ struct { ++ uint64_t guest_addr; ++ } stealtime; + + #ifdef CONFIG_TCG + float_status fp_status; +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 8b0f86a201..550f14269e 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -35,6 +35,55 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO + }; + ++static int kvm_get_stealtime(CPUState *cs) ++{ ++ CPULoongArchState *env = cpu_env(cs); ++ int err; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, ++ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, ++ .addr = (uint64_t)&env->stealtime.guest_addr, ++ }; ++ ++ err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr); ++ if (err) { ++ return 0; ++ } ++ ++ err = kvm_vcpu_ioctl(cs, KVM_GET_DEVICE_ATTR, attr); ++ if (err) { ++ error_report("PVTIME: KVM_GET_DEVICE_ATTR: %s", strerror(errno)); ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int kvm_set_stealtime(CPUState *cs) ++{ ++ CPULoongArchState *env = cpu_env(cs); ++ int err; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, ++ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, ++ .addr = (uint64_t)&env->stealtime.guest_addr, ++ }; ++ ++ err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr); ++ if (err) { ++ return 0; ++ } ++ ++ err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr); ++ if (err) { ++ 
error_report("PVTIME: KVM_SET_DEVICE_ATTR %s with gpa "TARGET_FMT_lx, ++ strerror(errno), env->stealtime.guest_addr); ++ return err; ++ } ++ ++ return 0; ++} ++ + static int kvm_loongarch_get_regs_core(CPUState *cs) + { + int ret = 0; +@@ -790,6 +839,11 @@ int kvm_arch_get_registers(CPUState *cs) + return ret; + } + ++ ret = kvm_get_stealtime(cs); ++ if (ret) { ++ return ret; ++ } ++ + ret = kvm_loongarch_get_mpstate(cs); + return ret; + } +@@ -823,6 +877,17 @@ int kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + ++ if (level >= KVM_PUT_FULL_STATE) { ++ /* ++ * only KVM_PUT_FULL_STATE is required, kvm kernel will clear ++ * guest_addr for KVM_PUT_RESET_STATE ++ */ ++ ret = kvm_set_stealtime(cs); ++ if (ret) { ++ return ret; ++ } ++ } ++ + ret = kvm_loongarch_put_mpstate(cs); + return ret; + } +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index 5d62aabd51..fd69ea05dc 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -188,8 +188,8 @@ static const VMStateDescription vmstate_tlb = { + /* LoongArch CPU state */ + const VMStateDescription vmstate_loongarch_cpu = { + .name = "cpu", +- .version_id = 2, +- .minimum_version_id = 2, ++ .version_id = 3, ++ .minimum_version_id = 3, + .post_load = cpu_post_load, + .pre_save = cpu_pre_save, + .fields = (const VMStateField[]) { +@@ -257,6 +257,8 @@ const VMStateDescription vmstate_loongarch_cpu = { + VMSTATE_UINT64(env.CSR_DSAVE, LoongArchCPU), + + VMSTATE_UINT64(kvm_state_counter, LoongArchCPU), ++ /* PV steal time */ ++ VMSTATE_UINT64(env.stealtime.guest_addr, LoongArchCPU), + + VMSTATE_END_OF_LIST() + }, +-- +2.39.1 + diff --git a/target-loongarch-Add-timer-information-dump-support.patch b/target-loongarch-Add-timer-information-dump-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..e489939d2ce6703d7d79d86fe9527c298a294800 --- /dev/null +++ b/target-loongarch-Add-timer-information-dump-support.patch @@ -0,0 +1,33 @@ +From 
8a43c9379651fbf9d015240d6dc7c4b90ce98683 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 6 Dec 2023 16:18:39 +0800 +Subject: [PATCH] target/loongarch: Add timer information dump support + +Timer emulation sometimes is problematic especially when vm is running in +kvm mode. This patch adds registers dump support relative with timer +hardware, so that it is easier to find the problems. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231206081839.2290178-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index fc075952e6..db9a421cc4 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -762,6 +762,8 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) + qemu_fprintf(f, "TLBRENTRY=%016" PRIx64 "\n", env->CSR_TLBRENTRY); + qemu_fprintf(f, "TLBRBADV=%016" PRIx64 "\n", env->CSR_TLBRBADV); + qemu_fprintf(f, "TLBRERA=%016" PRIx64 "\n", env->CSR_TLBRERA); ++ qemu_fprintf(f, "TCFG=%016" PRIx64 "\n", env->CSR_TCFG); ++ qemu_fprintf(f, "TVAL=%016" PRIx64 "\n", env->CSR_TVAL); + + /* fpr */ + if (flags & CPU_DUMP_FPU) { +-- +2.27.0 + diff --git a/target-loongarch-Avoid-bits-shift-exceeding-width-of.patch b/target-loongarch-Avoid-bits-shift-exceeding-width-of.patch new file mode 100644 index 0000000000000000000000000000000000000000..af68bf09033976135aa5906b4095a36500bbcd84 --- /dev/null +++ b/target-loongarch-Avoid-bits-shift-exceeding-width-of.patch @@ -0,0 +1,42 @@ +From fa79379bd4c5b72e11f14f24439d5d501b8cc98b Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Sat, 14 Sep 2024 14:46:45 +0800 +Subject: [PATCH 55/78] target/loongarch: Avoid bits shift exceeding width of + bool type + +Variable env->cf[i] is defined as bool type, it is treated as int type +with shift operation. However the max possible width is 56 for the shift +operation, exceeding the width of int type. 
And there is existing api +read_fcc() which is converted to u64 type with bitwise shift, it can be +used to dump fp registers into coredump note segment. + +Resolves: Coverity CID 1561133 +Signed-off-by: Bibo Mao +Reviewed-by: Richard Henderson +Message-Id: <20240914064645.2099169-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/arch_dump.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/target/loongarch/arch_dump.c b/target/loongarch/arch_dump.c +index 4986db970e..d9e1120333 100644 +--- a/target/loongarch/arch_dump.c ++++ b/target/loongarch/arch_dump.c +@@ -97,11 +97,7 @@ static int loongarch_write_elf64_fprpreg(WriteCoreDumpFunction f, + + loongarch_note_init(&note, s, "CORE", 5, NT_PRFPREG, sizeof(note.fpu)); + note.fpu.fcsr = cpu_to_dump64(s, env->fcsr0); +- +- for (i = 0; i < 8; i++) { +- note.fpu.fcc |= env->cf[i] << (8 * i); +- } +- note.fpu.fcc = cpu_to_dump64(s, note.fpu.fcc); ++ note.fpu.fcc = cpu_to_dump64(s, read_fcc(env)); + + for (i = 0; i < 32; ++i) { + note.fpu.fpr[i] = cpu_to_dump64(s, env->fpr[i].vreg.UD[0]); +-- +2.39.1 + diff --git a/target-loongarch-Define-some-kvm_arch-interfaces.patch b/target-loongarch-Define-some-kvm_arch-interfaces.patch new file mode 100644 index 0000000000000000000000000000000000000000..8667c98948257e691a468b425ec9982592d7f64d --- /dev/null +++ b/target-loongarch-Define-some-kvm_arch-interfaces.patch @@ -0,0 +1,162 @@ +From 623a99084843f47723cb799d4bcef8e1359d59ad Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:57 +0800 +Subject: [PATCH] target/loongarch: Define some kvm_arch interfaces + +Define some functions in target/loongarch/kvm/kvm.c, +such as kvm_arch_put_registers, kvm_arch_get_registers +and kvm_arch_handle_exit, etc. which are needed by +kvm/kvm-all.c. Now the most functions has no content +and they will be implemented in the next patches. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-3-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 131 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 131 insertions(+) + create mode 100644 target/loongarch/kvm/kvm.c + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +new file mode 100644 +index 0000000000..0d67322fd9 +--- /dev/null ++++ b/target/loongarch/kvm/kvm.c +@@ -0,0 +1,131 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch KVM ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include ++#include ++ ++#include "qemu/timer.h" ++#include "qemu/error-report.h" ++#include "qemu/main-loop.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/kvm.h" ++#include "sysemu/kvm_int.h" ++#include "hw/pci/pci.h" ++#include "exec/memattrs.h" ++#include "exec/address-spaces.h" ++#include "hw/boards.h" ++#include "hw/irq.h" ++#include "qemu/log.h" ++#include "hw/loader.h" ++#include "migration/migration.h" ++#include "sysemu/runstate.h" ++#include "cpu-csr.h" ++#include "kvm_loongarch.h" ++ ++static bool cap_has_mp_state; ++const KVMCapabilityInfo kvm_arch_required_capabilities[] = { ++ KVM_CAP_LAST_INFO ++}; ++ ++int kvm_arch_get_registers(CPUState *cs) ++{ ++ return 0; ++} ++int kvm_arch_put_registers(CPUState *cs, int level) ++{ ++ return 0; ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ return 0; ++} ++ ++int kvm_arch_destroy_vcpu(CPUState *cs) ++{ ++ return 0; ++} ++ ++unsigned long kvm_arch_vcpu_id(CPUState *cs) ++{ ++ return cs->cpu_index; ++} ++ ++int kvm_arch_release_virq_post(int virq) ++{ ++ return 0; ++} ++ ++int kvm_arch_msi_data_to_gsi(uint32_t data) ++{ ++ abort(); ++} ++ ++int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, ++ uint64_t address, uint32_t data, PCIDevice *dev) ++{ ++ return 0; 
++} ++ ++int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, ++ int vector, PCIDevice *dev) ++{ ++ return 0; ++} ++ ++void kvm_arch_init_irq_routing(KVMState *s) ++{ ++} ++ ++int kvm_arch_get_default_type(MachineState *ms) ++{ ++ return 0; ++} ++ ++int kvm_arch_init(MachineState *ms, KVMState *s) ++{ ++ return 0; ++} ++ ++int kvm_arch_irqchip_create(KVMState *s) ++{ ++ return 0; ++} ++ ++void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) ++{ ++} ++ ++MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) ++{ ++ return MEMTXATTRS_UNSPECIFIED; ++} ++ ++int kvm_arch_process_async_events(CPUState *cs) ++{ ++ return cs->halted; ++} ++ ++bool kvm_arch_stop_on_emulation_error(CPUState *cs) ++{ ++ return true; ++} ++ ++bool kvm_arch_cpu_check_are_resettable(void) ++{ ++ return true; ++} ++ ++int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ++{ ++ return 0; ++} ++ ++void kvm_arch_accel_class_init(ObjectClass *oc) ++{ ++} +-- +2.27.0 + diff --git a/target-loongarch-Fix-cpu_reset-set-wrong-CSR_CRMD.patch b/target-loongarch-Fix-cpu_reset-set-wrong-CSR_CRMD.patch new file mode 100644 index 0000000000000000000000000000000000000000..26d3777b6164497fc285ff388ebafe057bb4339b --- /dev/null +++ b/target-loongarch-Fix-cpu_reset-set-wrong-CSR_CRMD.patch @@ -0,0 +1,42 @@ +From d909e6bfef50fc67708358e455a3b53d869249e6 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 5 Jul 2024 10:18:39 +0800 +Subject: [PATCH 49/78] target/loongarch: Fix cpu_reset set wrong CSR_CRMD + +After cpu_reset, DATF in CSR_CRMD is 0, DATM is 0. +See the manual[1] 6.4. 
+ + [1]: https://github.com/loongson/LoongArch-Documentation/releases/download/2023.04.20/LoongArch-Vol1-v1.10-EN.pdf + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240705021839.1004374-2-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 5bb9e5656a..d8a31929b4 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -554,13 +554,13 @@ static void loongarch_cpu_reset_hold(Object *obj) + env->fcsr0 = 0x0; + + int n; +- /* Set csr registers value after reset */ ++ /* Set csr registers value after reset, see the manual 6.4. */ + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, 0); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, 0); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 1); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 0); +- env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DATF, 1); +- env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DATM, 1); ++ env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DATF, 0); ++ env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DATM, 0); + + env->CSR_EUEN = FIELD_DP64(env->CSR_EUEN, CSR_EUEN, FPE, 0); + env->CSR_EUEN = FIELD_DP64(env->CSR_EUEN, CSR_EUEN, SXE, 0); +-- +2.39.1 + diff --git a/target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch b/target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch new file mode 100644 index 0000000000000000000000000000000000000000..f94c68bba1a2e09e031cb55c27a219eb9a392f49 --- /dev/null +++ b/target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch @@ -0,0 +1,45 @@ +From 6d175f9d5d5b9f46ee2f1a6fe00249bb817b5dc6 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 20 Mar 2024 09:39:55 +0800 +Subject: [PATCH] target/loongarch: Fix qemu-loongarch64 hang when + executing 'll.d $t0, $t0, 0' + +On gen_ll, if a->imm is zero, make_address_x return src1, 
+but the load to destination may clobber src1. We use a new +destination to fix this problem. + +Fixes: c5af6628f4be (target/loongarch: Extract make_address_i() helper) +Reviewed-by: Richard Henderson +Suggested-by: Richard Henderson +Signed-off-by: Song Gao +Message-Id: <20240320013955.1561311-1-gaosong@loongson.cn> +--- + target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +index 80c2e286fd..974bc2a70f 100644 +--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc ++++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +@@ -5,14 +5,14 @@ + + static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop) + { +- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); ++ TCGv t1 = tcg_temp_new(); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv t0 = make_address_i(ctx, src1, a->imm); + +- tcg_gen_qemu_ld_i64(dest, t0, ctx->mem_idx, mop); ++ tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop); + tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr)); +- tcg_gen_st_tl(dest, tcg_env, offsetof(CPULoongArchState, llval)); +- gen_set_gpr(a->rd, dest, EXT_NONE); ++ tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval)); ++ gen_set_gpr(a->rd, t1, EXT_NONE); + + return true; + } +-- +2.33.0 + diff --git a/target-loongarch-Fix-qemu-system-loongarch64-assert-.patch b/target-loongarch-Fix-qemu-system-loongarch64-assert-.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7c39f0f16831c3351599a41a08e1e6f95c67272 --- /dev/null +++ b/target-loongarch-Fix-qemu-system-loongarch64-assert-.patch @@ -0,0 +1,136 @@ +From 3db0118d3663c5d56841dac30e4bf95ccfff21bd Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 2 Apr 2024 09:39:36 +0800 +Subject: [PATCH] target/loongarch: Fix qemu-system-loongarch64 assert + failed with the option '-d int' +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +qemu-system-loongarch64 assert failed with the option '-d int', +the helper_idle() raise an exception EXCP_HLT, but the exception name is undefined. + +----- +merge patch: + +0cbb322f70e8a87e4acbffecef5ea8f9448f3513(target/loongarch/cpu.c: typo fix: expection) + +Signed-off-by: Song Gao +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20240321123606.1704900-1-gaosong@loongson.cn> +--- + target/loongarch/cpu.c | 74 +++++++++++++++++++++++------------------- + 1 file changed, 40 insertions(+), 34 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index b098b1c6f3..0b3f954b64 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -43,33 +43,45 @@ const char * const fregnames[32] = { + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + +-static const char * const excp_names[] = { +- [EXCCODE_INT] = "Interrupt", +- [EXCCODE_PIL] = "Page invalid exception for load", +- [EXCCODE_PIS] = "Page invalid exception for store", +- [EXCCODE_PIF] = "Page invalid exception for fetch", +- [EXCCODE_PME] = "Page modified exception", +- [EXCCODE_PNR] = "Page Not Readable exception", +- [EXCCODE_PNX] = "Page Not Executable exception", +- [EXCCODE_PPI] = "Page Privilege error", +- [EXCCODE_ADEF] = "Address error for instruction fetch", +- [EXCCODE_ADEM] = "Address error for Memory access", +- [EXCCODE_SYS] = "Syscall", +- [EXCCODE_BRK] = "Break", +- [EXCCODE_INE] = "Instruction Non-Existent", +- [EXCCODE_IPE] = "Instruction privilege error", +- [EXCCODE_FPD] = "Floating Point Disabled", +- [EXCCODE_FPE] = "Floating Point Exception", +- [EXCCODE_DBP] = "Debug breakpoint", +- [EXCCODE_BCE] = "Bound Check Exception", +- [EXCCODE_SXD] = "128 bit vector instructions Disable exception", +- [EXCCODE_ASXD] = "256 bit vector instructions Disable exception", ++struct TypeExcp { ++ int32_t exccode; ++ const char * const name; ++}; ++ ++static const struct TypeExcp excp_names[] = { 
++ {EXCCODE_INT, "Interrupt"}, ++ {EXCCODE_PIL, "Page invalid exception for load"}, ++ {EXCCODE_PIS, "Page invalid exception for store"}, ++ {EXCCODE_PIF, "Page invalid exception for fetch"}, ++ {EXCCODE_PME, "Page modified exception"}, ++ {EXCCODE_PNR, "Page Not Readable exception"}, ++ {EXCCODE_PNX, "Page Not Executable exception"}, ++ {EXCCODE_PPI, "Page Privilege error"}, ++ {EXCCODE_ADEF, "Address error for instruction fetch"}, ++ {EXCCODE_ADEM, "Address error for Memory access"}, ++ {EXCCODE_SYS, "Syscall"}, ++ {EXCCODE_BRK, "Break"}, ++ {EXCCODE_INE, "Instruction Non-Existent"}, ++ {EXCCODE_IPE, "Instruction privilege error"}, ++ {EXCCODE_FPD, "Floating Point Disabled"}, ++ {EXCCODE_FPE, "Floating Point Exception"}, ++ {EXCCODE_DBP, "Debug breakpoint"}, ++ {EXCCODE_BCE, "Bound Check Exception"}, ++ {EXCCODE_SXD, "128 bit vector instructions Disable exception"}, ++ {EXCCODE_ASXD, "256 bit vector instructions Disable exception"}, ++ {EXCP_HLT, "EXCP_HLT"}, + }; + + const char *loongarch_exception_name(int32_t exception) + { +- assert(excp_names[exception]); +- return excp_names[exception]; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(excp_names); i++) { ++ if (excp_names[i].exccode == exception) { ++ return excp_names[i].name; ++ } ++ } ++ return "Unknown"; + } + + void G_NORETURN do_raise_exception(CPULoongArchState *env, +@@ -78,7 +90,7 @@ void G_NORETURN do_raise_exception(CPULoongArchState *env, + { + CPUState *cs = env_cpu(env); + +- qemu_log_mask(CPU_LOG_INT, "%s: %d (%s)\n", ++ qemu_log_mask(CPU_LOG_INT, "%s: exception: %d (%s)\n", + __func__, + exception, + loongarch_exception_name(exception)); +@@ -159,22 +171,16 @@ static void loongarch_cpu_do_interrupt(CPUState *cs) + CPULoongArchState *env = &cpu->env; + bool update_badinstr = 1; + int cause = -1; +- const char *name; + bool tlbfill = FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR); + uint32_t vec_size = FIELD_EX64(env->CSR_ECFG, CSR_ECFG, VS); + + if (cs->exception_index != EXCCODE_INT) { +- if 
(cs->exception_index < 0 || +- cs->exception_index >= ARRAY_SIZE(excp_names)) { +- name = "unknown"; +- } else { +- name = excp_names[cs->exception_index]; +- } +- + qemu_log_mask(CPU_LOG_INT, + "%s enter: pc " TARGET_FMT_lx " ERA " TARGET_FMT_lx +- " TLBRERA " TARGET_FMT_lx " %s exception\n", __func__, +- env->pc, env->CSR_ERA, env->CSR_TLBRERA, name); ++ " TLBRERA " TARGET_FMT_lx " exception: %d (%s)\n", ++ __func__, env->pc, env->CSR_ERA, env->CSR_TLBRERA, ++ cs->exception_index, ++ loongarch_exception_name(cs->exception_index)); + } + + switch (cs->exception_index) { +-- +2.33.0 + diff --git a/target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch b/target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..33cf3381868dff0b58bf3ba2fb4ef5bb98fb5687 --- /dev/null +++ b/target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch @@ -0,0 +1,570 @@ +From d2381abc2c78de68e765a29a55282707541e315d Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Thu, 25 Jan 2024 14:14:01 +0800 +Subject: [PATCH] target/loongarch: Fix qtest test-hmp error when KVM-only + build + +The cc->sysemu_ops->get_phys_page_debug() is NULL when +KVM-only build. this patch fixes it. 
+ +Signed-off-by: Song Gao +Tested-by: Bibo Mao +Message-Id: <20240125061401.52526-1-gaosong@loongson.cn> +--- + target/loongarch/cpu.c | 2 - + target/loongarch/cpu_helper.c | 231 ++++++++++++++++++++++++++++++ + target/loongarch/internals.h | 20 ++- + target/loongarch/meson.build | 1 + + target/loongarch/tcg/tlb_helper.c | 230 ----------------------------- + 5 files changed, 250 insertions(+), 234 deletions(-) + create mode 100644 target/loongarch/cpu_helper.c + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 6611d137a1..b098b1c6f3 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -771,9 +771,7 @@ static struct TCGCPUOps loongarch_tcg_ops = { + #include "hw/core/sysemu-cpu-ops.h" + + static const struct SysemuCPUOps loongarch_sysemu_ops = { +-#ifdef CONFIG_TCG + .get_phys_page_debug = loongarch_cpu_get_phys_page_debug, +-#endif + }; + + static int64_t loongarch_cpu_get_arch_id(CPUState *cs) +diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c +new file mode 100644 +index 0000000000..f68d63f466 +--- /dev/null ++++ b/target/loongarch/cpu_helper.c +@@ -0,0 +1,231 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * LoongArch CPU helpers for qemu ++ * ++ * Copyright (c) 2024 Loongson Technology Corporation Limited ++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "cpu.h" ++#include "internals.h" ++#include "cpu-csr.h" ++ ++static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ int access_type, int index, int mmu_idx) ++{ ++ LoongArchTLB *tlb = &env->tlb[index]; ++ uint64_t plv = mmu_idx; ++ uint64_t tlb_entry, tlb_ppn; ++ uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; ++ ++ if (index >= LOONGARCH_STLB) { ++ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); ++ } else { ++ tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); ++ } ++ n = (address >> tlb_ps) & 0x1;/* Odd or even */ ++ ++ tlb_entry = n ? 
tlb->tlb_entry1 : tlb->tlb_entry0; ++ tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); ++ tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); ++ tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); ++ if (is_la64(env)) { ++ tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); ++ tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); ++ tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); ++ tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); ++ } else { ++ tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); ++ tlb_nx = 0; ++ tlb_nr = 0; ++ tlb_rplv = 0; ++ } ++ ++ /* Remove sw bit between bit12 -- bit PS*/ ++ tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); ++ ++ /* Check access rights */ ++ if (!tlb_v) { ++ return TLBRET_INVALID; ++ } ++ ++ if (access_type == MMU_INST_FETCH && tlb_nx) { ++ return TLBRET_XI; ++ } ++ ++ if (access_type == MMU_DATA_LOAD && tlb_nr) { ++ return TLBRET_RI; ++ } ++ ++ if (((tlb_rplv == 0) && (plv > tlb_plv)) || ++ ((tlb_rplv == 1) && (plv != tlb_plv))) { ++ return TLBRET_PE; ++ } ++ ++ if ((access_type == MMU_DATA_STORE) && !tlb_d) { ++ return TLBRET_DIRTY; ++ } ++ ++ *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | ++ (address & MAKE_64BIT_MASK(0, tlb_ps)); ++ *prot = PAGE_READ; ++ if (tlb_d) { ++ *prot |= PAGE_WRITE; ++ } ++ if (!tlb_nx) { ++ *prot |= PAGE_EXEC; ++ } ++ return TLBRET_MATCH; ++} ++ ++/* ++ * One tlb entry holds an adjacent odd/even pair, the vpn is the ++ * content of the virtual page number divided by 2. So the ++ * compare vpn is bit[47:15] for 16KiB page. while the vppn ++ * field in tlb entry contains bit[47:13], so need adjust. 
++ * virt_vpn = vaddr[47:13] ++ */ ++bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, ++ int *index) ++{ ++ LoongArchTLB *tlb; ++ uint16_t csr_asid, tlb_asid, stlb_idx; ++ uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; ++ int i, compare_shift; ++ uint64_t vpn, tlb_vppn; ++ ++ csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); ++ stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); ++ vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); ++ stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ ++ compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; ++ ++ /* Search STLB */ ++ for (i = 0; i < 8; ++i) { ++ tlb = &env->tlb[i * 256 + stlb_idx]; ++ tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); ++ if (tlb_e) { ++ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); ++ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); ++ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); ++ ++ if ((tlb_g == 1 || tlb_asid == csr_asid) && ++ (vpn == (tlb_vppn >> compare_shift))) { ++ *index = i * 256 + stlb_idx; ++ return true; ++ } ++ } ++ } ++ ++ /* Search MTLB */ ++ for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { ++ tlb = &env->tlb[i]; ++ tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); ++ if (tlb_e) { ++ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); ++ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); ++ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); ++ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); ++ compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; ++ vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); ++ if ((tlb_g == 1 || tlb_asid == csr_asid) && ++ (vpn == (tlb_vppn >> compare_shift))) { ++ *index = i; ++ return true; ++ } ++ } ++ } ++ return false; ++} ++ ++static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ MMUAccessType access_type, int mmu_idx) ++{ ++ int index, match; ++ ++ match = loongarch_tlb_search(env, address, &index); ++ if (match) { ++ return 
loongarch_map_tlb_entry(env, physical, prot, ++ address, access_type, index, mmu_idx); ++ } ++ ++ return TLBRET_NOMATCH; ++} ++ ++static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, ++ target_ulong dmw) ++{ ++ if (is_la64(env)) { ++ return va & TARGET_VIRT_MASK; ++ } else { ++ uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG); ++ return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \ ++ (pseg << R_CSR_DMW_32_VSEG_SHIFT); ++ } ++} ++ ++int get_physical_address(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ MMUAccessType access_type, int mmu_idx) ++{ ++ int user_mode = mmu_idx == MMU_IDX_USER; ++ int kernel_mode = mmu_idx == MMU_IDX_KERNEL; ++ uint32_t plv, base_c, base_v; ++ int64_t addr_high; ++ uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); ++ uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); ++ ++ /* Check PG and DA */ ++ if (da & !pg) { ++ *physical = address & TARGET_PHYS_MASK; ++ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; ++ return TLBRET_MATCH; ++ } ++ ++ plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT); ++ if (is_la64(env)) { ++ base_v = address >> R_CSR_DMW_64_VSEG_SHIFT; ++ } else { ++ base_v = address >> R_CSR_DMW_32_VSEG_SHIFT; ++ } ++ /* Check direct map window */ ++ for (int i = 0; i < 4; i++) { ++ if (is_la64(env)) { ++ base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG); ++ } else { ++ base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); ++ } ++ if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { ++ *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); ++ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; ++ return TLBRET_MATCH; ++ } ++ } ++ ++ /* Check valid extension */ ++ addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); ++ if (!(addr_high == 0 || addr_high == -1)) { ++ return TLBRET_BADADDR; ++ } ++ ++ /* Mapped address */ ++ return loongarch_map_address(env, physical, prot, address, ++ access_type, mmu_idx); ++} ++ ++hwaddr 
loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ hwaddr phys_addr; ++ int prot; ++ ++ if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, ++ cpu_mmu_index(env, false)) != 0) { ++ return -1; ++ } ++ return phys_addr; ++} +diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h +index 0beb034748..a2fc54c8a7 100644 +--- a/target/loongarch/internals.h ++++ b/target/loongarch/internals.h +@@ -37,6 +37,17 @@ void restore_fp_status(CPULoongArchState *env); + #endif + + #ifndef CONFIG_USER_ONLY ++enum { ++ TLBRET_MATCH = 0, ++ TLBRET_BADADDR = 1, ++ TLBRET_NOMATCH = 2, ++ TLBRET_INVALID = 3, ++ TLBRET_DIRTY = 4, ++ TLBRET_RI = 5, ++ TLBRET_XI = 6, ++ TLBRET_PE = 7, ++}; ++ + extern const VMStateDescription vmstate_loongarch_cpu; + + void loongarch_cpu_set_irq(void *opaque, int irq, int level); +@@ -46,12 +57,17 @@ uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu); + uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu); + void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu, + uint64_t value); ++bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, ++ int *index); ++int get_physical_address(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ MMUAccessType access_type, int mmu_idx); ++hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); ++ + #ifdef CONFIG_TCG + bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); +- +-hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); + #endif + #endif /* !CONFIG_USER_ONLY */ + +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index db310f6022..e002e9aaf6 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -8,6 +8,7 @@ loongarch_ss.add(files( + 
+ loongarch_system_ss = ss.source_set() + loongarch_system_ss.add(files( ++ 'cpu_helper.c', + 'loongarch-qmp-cmds.c', + 'machine.c', + )) +diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c +index 449043c68b..804ab7a263 100644 +--- a/target/loongarch/tcg/tlb_helper.c ++++ b/target/loongarch/tcg/tlb_helper.c +@@ -17,236 +17,6 @@ + #include "exec/log.h" + #include "cpu-csr.h" + +-enum { +- TLBRET_MATCH = 0, +- TLBRET_BADADDR = 1, +- TLBRET_NOMATCH = 2, +- TLBRET_INVALID = 3, +- TLBRET_DIRTY = 4, +- TLBRET_RI = 5, +- TLBRET_XI = 6, +- TLBRET_PE = 7, +-}; +- +-static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, +- int *prot, target_ulong address, +- int access_type, int index, int mmu_idx) +-{ +- LoongArchTLB *tlb = &env->tlb[index]; +- uint64_t plv = mmu_idx; +- uint64_t tlb_entry, tlb_ppn; +- uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; +- +- if (index >= LOONGARCH_STLB) { +- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); +- } else { +- tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); +- } +- n = (address >> tlb_ps) & 0x1;/* Odd or even */ +- +- tlb_entry = n ? 
tlb->tlb_entry1 : tlb->tlb_entry0; +- tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); +- tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); +- tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); +- if (is_la64(env)) { +- tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); +- tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); +- tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); +- tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); +- } else { +- tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); +- tlb_nx = 0; +- tlb_nr = 0; +- tlb_rplv = 0; +- } +- +- /* Remove sw bit between bit12 -- bit PS*/ +- tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); +- +- /* Check access rights */ +- if (!tlb_v) { +- return TLBRET_INVALID; +- } +- +- if (access_type == MMU_INST_FETCH && tlb_nx) { +- return TLBRET_XI; +- } +- +- if (access_type == MMU_DATA_LOAD && tlb_nr) { +- return TLBRET_RI; +- } +- +- if (((tlb_rplv == 0) && (plv > tlb_plv)) || +- ((tlb_rplv == 1) && (plv != tlb_plv))) { +- return TLBRET_PE; +- } +- +- if ((access_type == MMU_DATA_STORE) && !tlb_d) { +- return TLBRET_DIRTY; +- } +- +- *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | +- (address & MAKE_64BIT_MASK(0, tlb_ps)); +- *prot = PAGE_READ; +- if (tlb_d) { +- *prot |= PAGE_WRITE; +- } +- if (!tlb_nx) { +- *prot |= PAGE_EXEC; +- } +- return TLBRET_MATCH; +-} +- +-/* +- * One tlb entry holds an adjacent odd/even pair, the vpn is the +- * content of the virtual page number divided by 2. So the +- * compare vpn is bit[47:15] for 16KiB page. while the vppn +- * field in tlb entry contains bit[47:13], so need adjust. 
+- * virt_vpn = vaddr[47:13] +- */ +-static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, +- int *index) +-{ +- LoongArchTLB *tlb; +- uint16_t csr_asid, tlb_asid, stlb_idx; +- uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; +- int i, compare_shift; +- uint64_t vpn, tlb_vppn; +- +- csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); +- stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); +- vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); +- stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ +- compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; +- +- /* Search STLB */ +- for (i = 0; i < 8; ++i) { +- tlb = &env->tlb[i * 256 + stlb_idx]; +- tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); +- if (tlb_e) { +- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); +- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); +- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); +- +- if ((tlb_g == 1 || tlb_asid == csr_asid) && +- (vpn == (tlb_vppn >> compare_shift))) { +- *index = i * 256 + stlb_idx; +- return true; +- } +- } +- } +- +- /* Search MTLB */ +- for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { +- tlb = &env->tlb[i]; +- tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); +- if (tlb_e) { +- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); +- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); +- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); +- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); +- compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; +- vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); +- if ((tlb_g == 1 || tlb_asid == csr_asid) && +- (vpn == (tlb_vppn >> compare_shift))) { +- *index = i; +- return true; +- } +- } +- } +- return false; +-} +- +-static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, +- int *prot, target_ulong address, +- MMUAccessType access_type, int mmu_idx) +-{ +- int index, match; +- +- match = loongarch_tlb_search(env, address, &index); +- if (match) { +- 
return loongarch_map_tlb_entry(env, physical, prot, +- address, access_type, index, mmu_idx); +- } +- +- return TLBRET_NOMATCH; +-} +- +-static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, +- target_ulong dmw) +-{ +- if (is_la64(env)) { +- return va & TARGET_VIRT_MASK; +- } else { +- uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG); +- return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \ +- (pseg << R_CSR_DMW_32_VSEG_SHIFT); +- } +-} +- +-static int get_physical_address(CPULoongArchState *env, hwaddr *physical, +- int *prot, target_ulong address, +- MMUAccessType access_type, int mmu_idx) +-{ +- int user_mode = mmu_idx == MMU_IDX_USER; +- int kernel_mode = mmu_idx == MMU_IDX_KERNEL; +- uint32_t plv, base_c, base_v; +- int64_t addr_high; +- uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); +- uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); +- +- /* Check PG and DA */ +- if (da & !pg) { +- *physical = address & TARGET_PHYS_MASK; +- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; +- return TLBRET_MATCH; +- } +- +- plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT); +- if (is_la64(env)) { +- base_v = address >> R_CSR_DMW_64_VSEG_SHIFT; +- } else { +- base_v = address >> R_CSR_DMW_32_VSEG_SHIFT; +- } +- /* Check direct map window */ +- for (int i = 0; i < 4; i++) { +- if (is_la64(env)) { +- base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG); +- } else { +- base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); +- } +- if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { +- *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); +- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; +- return TLBRET_MATCH; +- } +- } +- +- /* Check valid extension */ +- addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); +- if (!(addr_high == 0 || addr_high == -1)) { +- return TLBRET_BADADDR; +- } +- +- /* Mapped address */ +- return loongarch_map_address(env, physical, prot, address, +- access_type, mmu_idx); +-} +- +-hwaddr 
loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +-{ +- LoongArchCPU *cpu = LOONGARCH_CPU(cs); +- CPULoongArchState *env = &cpu->env; +- hwaddr phys_addr; +- int prot; +- +- if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, +- cpu_mmu_index(env, false)) != 0) { +- return -1; +- } +- return phys_addr; +-} +- + static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, + MMUAccessType access_type, int tlb_error) + { +-- +2.27.0 + diff --git a/target-loongarch-Fix-the-cpu-unplug-resource-leak.patch b/target-loongarch-Fix-the-cpu-unplug-resource-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..ade56ca4b9d13ce3d05b9f674e76a3d72158a78c --- /dev/null +++ b/target-loongarch-Fix-the-cpu-unplug-resource-leak.patch @@ -0,0 +1,76 @@ +From 2a51f062a46c2e3fbd96a1d75f9d53cab449f4ac Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Fri, 21 Mar 2025 20:40:37 +0800 +Subject: [PATCH] target/loongarch: Fix the cpu unplug resource leak + +When the cpu is created, qemu_add_vm_change_state_handler +is called in the kvm_arch_init_vcpu function to create +the VMChangeStateEntry resource. + +However, the resource is not released when the cpu is destroyed. +This results in a qemu process segment error when the virtual +machine restarts after the cpu is unplugged. + +This patch solves the problem by adding the corresponding resource +release process to the kvm_arch_destroy_vcpu function. 
+ +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 2 +- + target/loongarch/cpu.h | 1 + + target/loongarch/kvm/kvm.c | 5 ++++- + 3 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 570ce8be3b..561566f3a0 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -573,7 +573,7 @@ static void loongarch_cpu_reset_hold(Object *obj) + env->CSR_ECFG = FIELD_DP64(env->CSR_ECFG, CSR_ECFG, VS, 0); + env->CSR_ECFG = FIELD_DP64(env->CSR_ECFG, CSR_ECFG, LIE, 0); + +- env->CSR_ESTAT = env->CSR_ESTAT & (~MAKE_64BIT_MASK(0, 2)); ++ env->CSR_ESTAT = 0; + env->CSR_RVACFG = FIELD_DP64(env->CSR_RVACFG, CSR_RVACFG, RBITS, 0); + env->CSR_CPUID = cs->cpu_index; + env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 9af622aba5..6cc717c5ea 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -427,6 +427,7 @@ struct ArchCPU { + const char *dtb_compatible; + /* used by KVM_REG_LOONGARCH_COUNTER ioctl to access guest time counters */ + uint64_t kvm_state_counter; ++ VMChangeStateEntry *vmsentry; + }; + + /** +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 277210ca04..f6e008a517 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -905,9 +905,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + uint64_t val; + int ret; + Error *local_err = NULL; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); + + ret = 0; +- qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); ++ cpu->vmsentry = qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); + + if (!kvm_get_one_reg(cs, KVM_REG_LOONGARCH_DEBUG_INST, &val)) { + brk_insn = val; +@@ -928,6 +929,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + + int kvm_arch_destroy_vcpu(CPUState *cs) + { ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ qemu_del_vm_change_state_handler(cpu->vmsentry); + return 0; + } + +-- 
+2.41.0.windows.1 + diff --git a/target-loongarch-Fix-tlb-huge-page-loading-issue.patch b/target-loongarch-Fix-tlb-huge-page-loading-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..467e6d745444102b0eefc14e2d06c01bba6115b2 --- /dev/null +++ b/target-loongarch-Fix-tlb-huge-page-loading-issue.patch @@ -0,0 +1,208 @@ +From c5938b5f858ee8904893e08999df1af1ae13b063 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 18 Mar 2024 15:03:32 +0800 +Subject: [PATCH] target/loongarch: Fix tlb huge page loading issue + +When we use qemu tcg simulation, the page size of bios is 4KB. +When using the level 2 super huge page (page size is 1G) to create the page table, +it is found that the content of the corresponding address space is abnormal, +resulting in the bios can not start the operating system and graphical interface normally. + +The lddir and ldpte instruction emulation has +a problem with the use of super huge page processing above level 2. +The page size is not correctly calculated, +resulting in the wrong page size of the table entry found by tlb. 
+ +Signed-off-by: Xianglai Li +Reviewed-by: Richard Henderson +Signed-off-by: Song Gao +Message-Id: <20240318070332.1273939-1-lixianglai@loongson.cn> +--- + target/loongarch/cpu-csr.h | 3 + + target/loongarch/internals.h | 5 -- + target/loongarch/tcg/tlb_helper.c | 113 +++++++++++++++++++++--------- + 3 files changed, 82 insertions(+), 39 deletions(-) + +diff --git a/target/loongarch/cpu-csr.h b/target/loongarch/cpu-csr.h +index c59d7a9fcb..0834e91f30 100644 +--- a/target/loongarch/cpu-csr.h ++++ b/target/loongarch/cpu-csr.h +@@ -67,6 +67,9 @@ FIELD(TLBENTRY, D, 1, 1) + FIELD(TLBENTRY, PLV, 2, 2) + FIELD(TLBENTRY, MAT, 4, 2) + FIELD(TLBENTRY, G, 6, 1) ++FIELD(TLBENTRY, HUGE, 6, 1) ++FIELD(TLBENTRY, HGLOBAL, 12, 1) ++FIELD(TLBENTRY, LEVEL, 13, 2) + FIELD(TLBENTRY_32, PPN, 8, 24) + FIELD(TLBENTRY_64, PPN, 12, 36) + FIELD(TLBENTRY_64, NR, 61, 1) +diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h +index a2fc54c8a7..944153b180 100644 +--- a/target/loongarch/internals.h ++++ b/target/loongarch/internals.h +@@ -16,11 +16,6 @@ + #define TARGET_PHYS_MASK MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS) + #define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS) + +-/* Global bit used for lddir/ldpte */ +-#define LOONGARCH_PAGE_HUGE_SHIFT 6 +-/* Global bit for huge page */ +-#define LOONGARCH_HGLOBAL_SHIFT 12 +- + void loongarch_translate_init(void); + + void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags); +diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c +index 804ab7a263..eedd1ac376 100644 +--- a/target/loongarch/tcg/tlb_helper.c ++++ b/target/loongarch/tcg/tlb_helper.c +@@ -17,6 +17,34 @@ + #include "exec/log.h" + #include "cpu-csr.h" + ++static void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, ++ uint64_t *dir_width, target_ulong level) ++{ ++ switch (level) { ++ case 1: ++ *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); ++ *dir_width = 
FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); ++ break; ++ case 2: ++ *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); ++ break; ++ case 3: ++ *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); ++ break; ++ case 4: ++ *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); ++ break; ++ default: ++ /* level may be zero for ldpte */ ++ *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); ++ break; ++ } ++} ++ + static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, + MMUAccessType access_type, int tlb_error) + { +@@ -486,7 +514,25 @@ target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, + target_ulong badvaddr, index, phys, ret; + int shift; + uint64_t dir_base, dir_width; +- bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; ++ ++ if (unlikely((level == 0) || (level > 4))) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Attepted LDDIR with level %"PRId64"\n", level); ++ return base; ++ } ++ ++ if (FIELD_EX64(base, TLBENTRY, HUGE)) { ++ if (unlikely(level == 4)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Attempted use of level 4 huge page\n"); ++ } ++ ++ if (FIELD_EX64(base, TLBENTRY, LEVEL)) { ++ return base; ++ } else { ++ return FIELD_DP64(base, TLBENTRY, LEVEL, level); ++ } ++ } + + badvaddr = env->CSR_TLBRBADV; + base = base & TARGET_PHYS_MASK; +@@ -495,30 +541,7 @@ target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, + shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH); + shift = (shift + 1) * 3; + +- if (huge) { +- return base; +- } +- switch (level) { +- case 1: +- dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); +- break; +- case 2: +- dir_base = 
FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); +- break; +- case 3: +- dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); +- break; +- case 4: +- dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); +- break; +- default: +- do_raise_exception(env, EXCCODE_INE, GETPC()); +- return 0; +- } ++ get_dir_base_width(env, &dir_base, &dir_width, level); + index = (badvaddr >> dir_base) & ((1 << dir_width) - 1); + phys = base | index << shift; + ret = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK; +@@ -531,20 +554,42 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, + CPUState *cs = env_cpu(env); + target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv; + int shift; +- bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; + uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); + uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); ++ uint64_t dir_base, dir_width; + ++ /* ++ * The parameter "base" has only two types, ++ * one is the page table base address, ++ * whose bit 6 should be 0, ++ * and the other is the huge page entry, ++ * whose bit 6 should be 1. ++ */ + base = base & TARGET_PHYS_MASK; ++ if (FIELD_EX64(base, TLBENTRY, HUGE)) { ++ /* ++ * Gets the huge page level and Gets huge page size. ++ * Clears the huge page level information in the entry. ++ * Clears huge page bit. ++ * Move HGLOBAL bit to GLOBAL bit. ++ */ ++ get_dir_base_width(env, &dir_base, &dir_width, ++ FIELD_EX64(base, TLBENTRY, LEVEL)); ++ ++ base = FIELD_DP64(base, TLBENTRY, LEVEL, 0); ++ base = FIELD_DP64(base, TLBENTRY, HUGE, 0); ++ if (FIELD_EX64(base, TLBENTRY, HGLOBAL)) { ++ base = FIELD_DP64(base, TLBENTRY, HGLOBAL, 0); ++ base = FIELD_DP64(base, TLBENTRY, G, 1); ++ } + +- if (huge) { +- /* Huge Page. 
base is paddr */ +- tmp0 = base ^ (1 << LOONGARCH_PAGE_HUGE_SHIFT); +- /* Move Global bit */ +- tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> +- LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | +- (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); +- ps = ptbase + ptwidth - 1; ++ ps = dir_base + dir_width - 1; ++ /* ++ * Huge pages are evenly split into parity pages ++ * when loaded into the tlb, ++ * so the tlb page size needs to be divided by 2. ++ */ ++ tmp0 = base; + if (odd) { + tmp0 += MAKE_64BIT_MASK(ps, 1); + } +-- +2.33.0 + diff --git a/target-loongarch-Implement-kvm-get-set-registers.patch b/target-loongarch-Implement-kvm-get-set-registers.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ccab152351b42a20afac08a00679adacc6b788f --- /dev/null +++ b/target-loongarch-Implement-kvm-get-set-registers.patch @@ -0,0 +1,724 @@ +From 0884653d8583aaaa5585caf38246518439bcfdfd Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:59 +0800 +Subject: [PATCH] target/loongarch: Implement kvm get/set registers + +Implement kvm_arch_get/set_registers interfaces, many regs +can be get/set in the function, such as core regs, csr regs, +fpu regs, mp state, etc. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Change-Id: Ia8fc48fe08b1768853f7729e77d37cdf270031e4 +Message-Id: <20240105075804.1228596-5-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + meson.build | 1 + + target/loongarch/cpu.c | 3 + + target/loongarch/cpu.h | 1 + + target/loongarch/internals.h | 5 +- + target/loongarch/kvm/kvm.c | 580 +++++++++++++++++++++++++++++++++- + target/loongarch/trace-events | 11 + + target/loongarch/trace.h | 1 + + 7 files changed, 599 insertions(+), 3 deletions(-) + create mode 100644 target/loongarch/trace-events + create mode 100644 target/loongarch/trace.h + +diff --git a/meson.build b/meson.build +index 6c77d9687d..445f2b7c2b 100644 +--- a/meson.build ++++ b/meson.build +@@ -3358,6 +3358,7 @@ if have_system or have_user + 'target/hppa', + 'target/i386', + 'target/i386/kvm', ++ 'target/loongarch', + 'target/mips/tcg', + 'target/nios2', + 'target/ppc', +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 021592798a..275833eec8 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -553,6 +553,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + #ifndef CONFIG_USER_ONLY + env->pc = 0x1c000000; + memset(env->tlb, 0, sizeof(env->tlb)); ++ if (kvm_enabled()) { ++ kvm_arch_reset_vcpu(env); ++ } + #endif + + restore_fp_status(env); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index f6d5ef0852..f4a89bd626 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -360,6 +360,7 @@ typedef struct CPUArchState { + MemoryRegion iocsr_mem; + bool load_elf; + uint64_t elf_address; ++ uint32_t mp_state; + /* Store ipistate to access from this struct */ + DeviceState *ipistate; + #endif +diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h +index c492863cc5..0beb034748 100644 +--- a/target/loongarch/internals.h ++++ b/target/loongarch/internals.h +@@ -31,8 +31,10 @@ void G_NORETURN do_raise_exception(CPULoongArchState 
*env, + + const char *loongarch_exception_name(int32_t exception); + ++#ifdef CONFIG_TCG + int ieee_ex_to_loongarch(int xcpt); + void restore_fp_status(CPULoongArchState *env); ++#endif + + #ifndef CONFIG_USER_ONLY + extern const VMStateDescription vmstate_loongarch_cpu; +@@ -44,12 +46,13 @@ uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu); + uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu); + void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu, + uint64_t value); +- ++#ifdef CONFIG_TCG + bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + + hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); ++#endif + #endif /* !CONFIG_USER_ONLY */ + + uint64_t read_fcc(CPULoongArchState *env); +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 0d67322fd9..e7c9ef830c 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -26,19 +26,595 @@ + #include "sysemu/runstate.h" + #include "cpu-csr.h" + #include "kvm_loongarch.h" ++#include "trace.h" + + static bool cap_has_mp_state; + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO + }; + ++static int kvm_loongarch_get_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ /* Get the current register set as KVM seems it */ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs); ++ if (ret < 0) { ++ trace_kvm_failed_get_regs_core(strerror(errno)); ++ return ret; ++ } ++ /* gpr[0] value is always 0 */ ++ env->gpr[0] = 0; ++ for (i = 1; i < 32; i++) { ++ env->gpr[i] = regs.gpr[i]; ++ } ++ ++ env->pc = regs.pc; ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState
*env = &cpu->env; ++ ++ /* Set the registers based on QEMU's view of things */ ++ for (i = 0; i < 32; i++) { ++ regs.gpr[i] = env->gpr[i]; ++ } ++ ++ regs.pc = env->pc; ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs); ++ if (ret < 0) { ++ trace_kvm_failed_put_regs_core(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_get_csr(CPUState *cs) ++{ ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CRMD), ++ &env->CSR_CRMD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRMD), ++ &env->CSR_PRMD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EUEN), ++ &env->CSR_EUEN); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_MISC), ++ &env->CSR_MISC); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ECFG), ++ &env->CSR_ECFG); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ESTAT), ++ &env->CSR_ESTAT); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ERA), ++ &env->CSR_ERA); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADV), ++ &env->CSR_BADV); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADI), ++ &env->CSR_BADI); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EENTRY), ++ &env->CSR_EENTRY); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBIDX), ++ &env->CSR_TLBIDX); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBEHI), ++ &env->CSR_TLBEHI); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO0), ++ &env->CSR_TLBELO0); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO1), ++ &env->CSR_TLBELO1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ASID), ++ &env->CSR_ASID); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDL), ++ &env->CSR_PGDL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDH), ++ &env->CSR_PGDH); ++ ++ ret |=
kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGD), ++ &env->CSR_PGD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCL), ++ &env->CSR_PWCL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCH), ++ &env->CSR_PWCH); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_STLBPS), ++ &env->CSR_STLBPS); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_RVACFG), ++ &env->CSR_RVACFG); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), ++ &env->CSR_CPUID); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG1), ++ &env->CSR_PRCFG1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG2), ++ &env->CSR_PRCFG2); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG3), ++ &env->CSR_PRCFG3); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(0)), ++ &env->CSR_SAVE[0]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(1)), ++ &env->CSR_SAVE[1]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(2)), ++ &env->CSR_SAVE[2]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(3)), ++ &env->CSR_SAVE[3]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(4)), ++ &env->CSR_SAVE[4]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(5)), ++ &env->CSR_SAVE[5]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(6)), ++ &env->CSR_SAVE[6]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(7)), ++ &env->CSR_SAVE[7]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TID), ++ &env->CSR_TID); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CNTC), ++ &env->CSR_CNTC); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TICLR), ++ &env->CSR_TICLR); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_LLBCTL), ++ &env->CSR_LLBCTL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL1), ++ 
&env->CSR_IMPCTL1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL2), ++ &env->CSR_IMPCTL2); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRENTRY), ++ &env->CSR_TLBRENTRY); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRBADV), ++ &env->CSR_TLBRBADV); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRERA), ++ &env->CSR_TLBRERA); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRSAVE), ++ &env->CSR_TLBRSAVE); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO0), ++ &env->CSR_TLBRELO0); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO1), ++ &env->CSR_TLBRELO1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBREHI), ++ &env->CSR_TLBREHI); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRPRMD), ++ &env->CSR_TLBRPRMD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(0)), ++ &env->CSR_DMW[0]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(1)), ++ &env->CSR_DMW[1]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(2)), ++ &env->CSR_DMW[2]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(3)), ++ &env->CSR_DMW[3]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TVAL), ++ &env->CSR_TVAL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TCFG), ++ &env->CSR_TCFG); ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_csr(CPUState *cs) ++{ ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CRMD), ++ &env->CSR_CRMD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRMD), ++ &env->CSR_PRMD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EUEN), ++ &env->CSR_EUEN); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_MISC), ++ &env->CSR_MISC); ++ ++ ret |= kvm_set_one_reg(cs, 
KVM_IOC_CSRID(LOONGARCH_CSR_ECFG), ++ &env->CSR_ECFG); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ESTAT), ++ &env->CSR_ESTAT); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ERA), ++ &env->CSR_ERA); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADV), ++ &env->CSR_BADV); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADI), ++ &env->CSR_BADI); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EENTRY), ++ &env->CSR_EENTRY); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBIDX), ++ &env->CSR_TLBIDX); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBEHI), ++ &env->CSR_TLBEHI); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO0), ++ &env->CSR_TLBELO0); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO1), ++ &env->CSR_TLBELO1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ASID), ++ &env->CSR_ASID); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDL), ++ &env->CSR_PGDL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDH), ++ &env->CSR_PGDH); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGD), ++ &env->CSR_PGD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCL), ++ &env->CSR_PWCL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCH), ++ &env->CSR_PWCH); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_STLBPS), ++ &env->CSR_STLBPS); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_RVACFG), ++ &env->CSR_RVACFG); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), ++ &env->CSR_CPUID); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG1), ++ &env->CSR_PRCFG1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG2), ++ &env->CSR_PRCFG2); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG3), ++ &env->CSR_PRCFG3); ++ ++ ret |= kvm_set_one_reg(cs, 
KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(0)), ++ &env->CSR_SAVE[0]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(1)), ++ &env->CSR_SAVE[1]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(2)), ++ &env->CSR_SAVE[2]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(3)), ++ &env->CSR_SAVE[3]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(4)), ++ &env->CSR_SAVE[4]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(5)), ++ &env->CSR_SAVE[5]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(6)), ++ &env->CSR_SAVE[6]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(7)), ++ &env->CSR_SAVE[7]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TID), ++ &env->CSR_TID); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CNTC), ++ &env->CSR_CNTC); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TICLR), ++ &env->CSR_TICLR); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_LLBCTL), ++ &env->CSR_LLBCTL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL1), ++ &env->CSR_IMPCTL1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL2), ++ &env->CSR_IMPCTL2); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRENTRY), ++ &env->CSR_TLBRENTRY); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRBADV), ++ &env->CSR_TLBRBADV); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRERA), ++ &env->CSR_TLBRERA); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRSAVE), ++ &env->CSR_TLBRSAVE); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO0), ++ &env->CSR_TLBRELO0); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO1), ++ &env->CSR_TLBRELO1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBREHI), ++ &env->CSR_TLBREHI); ++ ++ ret |= kvm_set_one_reg(cs, 
KVM_IOC_CSRID(LOONGARCH_CSR_TLBRPRMD), ++ &env->CSR_TLBRPRMD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(0)), ++ &env->CSR_DMW[0]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(1)), ++ &env->CSR_DMW[1]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(2)), ++ &env->CSR_DMW[2]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(3)), ++ &env->CSR_DMW[3]); ++ /* ++ * timer cfg must be put at last since it is used to enable ++ * guest timer ++ */ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TVAL), ++ &env->CSR_TVAL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TCFG), ++ &env->CSR_TCFG); ++ return ret; ++} ++ ++static int kvm_loongarch_get_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_get_fpu(strerror(errno)); ++ return ret; ++ } ++ ++ env->fcsr0 = fpu.fcsr; ++ for (i = 0; i < 32; i++) { ++ env->fpr[i].vreg.UD[0] = fpu.fpr[i].val64[0]; ++ } ++ for (i = 0; i < 8; i++) { ++ env->cf[i] = fpu.fcc & 0xFF; ++ fpu.fcc = fpu.fcc >> 8; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ fpu.fcsr = env->fcsr0; ++ fpu.fcc = 0; ++ for (i = 0; i < 32; i++) { ++ fpu.fpr[i].val64[0] = env->fpr[i].vreg.UD[0]; ++ } ++ ++ for (i = 0; i < 8; i++) { ++ fpu.fcc |= env->cf[i] << (8 * i); ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_put_fpu(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++void kvm_arch_reset_vcpu(CPULoongArchState *env) ++{ ++ env->mp_state = KVM_MP_STATE_RUNNABLE; ++} ++ ++static int kvm_loongarch_get_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ struct kvm_mp_state mp_state; 
++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_get_mpstate(strerror(errno)); ++ return ret; ++ } ++ env->mp_state = mp_state.mp_state; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ struct kvm_mp_state mp_state = { ++ .mp_state = env->mp_state ++ }; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_put_mpstate(strerror(errno)); ++ } ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_get_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ uint64_t val; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ for (i = 0; i < 21; i++) { ++ ret = kvm_get_one_reg(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_get_cpucfg(strerror(errno)); ++ } ++ env->cpucfg[i] = (uint32_t)val; ++ } ++ return ret; ++} ++ ++static int kvm_loongarch_put_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ uint64_t val; ++ ++ for (i = 0; i < 21; i++) { ++ val = env->cpucfg[i]; ++ /* LSX and LASX and LBT are not supported in kvm now */ ++ if (i == 2) { ++ val &= ~(BIT(R_CPUCFG2_LSX_SHIFT) | BIT(R_CPUCFG2_LASX_SHIFT)); ++ val &= ~(BIT(R_CPUCFG2_LBT_X86_SHIFT) | ++ BIT(R_CPUCFG2_LBT_ARM_SHIFT) | ++ BIT(R_CPUCFG2_LBT_MIPS_SHIFT)); ++ } ++ ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_put_cpucfg(strerror(errno)); ++ } ++ } ++ return ret; ++} ++ + int kvm_arch_get_registers(CPUState *cs) + { +- return 0; ++ int ret; ++ ++ ret = kvm_loongarch_get_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_csr(cs); ++ if (ret) { ++ return ret; 
++ } ++ ++ ret = kvm_loongarch_get_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_cpucfg(cs); ++ return ret; + } ++ + int kvm_arch_put_registers(CPUState *cs, int level) + { +- return 0; ++ int ret; ++ ++ ret = kvm_loongarch_put_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_csr(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_cpucfg(cs); ++ return ret; + } + + int kvm_arch_init_vcpu(CPUState *cs) +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +new file mode 100644 +index 0000000000..6827ab566a +--- /dev/null ++++ b/target/loongarch/trace-events +@@ -0,0 +1,11 @@ ++# See docs/devel/tracing.rst for syntax documentation. ++ ++#kvm.c ++kvm_failed_get_regs_core(const char *msg) "Failed to get core regs from KVM: %s" ++kvm_failed_put_regs_core(const char *msg) "Failed to put core regs into KVM: %s" ++kvm_failed_get_fpu(const char *msg) "Failed to get fpu from KVM: %s" ++kvm_failed_put_fpu(const char *msg) "Failed to put fpu into KVM: %s" ++kvm_failed_get_mpstate(const char *msg) "Failed to get mp_state from KVM: %s" ++kvm_failed_put_mpstate(const char *msg) "Failed to put mp_state into KVM: %s" ++kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" ++kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" +diff --git a/target/loongarch/trace.h b/target/loongarch/trace.h +new file mode 100644 +index 0000000000..c2ecb78f08 +--- /dev/null ++++ b/target/loongarch/trace.h +@@ -0,0 +1 @@ ++#include "trace/trace-target_loongarch.h" +-- +2.27.0 + diff --git a/target-loongarch-Implement-kvm_arch_handle_exit.patch b/target-loongarch-Implement-kvm_arch_handle_exit.patch new file mode 100644 index 
0000000000000000000000000000000000000000..235e2292258903ce03f0a1c97275f237f6a64285 --- /dev/null +++ b/target-loongarch-Implement-kvm_arch_handle_exit.patch @@ -0,0 +1,68 @@ +From 3feeca228b010716aacdf7159df10ea63f7e34cd Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:02 +0800 +Subject: [PATCH] target/loongarch: Implement kvm_arch_handle_exit + +Implement kvm_arch_handle_exit for loongarch. In this +function, the KVM_EXIT_LOONGARCH_IOCSR is handled, +we read or write the iocsr address space by the addr, +length and is_write argument in kvm_run. + +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-8-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 24 +++++++++++++++++++++++- + target/loongarch/trace-events | 1 + + 2 files changed, 24 insertions(+), 1 deletion(-) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 85e7aeb083..d2dab3fef4 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -723,7 +723,29 @@ bool kvm_arch_cpu_check_are_resettable(void) + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { +- return 0; ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ MemTxAttrs attrs = {}; ++ ++ attrs.requester_id = env_cpu(env)->cpu_index; ++ ++ trace_kvm_arch_handle_exit(run->exit_reason); ++ switch (run->exit_reason) { ++ case KVM_EXIT_LOONGARCH_IOCSR: ++ address_space_rw(&env->address_space_iocsr, ++ run->iocsr_io.phys_addr, ++ attrs, ++ run->iocsr_io.data, ++ run->iocsr_io.len, ++ run->iocsr_io.is_write); ++ break; ++ default: ++ ret = -1; ++ warn_report("KVM: unknown exit reason %d", run->exit_reason); ++ break; ++ } ++ return ret; + } + + void kvm_arch_accel_class_init(ObjectClass *oc) +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +index 937c3c7c0c..021839880e 
100644 +--- a/target/loongarch/trace-events ++++ b/target/loongarch/trace-events +@@ -11,3 +11,4 @@ kvm_failed_get_counter(const char *msg) "Failed to get counter from KVM: %s" + kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" + kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" + kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" ++kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d" +-- +2.27.0 + diff --git a/target-loongarch-Implement-kvm_arch_init-function.patch b/target-loongarch-Implement-kvm_arch_init-function.patch new file mode 100644 index 0000000000000000000000000000000000000000..eb972b1072a9e0d137795b3862ffad620c11cafd --- /dev/null +++ b/target-loongarch-Implement-kvm_arch_init-function.patch @@ -0,0 +1,33 @@ +From 3a87dbd5e0343ee777bac0f18888a5a2d51254db Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:00 +0800 +Subject: [PATCH] target/loongarch: Implement kvm_arch_init function + +Implement the kvm_arch_init of loongarch, in the function, the +KVM_CAP_MP_STATE cap is checked by kvm ioctl. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-6-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index e7c9ef830c..29944b9ef8 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -665,6 +665,7 @@ int kvm_arch_get_default_type(MachineState *ms) + + int kvm_arch_init(MachineState *ms, KVMState *s) + { ++ cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); + return 0; + } + +-- +2.27.0 + diff --git a/target-loongarch-Implement-kvm_arch_init_vcpu.patch b/target-loongarch-Implement-kvm_arch_init_vcpu.patch new file mode 100644 index 0000000000000000000000000000000000000000..7c80b63a38b382412df85d4e70c557d6f26b8d72 --- /dev/null +++ b/target-loongarch-Implement-kvm_arch_init_vcpu.patch @@ -0,0 +1,87 @@ +From d7d47c044c9854675b75b91ade678d03316d9271 Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:01 +0800 +Subject: [PATCH] target/loongarch: Implement kvm_arch_init_vcpu + +Implement kvm_arch_init_vcpu interface for loongarch, +in this function, we register VM change state handler. +And when VM state changes to running, the counter value +should be put into kvm to keep consistent with kvm, +and when state change to stop, counter value should be +refreshed from kvm. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-7-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.h | 2 ++ + target/loongarch/kvm/kvm.c | 23 +++++++++++++++++++++++ + target/loongarch/trace-events | 2 ++ + 3 files changed, 27 insertions(+) + +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index f4a89bd626..8ebd6fa1a7 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -381,6 +381,8 @@ struct ArchCPU { + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; ++ /* used by KVM_REG_LOONGARCH_COUNTER ioctl to access guest time counters */ ++ uint64_t kvm_state_counter; + }; + + /** +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 29944b9ef8..85e7aeb083 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -617,8 +617,31 @@ int kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + ++static void kvm_loongarch_vm_stage_change(void *opaque, bool running, ++ RunState state) ++{ ++ int ret; ++ CPUState *cs = opaque; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ ++ if (running) { ++ ret = kvm_set_one_reg(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_put_counter(strerror(errno)); ++ } ++ } else { ++ ret = kvm_get_one_reg(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_get_counter(strerror(errno)); ++ } ++ } ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { ++ qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); + return 0; + } + +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +index 6827ab566a..937c3c7c0c 100644 +--- a/target/loongarch/trace-events ++++ b/target/loongarch/trace-events +@@ -7,5 +7,7 @@ kvm_failed_get_fpu(const char *msg) "Failed to get fpu from KVM: %s" + kvm_failed_put_fpu(const 
char *msg) "Failed to put fpu into KVM: %s" + kvm_failed_get_mpstate(const char *msg) "Failed to get mp_state from KVM: %s" + kvm_failed_put_mpstate(const char *msg) "Failed to put mp_state into KVM: %s" ++kvm_failed_get_counter(const char *msg) "Failed to get counter from KVM: %s" ++kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" + kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" + kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" +-- +2.27.0 + diff --git a/target-loongarch-Implement-lbt-registers-save-restor.patch b/target-loongarch-Implement-lbt-registers-save-restor.patch new file mode 100644 index 0000000000000000000000000000000000000000..e88f971112cff0cd80630c6204348a0923d0ca67 --- /dev/null +++ b/target-loongarch-Implement-lbt-registers-save-restor.patch @@ -0,0 +1,191 @@ +From a7b08284143f7ace3635036bf0366cbec4d52c99 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Sun, 29 Sep 2024 15:04:05 +0800 +Subject: [PATCH 58/78] target/loongarch: Implement lbt registers save/restore + function + +Six registers scr0 - scr3, eflags and ftop are added in percpu vmstate. +And two functions kvm_loongarch_get_lbt/kvm_loongarch_put_lbt are added +to save/restore lbt registers. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240929070405.235200-3-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.h | 13 ++++++++ + target/loongarch/kvm/kvm.c | 62 ++++++++++++++++++++++++++++++++++++++ + target/loongarch/machine.c | 24 +++++++++++++++ + 3 files changed, 99 insertions(+) + +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 3e2bcbf608..2f8c5cf2dd 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -18,6 +18,7 @@ + #endif + #include "cpu-csr.h" + #include "cpu-qom.h" ++#include "qapi/qapi-types-common.h" + + #define IOCSRF_TEMP 0 + #define IOCSRF_NODECNT 1 +@@ -290,6 +291,17 @@ enum loongarch_features { + LOONGARCH_FEATURE_LBT, /* loongson binary translation extension */ + }; + ++typedef struct LoongArchBT { ++ /* scratch registers */ ++ uint64_t scr0; ++ uint64_t scr1; ++ uint64_t scr2; ++ uint64_t scr3; ++ /* loongarch eflags */ ++ uint32_t eflags; ++ uint32_t ftop; ++} lbt_t; ++ + typedef struct CPUArchState { + uint64_t gpr[32]; + uint64_t pc; +@@ -297,6 +309,7 @@ typedef struct CPUArchState { + fpr_t fpr[32]; + bool cf[8]; + uint32_t fcsr0; ++ lbt_t lbt; + + uint32_t cpucfg[21]; + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 567404bdb5..118f66f742 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -486,6 +486,58 @@ static int kvm_loongarch_put_regs_fp(CPUState *cs) + return ret; + } + ++static int kvm_loongarch_put_lbt(CPUState *cs) ++{ ++ CPULoongArchState *env = cpu_env(cs); ++ uint64_t val; ++ int ret; ++ ++ /* check whether vm support LBT firstly */ ++ if (FIELD_EX32(env->cpucfg[2], CPUCFG2, LBT_ALL) != 7) { ++ return 0; ++ } ++ ++ /* set six LBT registers including scr0-scr3, eflags, ftop */ ++ ret = kvm_set_one_reg(cs, KVM_REG_LOONGARCH_LBT_SCR0, &env->lbt.scr0); ++ ret |= kvm_set_one_reg(cs, KVM_REG_LOONGARCH_LBT_SCR1, &env->lbt.scr1); ++ ret |= kvm_set_one_reg(cs, 
KVM_REG_LOONGARCH_LBT_SCR2, &env->lbt.scr2); ++ ret |= kvm_set_one_reg(cs, KVM_REG_LOONGARCH_LBT_SCR3, &env->lbt.scr3); ++ /* ++ * Be cautious, KVM_REG_LOONGARCH_LBT_FTOP is defined as 64-bit however ++ * lbt.ftop is 32-bit; the same with KVM_REG_LOONGARCH_LBT_EFLAGS register ++ */ ++ val = env->lbt.eflags; ++ ret |= kvm_set_one_reg(cs, KVM_REG_LOONGARCH_LBT_EFLAGS, &val); ++ val = env->lbt.ftop; ++ ret |= kvm_set_one_reg(cs, KVM_REG_LOONGARCH_LBT_FTOP, &val); ++ ++ return ret; ++} ++ ++static int kvm_loongarch_get_lbt(CPUState *cs) ++{ ++ CPULoongArchState *env = cpu_env(cs); ++ uint64_t val; ++ int ret; ++ ++ /* check whether vm support LBT firstly */ ++ if (FIELD_EX32(env->cpucfg[2], CPUCFG2, LBT_ALL) != 7) { ++ return 0; ++ } ++ ++ /* get six LBT registers including scr0-scr3, eflags, ftop */ ++ ret = kvm_get_one_reg(cs, KVM_REG_LOONGARCH_LBT_SCR0, &env->lbt.scr0); ++ ret |= kvm_get_one_reg(cs, KVM_REG_LOONGARCH_LBT_SCR1, &env->lbt.scr1); ++ ret |= kvm_get_one_reg(cs, KVM_REG_LOONGARCH_LBT_SCR2, &env->lbt.scr2); ++ ret |= kvm_get_one_reg(cs, KVM_REG_LOONGARCH_LBT_SCR3, &env->lbt.scr3); ++ ret |= kvm_get_one_reg(cs, KVM_REG_LOONGARCH_LBT_EFLAGS, &val); ++ env->lbt.eflags = (uint32_t)val; ++ ret |= kvm_get_one_reg(cs, KVM_REG_LOONGARCH_LBT_FTOP, &val); ++ env->lbt.ftop = (uint32_t)val; ++ ++ return ret; ++} ++ + void kvm_arch_reset_vcpu(CPUState *cs) + { + CPULoongArchState *env = cpu_env(cs); +@@ -733,6 +785,11 @@ int kvm_arch_get_registers(CPUState *cs) + return ret; + } + ++ ret = kvm_loongarch_get_lbt(cs); ++ if (ret) { ++ return ret; ++ } ++ + ret = kvm_loongarch_get_mpstate(cs); + return ret; + } +@@ -761,6 +818,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + ++ ret = kvm_loongarch_put_lbt(cs); ++ if (ret) { ++ return ret; ++ } ++ + ret = kvm_loongarch_put_mpstate(cs); + return ret; + } +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index 97e1152ffd..5d62aabd51 100644 +--- a/target/loongarch/machine.c 
++++ b/target/loongarch/machine.c +@@ -130,6 +130,29 @@ static int cpu_pre_save(void *opaque) + return 0; + } + ++static bool lbt_needed(void *opaque) ++{ ++ LoongArchCPU *cpu = opaque; ++ ++ return !!FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, LBT_ALL); ++} ++ ++static const VMStateDescription vmstate_lbt = { ++ .name = "cpu/lbt", ++ .version_id = 0, ++ .minimum_version_id = 0, ++ .needed = lbt_needed, ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT64(env.lbt.scr0, LoongArchCPU), ++ VMSTATE_UINT64(env.lbt.scr1, LoongArchCPU), ++ VMSTATE_UINT64(env.lbt.scr2, LoongArchCPU), ++ VMSTATE_UINT64(env.lbt.scr3, LoongArchCPU), ++ VMSTATE_UINT32(env.lbt.eflags, LoongArchCPU), ++ VMSTATE_UINT32(env.lbt.ftop, LoongArchCPU), ++ VMSTATE_END_OF_LIST() ++ }, ++}; ++ + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) + static bool tlb_needed(void *opaque) + { +@@ -244,6 +267,7 @@ const VMStateDescription vmstate_loongarch_cpu = { + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) + &vmstate_tlb, + #endif ++ &vmstate_lbt, + NULL + } + }; +-- +2.39.1 + diff --git a/target-loongarch-Implement-set-vcpu-intr-for-kvm.patch b/target-loongarch-Implement-set-vcpu-intr-for-kvm.patch new file mode 100644 index 0000000000000000000000000000000000000000..0fbad2aab0c381829cd5589add8e73cdbb364387 --- /dev/null +++ b/target-loongarch-Implement-set-vcpu-intr-for-kvm.patch @@ -0,0 +1,122 @@ +From 5f4c8b31db442e6ac39fbfe4b29d5479ab3567aa Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Wed, 10 Jan 2024 10:41:52 +0100 +Subject: [PATCH] target/loongarch: Implement set vcpu intr for kvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement loongarch kvm set vcpu interrupt interface, +when a irq is set in vcpu, we use the KVM_INTERRUPT +ioctl to set intr into kvm. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-ID: <20240105075804.1228596-9-zhaotianrui@loongson.cn> +[PMD: Split from bigger patch, part 2] +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20240110094152.52138-2-philmd@linaro.org> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 9 ++++++++- + target/loongarch/kvm/kvm.c | 15 +++++++++++++++ + target/loongarch/kvm/kvm_loongarch.h | 16 ++++++++++++++++ + target/loongarch/trace-events | 1 + + 4 files changed, 40 insertions(+), 1 deletion(-) + create mode 100644 target/loongarch/kvm/kvm_loongarch.h + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 60f2636b43..413414392b 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -12,6 +12,8 @@ + #include "qemu/module.h" + #include "sysemu/qtest.h" + #include "sysemu/tcg.h" ++#include "sysemu/kvm.h" ++#include "kvm/kvm_loongarch.h" + #include "exec/exec-all.h" + #include "cpu.h" + #include "internals.h" +@@ -19,6 +21,9 @@ + #include "cpu-csr.h" + #include "sysemu/reset.h" + #include "vec.h" ++#ifdef CONFIG_KVM ++#include ++#endif + #ifdef CONFIG_TCG + #include "exec/cpu_ldst.h" + #include "tcg/tcg.h" +@@ -111,7 +116,9 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) + return; + } + +- if (tcg_enabled()) { ++ if (kvm_enabled()) { ++ kvm_loongarch_set_interrupt(cpu, irq, level); ++ } else if (tcg_enabled()) { + env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); + if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { + cpu_interrupt(cs, CPU_INTERRUPT_HARD); +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index d2dab3fef4..bd33ec2114 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -748,6 +748,21 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + return ret; + } + ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level) ++{ ++ struct kvm_interrupt intr; ++ CPUState *cs = CPU(cpu); 
++ ++ if (level) { ++ intr.irq = irq; ++ } else { ++ intr.irq = -irq; ++ } ++ ++ trace_kvm_set_intr(irq, level); ++ return kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &intr); ++} ++ + void kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h +new file mode 100644 +index 0000000000..d945b6bb82 +--- /dev/null ++++ b/target/loongarch/kvm/kvm_loongarch.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch kvm interface ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#include "cpu.h" ++ ++#ifndef QEMU_KVM_LOONGARCH_H ++#define QEMU_KVM_LOONGARCH_H ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); ++void kvm_arch_reset_vcpu(CPULoongArchState *env); ++ ++#endif +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +index 021839880e..dea11edc0f 100644 +--- a/target/loongarch/trace-events ++++ b/target/loongarch/trace-events +@@ -12,3 +12,4 @@ kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" + kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" + kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" + kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d" ++kvm_set_intr(int irq, int level) "kvm set interrupt, irq num: %d, level: %d" +-- +2.27.0 + diff --git a/target-loongarch-Put-cpucfg-operation-before-CSR-reg.patch b/target-loongarch-Put-cpucfg-operation-before-CSR-reg.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d2e4568a81fa9db0554724087a3d2933805edf8 --- /dev/null +++ b/target-loongarch-Put-cpucfg-operation-before-CSR-reg.patch @@ -0,0 +1,81 @@ +From 717faefc8f56490ad94ef69b42c2d2491225ace8 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Sun, 28 Apr 2024 11:16:51 +0800 +Subject: [PATCH 43/78] target/loongarch: Put cpucfg operation before CSR + register 
+ +On Loongarch, cpucfg is register for cpu feature, some other registers +depend on cpucfg feature such as perf CSR registers. Here put cpucfg +read/write operations before CSR register, so that KVM knows how many +perf CSR registers are valid from pre-set cpucfg feature information. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240428031651.1354587-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/kvm/kvm.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 5c88270132..407d454919 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -714,22 +714,22 @@ int kvm_arch_get_registers(CPUState *cs) + return ret; + } + +- ret = kvm_loongarch_get_csr(cs); ++ ret = kvm_loongarch_get_cpucfg(cs); + if (ret) { + return ret; + } + +- ret = kvm_loongarch_get_regs_fp(cs); ++ ret = kvm_loongarch_get_csr(cs); + if (ret) { + return ret; + } + +- ret = kvm_loongarch_get_mpstate(cs); ++ ret = kvm_loongarch_get_regs_fp(cs); + if (ret) { + return ret; + } + +- ret = kvm_loongarch_get_cpucfg(cs); ++ ret = kvm_loongarch_get_mpstate(cs); + return ret; + } + +@@ -742,22 +742,22 @@ int kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + +- ret = kvm_loongarch_put_csr(cs, level); ++ ret = kvm_loongarch_put_cpucfg(cs); + if (ret) { + return ret; + } + +- ret = kvm_loongarch_put_regs_fp(cs); ++ ret = kvm_loongarch_put_csr(cs, level); + if (ret) { + return ret; + } + +- ret = kvm_loongarch_put_mpstate(cs); ++ ret = kvm_loongarch_put_regs_fp(cs); + if (ret) { + return ret; + } + +- ret = kvm_loongarch_put_cpucfg(cs); ++ ret = kvm_loongarch_put_mpstate(cs); + return ret; + } + +-- +2.39.1 + diff --git a/target-loongarch-Remove-avail_64-in-trans_srai_w-and.patch b/target-loongarch-Remove-avail_64-in-trans_srai_w-and.patch new file mode 100644 index 
0000000000000000000000000000000000000000..ff55f2317e28dbf985a890169fca5c9ff0f17b15 --- /dev/null +++ b/target-loongarch-Remove-avail_64-in-trans_srai_w-and.patch @@ -0,0 +1,56 @@ +From 3b3fdfa6d5439298b883e2e223fa04a2209612f5 Mon Sep 17 00:00:00 2001 +From: Feiyang Chen +Date: Fri, 28 Jun 2024 13:33:57 +1000 +Subject: [PATCH 47/78] target/loongarch: Remove avail_64 in trans_srai_w() and + simplify it + +Since srai.w is a valid instruction on la32, remove the avail_64 check +and simplify trans_srai_w(). + +Fixes: c0c0461e3a06 ("target/loongarch: Add avail_64 to check la64-only instructions") +Reviewed-by: Richard Henderson +Signed-off-by: Feiyang Chen +Message-Id: <20240628033357.50027-1-chris.chenfeiyang@gmail.com> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/tcg/insn_trans/trans_shift.c.inc | 15 +++------------ + 1 file changed, 3 insertions(+), 12 deletions(-) + +diff --git a/target/loongarch/tcg/insn_trans/trans_shift.c.inc b/target/loongarch/tcg/insn_trans/trans_shift.c.inc +index 2f4bd6ff28..377307785a 100644 +--- a/target/loongarch/tcg/insn_trans/trans_shift.c.inc ++++ b/target/loongarch/tcg/insn_trans/trans_shift.c.inc +@@ -67,19 +67,9 @@ static void gen_rotr_d(TCGv dest, TCGv src1, TCGv src2) + tcg_gen_rotr_tl(dest, src1, t0); + } + +-static bool trans_srai_w(DisasContext *ctx, arg_srai_w *a) ++static void gen_sari_w(TCGv dest, TCGv src1, target_long imm) + { +- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); +- TCGv src1 = gpr_src(ctx, a->rj, EXT_ZERO); +- +- if (!avail_64(ctx)) { +- return false; +- } +- +- tcg_gen_sextract_tl(dest, src1, a->imm, 32 - a->imm); +- gen_set_gpr(a->rd, dest, EXT_NONE); +- +- return true; ++ tcg_gen_sextract_tl(dest, src1, imm, 32 - imm); + } + + TRANS(sll_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_sll_w) +@@ -94,6 +84,7 @@ TRANS(slli_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_shli_tl) + TRANS(slli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shli_tl) + TRANS(srli_w, ALL, gen_rri_c, 
EXT_ZERO, EXT_SIGN, tcg_gen_shri_tl) + TRANS(srli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shri_tl) ++TRANS(srai_w, ALL, gen_rri_c, EXT_NONE, EXT_NONE, gen_sari_w) + TRANS(srai_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_sari_tl) + TRANS(rotri_w, 64, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w) + TRANS(rotri_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_rotri_tl) +-- +2.39.1 + diff --git a/target-loongarch-Restrict-TCG-specific-code.patch b/target-loongarch-Restrict-TCG-specific-code.patch new file mode 100644 index 0000000000000000000000000000000000000000..a430cf8acfc20f73f5c2eabe0fb5e81f5fa7221f --- /dev/null +++ b/target-loongarch-Restrict-TCG-specific-code.patch @@ -0,0 +1,152 @@ +From 773ea71519da1413ca2e0e60857272164e156a47 Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Wed, 10 Jan 2024 10:41:51 +0100 +Subject: [PATCH] target/loongarch: Restrict TCG-specific code +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In preparation of supporting KVM in the next commit. 
+ +Conflict: + +diff --cc target/loongarch/cpu.c +index 275833eec8,70dd4622aa..0000000000 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@@ -17,9 -17,14 +17,17 @@@ + #include "internals.h" + #include "fpu/softfloat-helpers.h" + #include "cpu-csr.h" + -#ifndef CONFIG_USER_ONLY + #include "sysemu/reset.h" +++<<<<<<< HEAD + +#include "tcg/tcg.h" +++======= ++ #endif +++>>>>>>> target/loongarch: Restrict TCG-specific code + #include "vec.h" ++ #ifdef CONFIG_TCG ++ #include "exec/cpu_ldst.h" ++ #include "tcg/tcg.h" ++ #endif + +Solve: + +drop: +++<<<<<<< HEAD + +#include "tcg/tcg.h" +++======= ++ #endif +++>>>>>>> target/loongarch: Restrict TCG-specific code + +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-ID: <20240105075804.1228596-9-zhaotianrui@loongson.cn> +[PMD: Split from bigger patch, part 1] +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20240110094152.52138-1-philmd@linaro.org> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 30 +++++++++++++++++++++--------- + 1 file changed, 21 insertions(+), 9 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 275833eec8..60f2636b43 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -11,15 +11,18 @@ + #include "qapi/error.h" + #include "qemu/module.h" + #include "sysemu/qtest.h" +-#include "exec/cpu_ldst.h" ++#include "sysemu/tcg.h" + #include "exec/exec-all.h" + #include "cpu.h" + #include "internals.h" + #include "fpu/softfloat-helpers.h" + #include "cpu-csr.h" + #include "sysemu/reset.h" +-#include "tcg/tcg.h" + #include "vec.h" ++#ifdef CONFIG_TCG ++#include "exec/cpu_ldst.h" ++#include "tcg/tcg.h" ++#endif + + const char * const regnames[32] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", +@@ -108,12 +111,13 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) + return; + } + +- env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); +- +- if (FIELD_EX64(env->CSR_ESTAT, 
CSR_ESTAT, IS)) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); +- } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ if (tcg_enabled()) { ++ env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); ++ if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } + } + } + +@@ -138,7 +142,10 @@ static inline bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) + + return (pending & status) != 0; + } ++#endif + ++#ifdef CONFIG_TCG ++#ifndef CONFIG_USER_ONLY + static void loongarch_cpu_do_interrupt(CPUState *cs) + { + LoongArchCPU *cpu = LOONGARCH_CPU(cs); +@@ -320,7 +327,6 @@ static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + } + #endif + +-#ifdef CONFIG_TCG + static void loongarch_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) + { +@@ -558,7 +564,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + } + #endif + ++#ifdef CONFIG_TCG + restore_fp_status(env); ++#endif + cs->exception_index = -1; + } + +@@ -701,8 +709,10 @@ static void loongarch_cpu_init(Object *obj) + CPULoongArchState *env = &cpu->env; + + qdev_init_gpio_in(DEVICE(cpu), loongarch_cpu_set_irq, N_IRQS); ++#ifdef CONFIG_TCG + timer_init_ns(&cpu->timer, QEMU_CLOCK_VIRTUAL, + &loongarch_constant_timer_cb, cpu); ++#endif + memory_region_init_io(&env->system_iocsr, OBJECT(cpu), NULL, + env, "iocsr", UINT64_MAX); + address_space_init(&env->address_space_iocsr, &env->system_iocsr, "IOCSR"); +@@ -802,7 +812,9 @@ static struct TCGCPUOps loongarch_tcg_ops = { + #include "hw/core/sysemu-cpu-ops.h" + + static const struct SysemuCPUOps loongarch_sysemu_ops = { ++#ifdef CONFIG_TCG + .get_phys_page_debug = loongarch_cpu_get_phys_page_debug, ++#endif + }; + + static int64_t loongarch_cpu_get_arch_id(CPUState *cs) +-- +2.27.0 + diff --git a/target-loongarch-Set-CSR_PRCFG1-and-CSR_PRCFG2-value.patch 
b/target-loongarch-Set-CSR_PRCFG1-and-CSR_PRCFG2-value.patch new file mode 100644 index 0000000000000000000000000000000000000000..c88286d87536b9d4a1345a86073638b3bdf612cc --- /dev/null +++ b/target-loongarch-Set-CSR_PRCFG1-and-CSR_PRCFG2-value.patch @@ -0,0 +1,55 @@ +From f677a8f2311e823a87ec70dbdbc07712d54e5a85 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 5 Jul 2024 10:18:38 +0800 +Subject: [PATCH 48/78] target/loongarch: Set CSR_PRCFG1 and CSR_PRCFG2 values + +We set the value of register CSR_PRCFG3, but left out CSR_PRCFG1 +and CSR_PRCFG2. Set CSR_PRCFG1 and CSR_PRCFG2 according to the +default values of the physical machine. + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240705021839.1004374-1-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index f89740a5aa..5bb9e5656a 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -472,6 +472,18 @@ static void loongarch_la464_initfn(Object *obj) + env->cpucfg[20] = data; + + env->CSR_ASID = FIELD_DP64(0, CSR_ASID, ASIDBITS, 0xa); ++ ++ env->CSR_PRCFG1 = FIELD_DP64(env->CSR_PRCFG1, CSR_PRCFG1, SAVE_NUM, 8); ++ env->CSR_PRCFG1 = FIELD_DP64(env->CSR_PRCFG1, CSR_PRCFG1, TIMER_BITS, 0x2f); ++ env->CSR_PRCFG1 = FIELD_DP64(env->CSR_PRCFG1, CSR_PRCFG1, VSMAX, 7); ++ ++ env->CSR_PRCFG2 = 0x3ffff000; ++ ++ env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, TLB_TYPE, 2); ++ env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, MTLB_ENTRY, 63); ++ env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_WAYS, 7); ++ env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_SETS, 8); ++ + loongarch_cpu_post_init(obj); + } + +@@ -569,11 +581,6 @@ static void loongarch_cpu_reset_hold(Object *obj) + env->CSR_MERRCTL = FIELD_DP64(env->CSR_MERRCTL, CSR_MERRCTL, ISMERR, 0); + env->CSR_TID = cs->cpu_index; + 
+- env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, TLB_TYPE, 2); +- env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, MTLB_ENTRY, 63); +- env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_WAYS, 7); +- env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_SETS, 8); +- + for (n = 0; n < 4; n++) { + env->CSR_DMW[n] = FIELD_DP64(env->CSR_DMW[n], CSR_DMW, PLV0, 0); + env->CSR_DMW[n] = FIELD_DP64(env->CSR_DMW[n], CSR_DMW, PLV1, 0); +-- +2.39.1 + diff --git a/target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch b/target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch new file mode 100644 index 0000000000000000000000000000000000000000..9556d3117254e61fffbefcfdc494ae6298632b81 --- /dev/null +++ b/target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch @@ -0,0 +1,57 @@ +From d271f623205c2984a30cfb12e160e219b2bbe974 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 15 Jan 2024 16:51:21 +0800 +Subject: [PATCH] target/loongarch: Set cpuid CSR register only once with kvm + mode + +CSR cpuid register is used for routing irq to different vcpus, its +value is kept unchanged since poweron. So it is not necessary to +set CSR cpuid register after system resets, and it is only set at +vm creation stage. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240115085121.180524-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 84bcdf5f86..2230f029d0 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -250,7 +250,7 @@ static int kvm_loongarch_get_csr(CPUState *cs) + return ret; + } + +-static int kvm_loongarch_put_csr(CPUState *cs) ++static int kvm_loongarch_put_csr(CPUState *cs, int level) + { + int ret = 0; + LoongArchCPU *cpu = LOONGARCH_CPU(cs); +@@ -322,8 +322,11 @@ static int kvm_loongarch_put_csr(CPUState *cs) + ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_RVACFG), + &env->CSR_RVACFG); + +- ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), ++ /* CPUID is constant after poweron, it should be set only once */ ++ if (level >= KVM_PUT_FULL_STATE) { ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), + &env->CSR_CPUID); ++ } + + ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG1), + &env->CSR_PRCFG1); +@@ -598,7 +601,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + +- ret = kvm_loongarch_put_csr(cs); ++ ret = kvm_loongarch_put_csr(cs, level); + if (ret) { + return ret; + } +-- +2.27.0 + diff --git a/target-loongarch-Supplement-vcpu-env-initial-when-vc.patch b/target-loongarch-Supplement-vcpu-env-initial-when-vc.patch new file mode 100644 index 0000000000000000000000000000000000000000..e066713c788e3f93715b341486904e7db372f6ca --- /dev/null +++ b/target-loongarch-Supplement-vcpu-env-initial-when-vc.patch @@ -0,0 +1,59 @@ +From 48dae5f461bf2cde206e879d52df6cf1bad3ac6e Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:58 +0800 +Subject: [PATCH] target/loongarch: Supplement vcpu env initial when vcpu reset + +Supplement vcpu env initial when vcpu reset, 
including +init vcpu CSR_CPUID,CSR_TID to cpu->cpu_index. The two +regs will be used in kvm_get/set_csr_ioctl. + +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-4-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 2 ++ + target/loongarch/cpu.h | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index db9a421cc4..021592798a 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -531,10 +531,12 @@ static void loongarch_cpu_reset_hold(Object *obj) + + env->CSR_ESTAT = env->CSR_ESTAT & (~MAKE_64BIT_MASK(0, 2)); + env->CSR_RVACFG = FIELD_DP64(env->CSR_RVACFG, CSR_RVACFG, RBITS, 0); ++ env->CSR_CPUID = cs->cpu_index; + env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0); + env->CSR_LLBCTL = FIELD_DP64(env->CSR_LLBCTL, CSR_LLBCTL, KLO, 0); + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0); + env->CSR_MERRCTL = FIELD_DP64(env->CSR_MERRCTL, CSR_MERRCTL, ISMERR, 0); ++ env->CSR_TID = cs->cpu_index; + + env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, TLB_TYPE, 2); + env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, MTLB_ENTRY, 63); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 00d1fba597..f6d5ef0852 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -319,6 +319,7 @@ typedef struct CPUArchState { + uint64_t CSR_PWCH; + uint64_t CSR_STLBPS; + uint64_t CSR_RVACFG; ++ uint64_t CSR_CPUID; + uint64_t CSR_PRCFG1; + uint64_t CSR_PRCFG2; + uint64_t CSR_PRCFG3; +@@ -350,7 +351,6 @@ typedef struct CPUArchState { + uint64_t CSR_DBG; + uint64_t CSR_DERA; + uint64_t CSR_DSAVE; +- uint64_t CSR_CPUID; + + #ifndef CONFIG_USER_ONLY + LoongArchTLB tlb[LOONGARCH_TLB_MAX]; +-- +2.27.0 + diff --git a/target-loongarch-Support-QMP-dump-guest-memory.patch b/target-loongarch-Support-QMP-dump-guest-memory.patch new file mode 
100644 index 0000000000000000000000000000000000000000..f7decd79eebf274da501b417c3ea7ba5448ffedd --- /dev/null +++ b/target-loongarch-Support-QMP-dump-guest-memory.patch @@ -0,0 +1,237 @@ +From 2f19b259a16985ce515727c819c3a7eb4f41e6d0 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Thu, 22 Aug 2024 14:52:45 +0800 +Subject: [PATCH 52/78] target/loongarch: Support QMP dump-guest-memory + +Add the support needed for creating prstatus elf notes. This allows +us to use QMP dump-guest-memory. + +Now ELF notes of LoongArch only supports general elf notes, LSX and +LASX is not supported, since it is mainly used to dump guest memory. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Tested-by: Song Gao +Message-Id: <20240822065245.2286214-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/arch_dump.c | 167 +++++++++++++++++++++++++++++++++++ + target/loongarch/cpu.c | 1 + + target/loongarch/internals.h | 2 + + target/loongarch/meson.build | 1 + + 4 files changed, 171 insertions(+) + create mode 100644 target/loongarch/arch_dump.c + +diff --git a/target/loongarch/arch_dump.c b/target/loongarch/arch_dump.c +new file mode 100644 +index 0000000000..4986db970e +--- /dev/null ++++ b/target/loongarch/arch_dump.c +@@ -0,0 +1,167 @@ ++/* ++ * Support for writing ELF notes for LoongArch architectures ++ * ++ * Copyright (c) 2023 Loongarch Technology ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2 or later, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program. If not, see . 
++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "cpu.h" ++#include "elf.h" ++#include "sysemu/dump.h" ++#include "internals.h" ++ ++/* struct user_pt_regs from arch/loongarch/include/uapi/asm/ptrace.h */ ++struct loongarch_user_regs { ++ uint64_t gpr[32]; ++ uint64_t pad1[1]; ++ /* Special CSR registers. */ ++ uint64_t csr_era; ++ uint64_t csr_badv; ++ uint64_t pad2[10]; ++} QEMU_PACKED; ++ ++QEMU_BUILD_BUG_ON(sizeof(struct loongarch_user_regs) != 360); ++ ++/* struct elf_prstatus from include/uapi/linux/elfcore.h */ ++struct loongarch_elf_prstatus { ++ char pad1[32]; /* 32 == offsetof(struct elf_prstatus, pr_pid) */ ++ uint32_t pr_pid; ++ /* ++ * 76 == offsetof(struct elf_prstatus, pr_reg) - ++ * offsetof(struct elf_prstatus, pr_ppid) ++ */ ++ char pad2[76]; ++ struct loongarch_user_regs pr_reg; ++ uint32_t pr_fpvalid; ++ char pad3[4]; ++} QEMU_PACKED; ++ ++QEMU_BUILD_BUG_ON(sizeof(struct loongarch_elf_prstatus) != 480); ++ ++/* struct user_fp_state from arch/loongarch/include/uapi/asm/ptrace.h */ ++struct loongarch_fpu_struct { ++ uint64_t fpr[32]; ++ uint64_t fcc; ++ unsigned int fcsr; ++} QEMU_PACKED; ++ ++QEMU_BUILD_BUG_ON(sizeof(struct loongarch_fpu_struct) != 268); ++ ++struct loongarch_note { ++ Elf64_Nhdr hdr; ++ char name[8]; /* align_up(sizeof("CORE"), 4) */ ++ union { ++ struct loongarch_elf_prstatus prstatus; ++ struct loongarch_fpu_struct fpu; ++ }; ++} QEMU_PACKED; ++ ++#define LOONGARCH_NOTE_HEADER_SIZE offsetof(struct loongarch_note, prstatus) ++#define LOONGARCH_PRSTATUS_NOTE_SIZE \ ++ (LOONGARCH_NOTE_HEADER_SIZE + sizeof(struct loongarch_elf_prstatus)) ++#define LOONGARCH_PRFPREG_NOTE_SIZE \ ++ (LOONGARCH_NOTE_HEADER_SIZE + sizeof(struct loongarch_fpu_struct)) ++ ++static void loongarch_note_init(struct loongarch_note *note, DumpState *s, ++ const char *name, Elf64_Word namesz, ++ Elf64_Word type, Elf64_Word descsz) ++{ ++ memset(note, 0, sizeof(*note)); ++ ++ note->hdr.n_namesz = cpu_to_dump32(s, namesz); ++ note->hdr.n_descsz = 
cpu_to_dump32(s, descsz); ++ note->hdr.n_type = cpu_to_dump32(s, type); ++ ++ memcpy(note->name, name, namesz); ++} ++ ++static int loongarch_write_elf64_fprpreg(WriteCoreDumpFunction f, ++ CPULoongArchState *env, int cpuid, ++ DumpState *s) ++{ ++ struct loongarch_note note; ++ int ret, i; ++ ++ loongarch_note_init(¬e, s, "CORE", 5, NT_PRFPREG, sizeof(note.fpu)); ++ note.fpu.fcsr = cpu_to_dump64(s, env->fcsr0); ++ ++ for (i = 0; i < 8; i++) { ++ note.fpu.fcc |= env->cf[i] << (8 * i); ++ } ++ note.fpu.fcc = cpu_to_dump64(s, note.fpu.fcc); ++ ++ for (i = 0; i < 32; ++i) { ++ note.fpu.fpr[i] = cpu_to_dump64(s, env->fpr[i].vreg.UD[0]); ++ } ++ ++ ret = f(¬e, LOONGARCH_PRFPREG_NOTE_SIZE, s); ++ if (ret < 0) { ++ return -1; ++ } ++ ++ return 0; ++} ++ ++int loongarch_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, ++ int cpuid, DumpState *s) ++{ ++ struct loongarch_note note; ++ CPULoongArchState *env = &LOONGARCH_CPU(cs)->env; ++ int ret, i; ++ ++ loongarch_note_init(¬e, s, "CORE", 5, NT_PRSTATUS, ++ sizeof(note.prstatus)); ++ note.prstatus.pr_pid = cpu_to_dump32(s, cpuid); ++ note.prstatus.pr_fpvalid = cpu_to_dump32(s, 1); ++ ++ for (i = 0; i < 32; ++i) { ++ note.prstatus.pr_reg.gpr[i] = cpu_to_dump64(s, env->gpr[i]); ++ } ++ note.prstatus.pr_reg.csr_era = cpu_to_dump64(s, env->CSR_ERA); ++ note.prstatus.pr_reg.csr_badv = cpu_to_dump64(s, env->CSR_BADV); ++ ret = f(¬e, LOONGARCH_PRSTATUS_NOTE_SIZE, s); ++ if (ret < 0) { ++ return -1; ++ } ++ ++ ret = loongarch_write_elf64_fprpreg(f, env, cpuid, s); ++ if (ret < 0) { ++ return -1; ++ } ++ ++ return ret; ++} ++ ++int cpu_get_dump_info(ArchDumpInfo *info, ++ const GuestPhysBlockList *guest_phys_blocks) ++{ ++ info->d_machine = EM_LOONGARCH; ++ info->d_endian = ELFDATA2LSB; ++ info->d_class = ELFCLASS64; ++ ++ return 0; ++} ++ ++ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) ++{ ++ size_t note_size = 0; ++ ++ if (class == ELFCLASS64) { ++ note_size = LOONGARCH_PRSTATUS_NOTE_SIZE + 
LOONGARCH_PRFPREG_NOTE_SIZE; ++ } ++ ++ return note_size * nr_cpus; ++} +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 63d1f65608..d6a13de901 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -861,6 +861,7 @@ static struct TCGCPUOps loongarch_tcg_ops = { + #include "hw/core/sysemu-cpu-ops.h" + + static const struct SysemuCPUOps loongarch_sysemu_ops = { ++ .write_elf64_note = loongarch_cpu_write_elf64_note, + .get_phys_page_debug = loongarch_cpu_get_phys_page_debug, + }; + +diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h +index 944153b180..1a02427627 100644 +--- a/target/loongarch/internals.h ++++ b/target/loongarch/internals.h +@@ -72,5 +72,7 @@ void write_fcc(CPULoongArchState *env, uint64_t val); + int loongarch_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n); + int loongarch_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n); + void loongarch_cpu_register_gdb_regs_for_features(CPUState *cs); ++int loongarch_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, ++ int cpuid, DumpState *s); + + #endif +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index e002e9aaf6..7817318287 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -8,6 +8,7 @@ loongarch_ss.add(files( + + loongarch_system_ss = ss.source_set() + loongarch_system_ss.add(files( ++ 'arch_dump.c', + 'cpu_helper.c', + 'loongarch-qmp-cmds.c', + 'machine.c', +-- +2.39.1 + diff --git a/target-loongarch-Use-explicit-little-endian-LD-ST-AP.patch b/target-loongarch-Use-explicit-little-endian-LD-ST-AP.patch new file mode 100644 index 0000000000000000000000000000000000000000..45c70dab5ab92f71d40e7db7832cc44d57c3a240 --- /dev/null +++ b/target-loongarch-Use-explicit-little-endian-LD-ST-AP.patch @@ -0,0 +1,64 @@ +From 43ac751187131f91b043ecf611ec795422b42c6c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 4 Oct 2024 
11:59:56 +0200 +Subject: [PATCH 54/78] target/loongarch: Use explicit little-endian LD/ST API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The LoongArch architecture uses little endianness. Directly +use the little-endian LD/ST API. + +Mechanical change using: + + $ end=le; \ + for acc in uw w l q tul; do \ + sed -i -e "s/ld${acc}_p(/ld${acc}_${end}_p(/" \ + -e "s/st${acc}_p(/st${acc}_${end}_p(/" \ + $(git grep -wlE '(ld|st)t?u?[wlq]_p' target/loongarch/); \ + done + +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Message-Id: <20241004163042.85922-13-philmd@linaro.org> +Signed-off-by: Xianglai Li +--- + target/loongarch/gdbstub.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c +index f8e3324bae..cc72680c38 100644 +--- a/target/loongarch/gdbstub.c ++++ b/target/loongarch/gdbstub.c +@@ -68,10 +68,10 @@ int loongarch_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) + int length = 0; + + if (is_la64(env)) { +- tmp = ldq_p(mem_buf); ++ tmp = ldq_le_p(mem_buf); + read_length = 8; + } else { +- tmp = ldl_p(mem_buf); ++ tmp = ldl_le_p(mem_buf); + read_length = 4; + } + +@@ -104,13 +104,13 @@ static int loongarch_gdb_set_fpu(CPULoongArchState *env, + int length = 0; + + if (0 <= n && n < 32) { +- env->fpr[n].vreg.D(0) = ldq_p(mem_buf); ++ env->fpr[n].vreg.D(0) = ldq_le_p(mem_buf); + length = 8; + } else if (32 <= n && n < 40) { + env->cf[n - 32] = ldub_p(mem_buf); + length = 1; + } else if (n == 40) { +- env->fcsr0 = ldl_p(mem_buf); ++ env->fcsr0 = ldl_le_p(mem_buf); + length = 4; + } + return length; +-- +2.39.1 + diff --git a/target-loongarch-fix-Werror-maybe-uninitialized-fals.patch b/target-loongarch-fix-Werror-maybe-uninitialized-fals.patch new file mode 100644 index 0000000000000000000000000000000000000000..b29b7e3792dbb7cc4fbe8014e5f0830c36ca892c --- /dev/null +++ 
b/target-loongarch-fix-Werror-maybe-uninitialized-fals.patch @@ -0,0 +1,73 @@ +From 1b5bad7f9b10bba438fe12082c8aa29805c03092 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Tue, 24 Sep 2024 15:49:47 +0400 +Subject: [PATCH 53/78] target/loongarch: fix -Werror=maybe-uninitialized + false-positive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +../target/loongarch/gdbstub.c:55:20: error: ‘val’ may be used uninitialized [-Werror=maybe-uninitialized] + 55 | return gdb_get_reg32(mem_buf, val); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ +../target/loongarch/gdbstub.c:39:18: note: ‘val’ was declared here + 39 | uint64_t val; + +Signed-off-by: Marc-André Lureau +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Xianglai Li +--- + target/loongarch/gdbstub.c | 29 +++++++++++++++-------------- + 1 file changed, 15 insertions(+), 14 deletions(-) + +diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c +index 5fc2f19e96..f8e3324bae 100644 +--- a/target/loongarch/gdbstub.c ++++ b/target/loongarch/gdbstub.c +@@ -33,28 +33,29 @@ void write_fcc(CPULoongArchState *env, uint64_t val) + + int loongarch_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) + { +- LoongArchCPU *cpu = LOONGARCH_CPU(cs); +- CPULoongArchState *env = &cpu->env; +- uint64_t val; +- +- if (0 <= n && n < 32) { +- val = env->gpr[n]; +- } else if (n == 32) { +- /* orig_a0 */ +- val = 0; +- } else if (n == 33) { +- val = env->pc; +- } else if (n == 34) { +- val = env->CSR_BADV; +- } ++ CPULoongArchState *env = cpu_env(cs); + + if (0 <= n && n <= 34) { ++ uint64_t val; ++ ++ if (n < 32) { ++ val = env->gpr[n]; ++ } else if (n == 32) { ++ /* orig_a0 */ ++ val = 0; ++ } else if (n == 33) { ++ val = env->pc; ++ } else /* if (n == 34) */ { ++ val = env->CSR_BADV; ++ } ++ + if (is_la64(env)) { + return gdb_get_reg64(mem_buf, val); + } else { + return gdb_get_reg32(mem_buf, val); + } + } ++ + return 0; + } + +-- +2.39.1 + 
diff --git a/target-loongarch-fix-a-wrong-print-in-cpu-dump.patch b/target-loongarch-fix-a-wrong-print-in-cpu-dump.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a5f57893512db2bba01e86bc9d608831619f14b --- /dev/null +++ b/target-loongarch-fix-a-wrong-print-in-cpu-dump.patch @@ -0,0 +1,39 @@ +From 4f76ccdc5bdad57b9c70da7a4fc00502cc335060 Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 11:27:12 +0800 +Subject: [PATCH] target/loongarch: fix a wrong print in cpu dump + +cherry picked from commit 78f932ea1f7b3b9b0ac628dc2a91281318fe51fa + +description: + loongarch_cpu_dump_state() want to dump all loongarch cpu +state registers, but there is a tiny typographical error when +printing "PRCFG2". + +Cc: qemu-stable@nongnu.org +Signed-off-by: lanyanzhi +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240604073831.666690-1-lanyanzhi22b@ict.ac.cn> +Signed-off-by: Song Gao +Signed-off-by: Gao Jiazhen +--- + target/loongarch/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 8e7c8332da..f7b5dae7ed 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -802,7 +802,7 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) + qemu_fprintf(f, "EENTRY=%016" PRIx64 "\n", env->CSR_EENTRY); + qemu_fprintf(f, "PRCFG1=%016" PRIx64 ", PRCFG2=%016" PRIx64 "," + " PRCFG3=%016" PRIx64 "\n", +- env->CSR_PRCFG1, env->CSR_PRCFG3, env->CSR_PRCFG3); ++ env->CSR_PRCFG1, env->CSR_PRCFG2, env->CSR_PRCFG3); + qemu_fprintf(f, "TLBRENTRY=%016" PRIx64 "\n", env->CSR_TLBRENTRY); + qemu_fprintf(f, "TLBRBADV=%016" PRIx64 "\n", env->CSR_TLBRBADV); + qemu_fprintf(f, "TLBRERA=%016" PRIx64 "\n", env->CSR_TLBRERA); +-- +2.41.0.windows.1 + diff --git a/target-loongarch-fix-vcpu-reset-command-word-issue.patch b/target-loongarch-fix-vcpu-reset-command-word-issue.patch new file mode 100644 index 
0000000000000000000000000000000000000000..9e5ef3b15867877936ee6c68a6e757252eaf00fe --- /dev/null +++ b/target-loongarch-fix-vcpu-reset-command-word-issue.patch @@ -0,0 +1,56 @@ +From 655073e4e179e601e35a444f585d8e2049df97f5 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Wed, 5 Feb 2025 19:56:54 +0800 +Subject: [PATCH] target/loongarch: fix vcpu reset command word issue + +When the KVM_REG_LOONGARCH_VCPU_RESET command word +is sent to the kernel through the kvm_set_one_reg interface, +the parameter source needs to be a legal address, +otherwise the kernel will return an error and the command word +will fail to be sent. + +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 2 +- + target/loongarch/kvm/kvm.c | 9 ++++++++- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index ee764f0bc7..570ce8be3b 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -638,8 +638,8 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error **errp) + + loongarch_cpu_register_gdb_regs_for_features(cs); + +- cpu_reset(cs); + qemu_init_vcpu(cs); ++ cpu_reset(cs); + + lacc->parent_realize(dev, errp); + } +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 0acdd5c4c1..277210ca04 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -590,9 +590,16 @@ static int kvm_loongarch_get_lbt(CPUState *cs) + void kvm_arch_reset_vcpu(CPUState *cs) + { + CPULoongArchState *env = cpu_env(cs); ++ int ret = 0; ++ uint64_t unused = 0; + + env->mp_state = KVM_MP_STATE_RUNNABLE; +- kvm_set_one_reg(cs, KVM_REG_LOONGARCH_VCPU_RESET, 0); ++ ret = kvm_set_one_reg(cs, KVM_REG_LOONGARCH_VCPU_RESET, &unused); ++ if (ret) { ++ error_report("Failed to set KVM_REG_LOONGARCH_VCPU_RESET: %s", ++ strerror(errno)); ++ exit(EXIT_FAILURE); ++ } + } + + static int kvm_loongarch_get_mpstate(CPUState *cs) +-- +2.41.0.windows.1 + diff --git 
a/target-loongarch-kvm-Add-pmu-support.patch b/target-loongarch-kvm-Add-pmu-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..5a5650a903c670138079db6e2d1bb6902e81a4ff --- /dev/null +++ b/target-loongarch-kvm-Add-pmu-support.patch @@ -0,0 +1,224 @@ +From 57db061a63243c64c07624740fc039ddcc4777a2 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 22 Mar 2024 19:26:35 +0800 +Subject: [PATCH] target/loongarch/kvm: Add pmu support + +This patch adds PMU support + e.g + '... -cpu max,pmu=on,pmnum=[1-16]'; + '... -cpu max,pmu=on' (default pmnum = 4); + '... -cpu max,pmu=off' (disable PMU) + +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 64 +++++++++++++++++++++++++++ + target/loongarch/cpu.h | 2 + + target/loongarch/kvm/kvm.c | 55 ++++++++++++++++++++++- + target/loongarch/loongarch-qmp-cmds.c | 2 +- + 4 files changed, 121 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index fdb819c2cf..bc557f207b 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -8,6 +8,7 @@ + #include "qemu/osdep.h" + #include "qemu/log.h" + #include "qemu/qemu-print.h" ++#include "qemu/error-report.h" + #include "qapi/error.h" + #include "qemu/module.h" + #include "sysemu/qtest.h" +@@ -19,6 +20,7 @@ + #include "internals.h" + #include "fpu/softfloat-helpers.h" + #include "cpu-csr.h" ++#include "qapi/visitor.h" + #include "sysemu/reset.h" + #include "vec.h" + #ifdef CONFIG_KVM +@@ -426,6 +428,14 @@ static void loongarch_la464_initfn(Object *obj) + data = FIELD_DP32(data, CPUCFG5, CC_DIV, 1); + env->cpucfg[5] = data; + ++ if (kvm_enabled()) { ++ data = 0; ++ data = FIELD_DP32(data, CPUCFG6, PMP, 1); ++ data = FIELD_DP32(data, CPUCFG6, PMNUM, 3); ++ data = FIELD_DP32(data, CPUCFG6, PMBITS, 63); ++ env->cpucfg[6] = data; ++ } ++ + data = 0; + data = FIELD_DP32(data, CPUCFG16, L1_IUPRE, 1); + data = FIELD_DP32(data, CPUCFG16, L1_DPRE, 1); +@@ -660,6 +670,48 @@ static void 
loongarch_set_lasx(Object *obj, bool value, Error **errp) + } + } + ++static bool loongarch_get_pmu(Object *obj, Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ ++ return !!(FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMP)); ++} ++ ++static void loongarch_set_pmu(Object *obj, bool value, Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ ++ cpu->env.cpucfg[6] = FIELD_DP32(cpu->env.cpucfg[6], CPUCFG6, PMP, value); ++} ++ ++static void loongarch_get_pmnum(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ uint32_t value = FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMNUM); ++ ++ visit_type_uint32(v, name, &value, errp); ++} ++ ++static void loongarch_set_pmnum(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ uint32_t *value= opaque; ++ ++ if (!visit_type_uint32(v, name, value, errp)) { ++ return; ++ } ++ if ((*value <= PMNUM_MAX) && (*value > 0)) { ++ cpu->env.cpucfg[6] = FIELD_DP32(cpu->env.cpucfg[6], CPUCFG6, PMNUM, *value -1); ++ } else { ++ error_report("Performance counter number need be in [1- %d]\n", PMNUM_MAX); ++ exit(EXIT_FAILURE); ++ } ++} ++ + void loongarch_cpu_post_init(Object *obj) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +@@ -672,6 +724,18 @@ void loongarch_cpu_post_init(Object *obj) + object_property_add_bool(obj, "lasx", loongarch_get_lasx, + loongarch_set_lasx); + } ++ ++ if (kvm_enabled()) { ++ object_property_add_bool(obj, "pmu", loongarch_get_pmu, ++ loongarch_set_pmu); ++ if (FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMP)) { ++ uint32_t value = 4; ++ object_property_add(obj, "pmnum", "uint32", ++ loongarch_get_pmnum, ++ loongarch_set_pmnum, NULL, ++ (void *)&value); ++ } ++ } + } + + static void loongarch_cpu_init(Object *obj) +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 4749d41c8c..80cad24fa1 100644 +--- a/target/loongarch/cpu.h ++++ 
b/target/loongarch/cpu.h +@@ -186,6 +186,8 @@ FIELD(CPUCFG6, PMNUM, 4, 4) + FIELD(CPUCFG6, PMBITS, 8, 6) + FIELD(CPUCFG6, UPM, 14, 1) + ++#define PMNUM_MAX 16 ++ + /* cpucfg[16] bits */ + FIELD(CPUCFG16, L1_IUPRE, 0, 1) + FIELD(CPUCFG16, L1_IUUNIFY, 1, 1) +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 49d02076ad..5dda631b2b 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -573,6 +573,53 @@ static int kvm_check_cpucfg2(CPUState *cs) + return ret; + } + ++static int kvm_check_cpucfg6(CPUState *cs) ++{ ++ int ret; ++ uint64_t val; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_CPUCFG, ++ .attr = 6, ++ .addr = (uint64_t)&val, ++ }; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, &attr); ++ if (!ret) { ++ kvm_vcpu_ioctl(cs, KVM_GET_DEVICE_ATTR, &attr); ++ ++ if (FIELD_EX32(env->cpucfg[6], CPUCFG6, PMP)) { ++ /* Check PMP */ ++ if (!FIELD_EX32(val, CPUCFG6, PMP)) { ++ error_report("'pmu' feature not supported by KVM on this host" ++ " Please disable 'pmu' with " ++ "'... 
-cpu XXX,pmu=off ...'\n"); ++ exit(EXIT_FAILURE); ++ } ++ /* Check PMNUM */ ++ int guest_pmnum = FIELD_EX32(env->cpucfg[6], CPUCFG6, PMNUM); ++ int host_pmnum = FIELD_EX32(val, CPUCFG6, PMNUM); ++ if (guest_pmnum > host_pmnum){ ++ warn_report("The guest pmnum %d larger than KVM support %d\n", ++ guest_pmnum, host_pmnum); ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, ++ PMNUM, host_pmnum); ++ } ++ /* Check PMBITS */ ++ int guest_pmbits = FIELD_EX32(env->cpucfg[6], CPUCFG6, PMBITS); ++ int host_pmbits = FIELD_EX32(val, CPUCFG6, PMBITS); ++ if (guest_pmbits != host_pmbits) { ++ warn_report("The host not support PMBITS %d\n", guest_pmbits); ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, ++ PMBITS, host_pmbits); ++ } ++ } ++ } ++ ++ return ret; ++} ++ + static int kvm_loongarch_put_cpucfg(CPUState *cs) + { + int i, ret = 0; +@@ -586,7 +633,13 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + if (ret) { + return ret; + } +- } ++ } ++ if (i == 6) { ++ ret = kvm_check_cpucfg6(cs); ++ if (ret) { ++ return ret; ++ } ++ } + val = env->cpucfg[i]; + ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), &val); + if (ret < 0) { +diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c +index 645672ff59..2612f43de9 100644 +--- a/target/loongarch/loongarch-qmp-cmds.c ++++ b/target/loongarch/loongarch-qmp-cmds.c +@@ -42,7 +42,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + } + + static const char *cpu_model_advertised_features[] = { +- "lsx", "lasx", NULL ++ "lsx", "lasx", "pmu", "pmnum", NULL + }; + + CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, +-- +2.33.0 + diff --git a/target-loongarch-kvm-Add-pv-steal-time-support.patch b/target-loongarch-kvm-Add-pv-steal-time-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..19fbcad65ec05a5ea70e7df913889faccff344fa --- /dev/null +++ b/target-loongarch-kvm-Add-pv-steal-time-support.patch @@ -0,0 +1,169 @@ 
+From 8b69a1b340da95cacdff252927ca8aef9d43c33a Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 24 Apr 2024 16:06:33 +0800 +Subject: [PATCH] target/loongarch/kvm: Add pv steal time support + +Signed-off-by: Song Gao +--- + linux-headers/asm-loongarch/kvm.h | 2 ++ + target/loongarch/cpu.h | 3 ++ + target/loongarch/kvm/kvm.c | 50 ++++++++++++++++++++++++++++ + target/loongarch/kvm/kvm_loongarch.h | 2 ++ + target/loongarch/machine.c | 25 ++++++++++++++ + 5 files changed, 82 insertions(+) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 4cec8c1601..81fec85f0a 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -84,6 +84,8 @@ struct kvm_fpu { + #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) + #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) + #define KVM_LOONGARCH_VCPU_CPUCFG 0 ++#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1 ++#define KVM_LOONGARCH_VCPU_PVTIME_GPA 0 + + struct kvm_debug_exit_arch { + }; +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 80cad24fa1..0ed24051af 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -365,6 +365,9 @@ typedef struct CPUArchState { + /* Store ipistate to access from this struct */ + DeviceState *ipistate; + #endif ++ struct { ++ uint64_t guest_addr; ++ } st; + } CPULoongArchState; + + /** +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 5dda631b2b..e1d521a1de 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -649,6 +649,56 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + return ret; + } + ++int kvm_loongarch_put_pvtime(LoongArchCPU *cpu) ++{ ++ CPULoongArchState *env = &cpu->env; ++ int err; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, ++ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, ++ .addr = (uint64_t)&env->st.guest_addr, ++ }; ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), 
KVM_HAS_DEVICE_ATTR, attr); ++ if (err != 0) { ++ /* It's ok even though kvm has not such attr */ ++ return 0; ++ } ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("PVTIME IPA: KVM_SET_DEVICE_ATTR: %s", strerror(-err)); ++ return err; ++ } ++ ++ return 0; ++} ++ ++int kvm_loongarch_get_pvtime(LoongArchCPU *cpu) ++{ ++ CPULoongArchState *env = &cpu->env; ++ int err; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, ++ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, ++ .addr = (uint64_t)&env->st.guest_addr, ++ }; ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, attr); ++ if (err != 0) { ++ /* It's ok even though kvm has not such attr */ ++ return 0; ++ } ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("PVTIME IPA: KVM_GET_DEVICE_ATTR: %s", strerror(-err)); ++ return err; ++ } ++ ++ return 0; ++} ++ + int kvm_arch_get_registers(CPUState *cs) + { + int ret; +diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h +index d945b6bb82..551878a725 100644 +--- a/target/loongarch/kvm/kvm_loongarch.h ++++ b/target/loongarch/kvm/kvm_loongarch.h +@@ -12,5 +12,7 @@ + + int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); + void kvm_arch_reset_vcpu(CPULoongArchState *env); ++int kvm_loongarch_put_pvtime(LoongArchCPU *cpu); ++int kvm_loongarch_get_pvtime(LoongArchCPU *cpu); + + #endif +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index 4443caed2d..ec5abe56db 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -9,6 +9,8 @@ + #include "cpu.h" + #include "migration/cpu.h" + #include "vec.h" ++#include "kvm/kvm_loongarch.h" ++#include "sysemu/kvm.h" + + static const VMStateDescription vmstate_fpu_reg = { + .name = "fpu_reg", +@@ -122,15 +124,38 @@ const VMStateDescription vmstate_tlb = { + } + }; + ++static int cpu_post_load(void *opaque, int 
version_id) ++{ ++#ifdef CONFIG_KVM ++ LoongArchCPU *cpu = opaque; ++ kvm_loongarch_put_pvtime(cpu); ++#endif ++ return 0; ++} ++ ++static int cpu_pre_save(void *opaque) ++{ ++#ifdef CONFIG_KVM ++ LoongArchCPU *cpu = opaque; ++ kvm_loongarch_get_pvtime(cpu); ++#endif ++ return 0; ++} ++ + /* LoongArch CPU state */ + const VMStateDescription vmstate_loongarch_cpu = { + .name = "cpu", + .version_id = 1, + .minimum_version_id = 1, ++ .post_load = cpu_post_load, ++ .pre_save = cpu_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), + VMSTATE_UINTTL(env.pc, LoongArchCPU), + ++ /* PV time */ ++ VMSTATE_UINT64(env.st.guest_addr, LoongArchCPU), ++ + /* Remaining CSRs */ + VMSTATE_UINT64(env.CSR_CRMD, LoongArchCPU), + VMSTATE_UINT64(env.CSR_PRMD, LoongArchCPU), +-- +2.33.0 + diff --git a/target-loongarch-kvm-Add-software-breakpoint-support-sync-upstream.patch b/target-loongarch-kvm-Add-software-breakpoint-support-sync-upstream.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b65cf8ce8c46ea336dd6482ff29d29ef07ccf46 --- /dev/null +++ b/target-loongarch-kvm-Add-software-breakpoint-support-sync-upstream.patch @@ -0,0 +1,39 @@ +From 9a6ef31fa2fcf1f1257fb849cc6cabe2b4c440e0 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Fri, 7 Jun 2024 11:50:16 +0800 +Subject: [PATCH 46/78] target/loongarch/kvm: Add software breakpoint support + +With KVM virtualization, debug exception is injected to guest kernel +rather than host for normal break intruction. Here hypercall +instruction with special code is used for sw breakpoint usage, +and detailed instruction comes from kvm kernel with user API +KVM_REG_LOONGARCH_DEBUG_INST. + +Now only software breakpoint is supported, and it is allowed to +insert/remove software breakpoint. We can debug guest kernel with gdb +method after kernel is loaded, hardware breakpoint will be added in later. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Tested-by: Song Gao +Message-Id: <20240607035016.2975799-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + configs/targets/loongarch64-softmmu.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/targets/loongarch64-softmmu.mak b/configs/targets/loongarch64-softmmu.mak +index f23780fdd8..0034c33620 100644 +--- a/configs/targets/loongarch64-softmmu.mak ++++ b/configs/targets/loongarch64-softmmu.mak +@@ -1,5 +1,6 @@ + TARGET_ARCH=loongarch64 + TARGET_BASE_ARCH=loongarch ++TARGET_KVM_HAVE_GUEST_DEBUG=y + TARGET_SUPPORTS_MTTCG=y + TARGET_XML_FILES= gdb-xml/loongarch-base32.xml gdb-xml/loongarch-base64.xml gdb-xml/loongarch-fpu.xml + TARGET_NEED_FDT=y +-- +2.39.1 + diff --git a/target-loongarch-kvm-Add-software-breakpoint-support.patch b/target-loongarch-kvm-Add-software-breakpoint-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3981d62e76171605008311379cfa6929f1b221e --- /dev/null +++ b/target-loongarch-kvm-Add-software-breakpoint-support.patch @@ -0,0 +1,132 @@ +From 6a301af275fd684c197cf7a2e73fc265993478da Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Sun, 18 Feb 2024 15:00:25 +0800 +Subject: [PATCH] target/loongarch/kvm: Add software breakpoint support + +With KVM virtualization, debug exception is passed through +to guest kernel rather than host mode. Here hypercall +instruction with special hypercall code is used for sw +breakpoint usage. + +Now only software breakpoint is supported, and it is allowed +to insert/remove software breakpoint. Later hardware breakpoint +will be added. 
+ +Signed-off-by: Bibo Mao +--- + target/loongarch/kvm/kvm.c | 77 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 77 insertions(+) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index c19978a970..49d02076ad 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -29,6 +29,7 @@ + #include "trace.h" + + static bool cap_has_mp_state; ++static unsigned int brk_insn; + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO + }; +@@ -675,7 +676,14 @@ static void kvm_loongarch_vm_stage_change(void *opaque, bool running, + + int kvm_arch_init_vcpu(CPUState *cs) + { ++ uint64_t val; ++ + qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); ++ ++ if (!kvm_get_one_reg(cs, KVM_REG_LOONGARCH_DEBUG_INST, &val)) { ++ brk_insn = val; ++ } ++ + return 0; + } + +@@ -755,6 +763,68 @@ bool kvm_arch_cpu_check_are_resettable(void) + return true; + } + ++ ++void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) ++{ ++ if (kvm_sw_breakpoints_active(cpu)) { ++ dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; ++ } ++} ++ ++int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) ++{ ++ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) || ++ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) { ++ error_report("%s failed", __func__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) ++{ ++ static uint32_t brk; ++ ++ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) || ++ brk != brk_insn || ++ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { ++ error_report("%s failed", __func__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) ++{ ++ return -ENOSYS; ++} ++ ++int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) ++{ ++ return 
-ENOSYS; ++} ++ ++void kvm_arch_remove_all_hw_breakpoints(void) ++{ ++} ++ ++static bool kvm_loongarch_handle_debug(CPUState *cs, struct kvm_run *run) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ kvm_cpu_synchronize_state(cs); ++ if (cs->singlestep_enabled) { ++ return true; ++ } ++ ++ if (kvm_find_sw_breakpoint(cs, env->pc)) { ++ return true; ++ } ++ ++ return false; ++} ++ + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +@@ -774,6 +844,13 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + run->iocsr_io.len, + run->iocsr_io.is_write); + break; ++ ++ case KVM_EXIT_DEBUG: ++ if (kvm_loongarch_handle_debug(cs, run)) { ++ ret = EXCP_DEBUG; ++ } ++ break; ++ + default: + ret = -1; + warn_report("KVM: unknown exit reason %d", run->exit_reason); +-- +2.33.0 + diff --git a/target-loongarch-kvm-Add-vCPU-reset-function.patch b/target-loongarch-kvm-Add-vCPU-reset-function.patch new file mode 100644 index 0000000000000000000000000000000000000000..f5c5990ffb6e5d4d68dcb9034190108e1dc76abc --- /dev/null +++ b/target-loongarch-kvm-Add-vCPU-reset-function.patch @@ -0,0 +1,68 @@ +From ad00cc7da8ab03d6d612a3bd7ec0c4b7af594894 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Thu, 22 Aug 2024 10:28:27 +0800 +Subject: [PATCH 51/78] target/loongarch/kvm: Add vCPU reset function + +KVM provides interface KVM_REG_LOONGARCH_VCPU_RESET to reset vCPU, +it can be used to clear internal state about kvm kernel. vCPU reset +function is added here for kvm mode. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240822022827.2273534-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 2 +- + target/loongarch/kvm/kvm.c | 5 ++++- + target/loongarch/kvm/kvm_loongarch.h | 2 +- + 3 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 2038984d02..63d1f65608 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -608,7 +608,7 @@ static void loongarch_cpu_reset_hold(Object *obj) + memset(env->tlb, 0, sizeof(env->tlb)); + #endif + if (kvm_enabled()) { +- kvm_arch_reset_vcpu(env); ++ kvm_arch_reset_vcpu(cs); + } + #endif + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 407d454919..90c8379c46 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -485,9 +485,12 @@ static int kvm_loongarch_put_regs_fp(CPUState *cs) + return ret; + } + +-void kvm_arch_reset_vcpu(CPULoongArchState *env) ++void kvm_arch_reset_vcpu(CPUState *cs) + { ++ CPULoongArchState *env = cpu_env(cs); ++ + env->mp_state = KVM_MP_STATE_RUNNABLE; ++ kvm_set_one_reg(cs, KVM_REG_LOONGARCH_VCPU_RESET, 0); + } + + static int kvm_loongarch_get_mpstate(CPUState *cs) +diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h +index 551878a725..8482f9308d 100644 +--- a/target/loongarch/kvm/kvm_loongarch.h ++++ b/target/loongarch/kvm/kvm_loongarch.h +@@ -11,8 +11,8 @@ + #define QEMU_KVM_LOONGARCH_H + + int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); +-void kvm_arch_reset_vcpu(CPULoongArchState *env); + int kvm_loongarch_put_pvtime(LoongArchCPU *cpu); + int kvm_loongarch_get_pvtime(LoongArchCPU *cpu); ++void kvm_arch_reset_vcpu(CPUState *cs); + + #endif +-- +2.39.1 + diff --git a/target-loongarch-kvm-Enable-LSX-LASX-extension.patch b/target-loongarch-kvm-Enable-LSX-LASX-extension.patch new file mode 100644 index 
0000000000000000000000000000000000000000..fc5655fd1259e3f0405bd996299ec25373f9d6f9 --- /dev/null +++ b/target-loongarch-kvm-Enable-LSX-LASX-extension.patch @@ -0,0 +1,98 @@ +From 6e503b590e42ad7c522cf937b83e1f8f715dbd1a Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Mon, 22 Jan 2024 17:02:06 +0800 +Subject: [PATCH] target/loongarch/kvm: Enable LSX/LASX extension + +The kernel had already support LSX and LASX [1], +but QEMU is disable LSX/LASX for kvm. This patch adds +kvm_check_cpucfg2() to check CPUCFG2. + +[1]: https://lore.kernel.org/all/CABgObfZHRf7E_7Jk4uPRmSyxTy3EiuuYwHC35jQncNL9s-zTDA@mail.gmail.com/ + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240122090206.1083584-1-gaosong@loongson.cn> +--- + linux-headers/asm-loongarch/kvm.h | 1 + + target/loongarch/kvm/kvm.c | 45 ++++++++++++++++++++++++++----- + 2 files changed, 39 insertions(+), 7 deletions(-) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index c6ad2ee610..923d0bd382 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -79,6 +79,7 @@ struct kvm_fpu { + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) + #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) + #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) ++#define KVM_LOONGARCH_VCPU_CPUCFG 0 + + struct kvm_debug_exit_arch { + }; +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 2230f029d0..c19978a970 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -540,6 +540,38 @@ static int kvm_loongarch_get_cpucfg(CPUState *cs) + return ret; + } + ++static int kvm_check_cpucfg2(CPUState *cs) ++{ ++ int ret; ++ uint64_t val; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_CPUCFG, ++ .attr = 2, ++ .addr = (uint64_t)&val, ++ }; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = 
&cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, &attr); ++ ++ if (!ret) { ++ kvm_vcpu_ioctl(cs, KVM_GET_DEVICE_ATTR, &attr); ++ env->cpucfg[2] &= val; ++ ++ if (FIELD_EX32(env->cpucfg[2], CPUCFG2, FP)) { ++ /* The FP minimal version is 1. */ ++ env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, FP_VER, 1); ++ } ++ ++ if (FIELD_EX32(env->cpucfg[2], CPUCFG2, LLFTP)) { ++ /* The LLFTP minimal version is 1. */ ++ env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, LLFTP_VER, 1); ++ } ++ } ++ ++ return ret; ++} ++ + static int kvm_loongarch_put_cpucfg(CPUState *cs) + { + int i, ret = 0; +@@ -548,14 +580,13 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + uint64_t val; + + for (i = 0; i < 21; i++) { ++ if (i == 2) { ++ ret = kvm_check_cpucfg2(cs); ++ if (ret) { ++ return ret; ++ } ++ } + val = env->cpucfg[i]; +- /* LSX and LASX and LBT are not supported in kvm now */ +- if (i == 2) { +- val &= ~(BIT(R_CPUCFG2_LSX_SHIFT) | BIT(R_CPUCFG2_LASX_SHIFT)); +- val &= ~(BIT(R_CPUCFG2_LBT_X86_SHIFT) | +- BIT(R_CPUCFG2_LBT_ARM_SHIFT) | +- BIT(R_CPUCFG2_LBT_MIPS_SHIFT)); +- } + ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), &val); + if (ret < 0) { + trace_kvm_failed_put_cpucfg(strerror(errno)); +-- +2.27.0 + diff --git a/target-loongarch-kvm-Fix-VM-recovery-from-disk-failu.patch b/target-loongarch-kvm-Fix-VM-recovery-from-disk-failu.patch new file mode 100644 index 0000000000000000000000000000000000000000..c1a483ed802989a6b592c06a468fff69b775b958 --- /dev/null +++ b/target-loongarch-kvm-Fix-VM-recovery-from-disk-failu.patch @@ -0,0 +1,40 @@ +From 520e792f674a7ab192a9237519c4e0c8f50abc71 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 8 May 2024 10:47:32 +0800 +Subject: [PATCH 44/78] target/loongarch/kvm: Fix VM recovery from disk + failures + +vmstate does not save kvm_state_conter, +which can cause VM recovery from disk to fail. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Song Gao +Acked-by: Peter Xu +Message-Id: <20240508024732.3127792-1-gaosong@loongson.cn> +Signed-off-by: Xianglai Li +--- + target/loongarch/machine.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index 4bbf495d6b..97e1152ffd 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -165,11 +165,11 @@ static const VMStateDescription vmstate_tlb = { + /* LoongArch CPU state */ + const VMStateDescription vmstate_loongarch_cpu = { + .name = "cpu", +- .version_id = 1, +- .minimum_version_id = 1, ++ .version_id = 2, ++ .minimum_version_id = 2, + .post_load = cpu_post_load, + .pre_save = cpu_pre_save, +- .fields = (VMStateField[]) { ++ .fields = (const VMStateField[]) { + VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), + VMSTATE_UINTTL(env.pc, LoongArchCPU), + +-- +2.39.1 + diff --git a/target-loongarch-kvm-Fix-vm-restore-failed.patch b/target-loongarch-kvm-Fix-vm-restore-failed.patch new file mode 100644 index 0000000000000000000000000000000000000000..36cd4cb417fdbc3a62e618574e79800ba5fda743 --- /dev/null +++ b/target-loongarch-kvm-Fix-vm-restore-failed.patch @@ -0,0 +1,28 @@ +From 7cf9ed3844ed3340165121e5fd7dcb959ee80d15 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 24 Apr 2024 14:18:46 +0800 +Subject: [PATCH] target/loongarch/kvm: Fix vm restore failed + +The vmstate_loongarch_cpu need kvm_state_counter. 
+ +Signed-off-by: Song Gao +--- + target/loongarch/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index 1c4e01d076..4443caed2d 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -191,6 +191,8 @@ const VMStateDescription vmstate_loongarch_cpu = { + VMSTATE_STRUCT_ARRAY(env.tlb, LoongArchCPU, LOONGARCH_TLB_MAX, + 0, vmstate_tlb, LoongArchTLB), + ++ VMSTATE_UINT64(kvm_state_counter, LoongArchCPU), ++ + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { +-- +2.33.0 + diff --git a/target-loongarch-kvm-Implement-LoongArch-PMU-extensi.patch b/target-loongarch-kvm-Implement-LoongArch-PMU-extensi.patch new file mode 100644 index 0000000000000000000000000000000000000000..82b1b18c0a0dbddd0f093b56bdb53bf74a55c358 --- /dev/null +++ b/target-loongarch-kvm-Implement-LoongArch-PMU-extensi.patch @@ -0,0 +1,233 @@ +From b87b4782e8147fd481becd946ca909edaaa58b41 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 18 Sep 2024 16:23:15 +0800 +Subject: [PATCH 59/78] target/loongarch/kvm: Implement LoongArch PMU extension + +Implement PMU extension for LoongArch kvm mode. Use OnOffAuto type +variable pmu to check the PMU feature. If the PMU feature is not supported +by the KVM host, an error is reported when pmu=on is given on the command line. + +If no pmu parameter is given on the command line, it checks whether the +KVM host supports the PMU feature and sets the corresponding value in cpucfg. 
+ +This patch is based on lbt patch located at + https://lore.kernel.org/qemu-devel/20240904061859.86615-1-maobibo@loongson.cn + +Co-developed-by: Song Gao +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240918082315.2345034-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + target/loongarch/cpu.c | 63 +++++++-------------------- + target/loongarch/cpu.h | 2 + + target/loongarch/kvm/kvm.c | 41 +++++++++++++++++ + target/loongarch/loongarch-qmp-cmds.c | 2 +- + 4 files changed, 59 insertions(+), 49 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index a57067938d..2ee1d63989 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -695,58 +695,28 @@ static void loongarch_set_lasx(Object *obj, bool value, Error **errp) + } + } + +-static bool loongarch_get_pmu(Object *obj, Error **errp) +-{ +- LoongArchCPU *cpu = LOONGARCH_CPU(obj); +- +- return !!(FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMP)); +-} +- +-static void loongarch_set_pmu(Object *obj, bool value, Error **errp) +-{ +- LoongArchCPU *cpu = LOONGARCH_CPU(obj); +- +- cpu->env.cpucfg[6] = FIELD_DP32(cpu->env.cpucfg[6], CPUCFG6, PMP, value); +-} +- +-static void loongarch_get_pmnum(Object *obj, Visitor *v, +- const char *name, void *opaque, +- Error **errp) ++static bool loongarch_get_lbt(Object *obj, Error **errp) + { +- LoongArchCPU *cpu = LOONGARCH_CPU(obj); +- uint32_t value = FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMNUM); +- +- visit_type_uint32(v, name, &value, errp); ++ return LOONGARCH_CPU(obj)->lbt != ON_OFF_AUTO_OFF; + } + +-static void loongarch_set_pmnum(Object *obj, Visitor *v, +- const char *name, void *opaque, +- Error **errp) ++static void loongarch_set_lbt(Object *obj, bool value, Error **errp) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +- uint32_t *value= opaque; + +- if (!visit_type_uint32(v, name, value, errp)) { +- return; +- } +- if ((*value <= PMNUM_MAX) && (*value > 0)) { +- cpu->env.cpucfg[6] = 
FIELD_DP32(cpu->env.cpucfg[6], CPUCFG6, PMNUM, *value -1); +- } else { +- error_report("Performance counter number need be in [1- %d]\n", PMNUM_MAX); +- exit(EXIT_FAILURE); +- } ++ cpu->lbt = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + } + +-static bool loongarch_get_lbt(Object *obj, Error **errp) ++static bool loongarch_get_pmu(Object *obj, Error **errp) + { +- return LOONGARCH_CPU(obj)->lbt != ON_OFF_AUTO_OFF; ++ return LOONGARCH_CPU(obj)->pmu != ON_OFF_AUTO_OFF; + } + +-static void loongarch_set_lbt(Object *obj, bool value, Error **errp) ++static void loongarch_set_pmu(Object *obj, bool value, Error **errp) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + +- cpu->lbt = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; ++ cpu->pmu = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + } + + void loongarch_cpu_post_init(Object *obj) +@@ -759,21 +729,18 @@ void loongarch_cpu_post_init(Object *obj) + loongarch_set_lasx); + + if (kvm_enabled()) { +- object_property_add_bool(obj, "pmu", loongarch_get_pmu, +- loongarch_set_pmu); +- if (FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMP)) { +- uint32_t value = 4; +- object_property_add(obj, "pmnum", "uint32", +- loongarch_get_pmnum, +- loongarch_set_pmnum, NULL, +- (void *)&value); +- } +- + cpu->lbt = ON_OFF_AUTO_AUTO; + object_property_add_bool(obj, "lbt", loongarch_get_lbt, + loongarch_set_lbt); + object_property_set_description(obj, "lbt", + "Set off to disable Binary Tranlation."); ++ ++ cpu->pmu = ON_OFF_AUTO_AUTO; ++ object_property_add_bool(obj, "pmu", loongarch_get_pmu, ++ loongarch_set_pmu); ++ object_property_set_description(obj, "pmu", ++ "Set off to performance monitor unit."); ++ + } else { + cpu->lbt = ON_OFF_AUTO_OFF; + } +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 2f8c5cf2dd..8ff00d17e1 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -289,6 +289,7 @@ typedef struct LoongArchTLB LoongArchTLB; + + enum loongarch_features { + LOONGARCH_FEATURE_LBT, /* loongson binary translation 
extension */ ++ LOONGARCH_FEATURE_PMU, + }; + + typedef struct LoongArchBT { +@@ -407,6 +408,7 @@ struct ArchCPU { + QEMUTimer timer; + uint32_t phy_id; + OnOffAuto lbt; ++ OnOffAuto pmu; + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 118f66f742..8b0f86a201 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -870,9 +870,18 @@ static bool kvm_feature_supported(CPUState *cs, enum loongarch_features feature) + attr.attr = KVM_LOONGARCH_VM_FEAT_MIPSBT; + ret |= kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr); + return (ret == 0); ++ ++ case LOONGARCH_FEATURE_PMU: ++ attr.group = KVM_LOONGARCH_VM_FEAT_CTRL; ++ attr.attr = KVM_LOONGARCH_VM_FEAT_PMU; ++ ret = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, &attr); ++ return (ret == 0); ++ + default: + return false; + } ++ ++ return false; + } + + static int kvm_cpu_check_lbt(CPUState *cs, Error **errp) +@@ -896,6 +905,32 @@ static int kvm_cpu_check_lbt(CPUState *cs, Error **errp) + return 0; + } + ++static int kvm_cpu_check_pmu(CPUState *cs, Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = cpu_env(cs); ++ bool kvm_supported; ++ ++ kvm_supported = kvm_feature_supported(cs, LOONGARCH_FEATURE_PMU); ++ if (cpu->pmu == ON_OFF_AUTO_ON) { ++ if (!kvm_supported) { ++ error_setg(errp, "'pmu' feature not supported by KVM on the host"); ++ return -ENOTSUP; ++ } ++ } else if (cpu->pmu != ON_OFF_AUTO_AUTO) { ++ /* disable pmu if ON_OFF_AUTO_OFF is set */ ++ kvm_supported = false; ++ } ++ ++ if (kvm_supported) { ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, PMP, 1); ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, PMNUM, 3); ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, PMBITS, 63); ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, UPM, 1); ++ } ++ return 0; ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + 
uint64_t val; +@@ -913,6 +948,12 @@ int kvm_arch_init_vcpu(CPUState *cs) + if (ret < 0) { + error_report_err(local_err); + } ++ ++ ret = kvm_cpu_check_pmu(cs, &local_err); ++ if (ret < 0) { ++ error_report_err(local_err); ++ } ++ + return ret; + } + +diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c +index 644b528824..dc78a3ffa2 100644 +--- a/target/loongarch/loongarch-qmp-cmds.c ++++ b/target/loongarch/loongarch-qmp-cmds.c +@@ -42,7 +42,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + } + + static const char *cpu_model_advertised_features[] = { +- "lsx", "lasx", "lbt", "pmu", "pmnum", NULL ++ "lsx", "lasx", "lbt", "pmu", NULL + }; + + CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, +-- +2.39.1 + diff --git a/target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch b/target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch new file mode 100644 index 0000000000000000000000000000000000000000..cdd2e4957e54ddfa954cae7818eae2e85f11a7e7 --- /dev/null +++ b/target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch @@ -0,0 +1,43 @@ +From 7a3573ce009afa271168829da86e2c70c63fa58a Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 14 May 2024 19:07:52 +0800 +Subject: [PATCH] target/loongarch/kvm: fpu save the vreg registers high + 192bit + +On kvm side, get_fpu/set_fpu save the vreg registers high 192bits, +but QEMU missing. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240514110752.989572-1-gaosong@loongson.cn> +--- + target/loongarch/kvm/kvm.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index e1d521a1de..5c88270132 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -444,6 +444,9 @@ static int kvm_loongarch_get_regs_fp(CPUState *cs) + env->fcsr0 = fpu.fcsr; + for (i = 0; i < 32; i++) { + env->fpr[i].vreg.UD[0] = fpu.fpr[i].val64[0]; ++ env->fpr[i].vreg.UD[1] = fpu.fpr[i].val64[1]; ++ env->fpr[i].vreg.UD[2] = fpu.fpr[i].val64[2]; ++ env->fpr[i].vreg.UD[3] = fpu.fpr[i].val64[3]; + } + for (i = 0; i < 8; i++) { + env->cf[i] = fpu.fcc & 0xFF; +@@ -465,6 +468,9 @@ static int kvm_loongarch_put_regs_fp(CPUState *cs) + fpu.fcc = 0; + for (i = 0; i < 32; i++) { + fpu.fpr[i].val64[0] = env->fpr[i].vreg.UD[0]; ++ fpu.fpr[i].val64[1] = env->fpr[i].vreg.UD[1]; ++ fpu.fpr[i].val64[2] = env->fpr[i].vreg.UD[2]; ++ fpu.fpr[i].val64[3] = env->fpr[i].vreg.UD[3]; + } + + for (i = 0; i < 8; i++) { +-- +2.33.0 + diff --git a/target-loongarch-kvm-sync-kernel-header-files.patch b/target-loongarch-kvm-sync-kernel-header-files.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d621d7f30f2a0d6296992b697b390f8b67fb82c --- /dev/null +++ b/target-loongarch-kvm-sync-kernel-header-files.patch @@ -0,0 +1,41 @@ +From b7e49ac3b4e7dbfc9ba4645a85962294883c251a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 13 Mar 2024 10:04:33 +0800 +Subject: [PATCH] target/loongarch/kvm: sync kernel header files + +sync kernel header files. 
+ +Signed-off-by: Bibo Mao +--- + linux-headers/asm-loongarch/kvm.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 923d0bd382..4cec8c1601 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -15,10 +15,12 @@ + */ + + #define __KVM_HAVE_READONLY_MEM ++#define __KVM_HAVE_GUEST_DEBUG + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + ++#define KVM_GUESTDBG_USE_SW_BP 0x00010000 + /* + * for KVM_GET_REGS and KVM_SET_REGS + */ +@@ -74,6 +76,8 @@ struct kvm_fpu { + + #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) + #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++/* Debugging: Special instruction for software breakpoint */ ++#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) + + #define LOONGARCH_REG_SHIFT 3 + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) +-- +2.33.0 + diff --git a/target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch b/target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a41c08ace63d59551990616806a9c784d4384c5 --- /dev/null +++ b/target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch @@ -0,0 +1,41 @@ +From ae65e1281aa67713bde6bce323a3a8d06f27c636 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 2 Jan 2024 10:01:59 +0800 +Subject: [PATCH] target/loongarch/meson: move gdbstub.c to loongarch.ss +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +gdbstub.c is not specific to TCG and can be used by +other accelerators, such as KVM accelerator + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Song Gao +Message-Id: <20240102020200.3462097-1-gaosong@loongson.cn> +--- + target/loongarch/meson.build | 2 +- + 1 file 
changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index 18e8191e2b..b3a0fb12fb 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -3,6 +3,7 @@ gen = decodetree.process('insns.decode') + loongarch_ss = ss.source_set() + loongarch_ss.add(files( + 'cpu.c', ++ 'gdbstub.c', + )) + loongarch_tcg_ss = ss.source_set() + loongarch_tcg_ss.add(gen) +@@ -10,7 +11,6 @@ loongarch_tcg_ss.add(files( + 'fpu_helper.c', + 'op_helper.c', + 'translate.c', +- 'gdbstub.c', + 'vec_helper.c', + )) + loongarch_tcg_ss.add(zlib) +-- +2.27.0 + diff --git a/target-loongarch-move-translate-modules-to-tcg.patch b/target-loongarch-move-translate-modules-to-tcg.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7e249c8e5ff13f7e04e3013613cf1a21ad032c0 --- /dev/null +++ b/target-loongarch-move-translate-modules-to-tcg.patch @@ -0,0 +1,215 @@ +From eef77dd5b0d292d8a0276c820fc8fee24de0d898 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 2 Jan 2024 10:02:00 +0800 +Subject: [PATCH] target/loongarch: move translate modules to tcg/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce the target/loongarch/tcg directory. 
Its purpose is to hold the TCG +code that is selected by CONFIG_TCG + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Song Gao +Message-Id: <20240102020200.3462097-2-gaosong@loongson.cn> +--- + target/loongarch/meson.build | 15 +-------------- + target/loongarch/{ => tcg}/constant_timer.c | 0 + target/loongarch/{ => tcg}/csr_helper.c | 0 + target/loongarch/{ => tcg}/fpu_helper.c | 0 + .../{ => tcg}/insn_trans/trans_arith.c.inc | 0 + .../{ => tcg}/insn_trans/trans_atomic.c.inc | 0 + .../{ => tcg}/insn_trans/trans_bit.c.inc | 0 + .../{ => tcg}/insn_trans/trans_branch.c.inc | 0 + .../{ => tcg}/insn_trans/trans_extra.c.inc | 0 + .../{ => tcg}/insn_trans/trans_farith.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fcmp.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fcnv.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fmemory.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fmov.c.inc | 0 + .../{ => tcg}/insn_trans/trans_memory.c.inc | 0 + .../insn_trans/trans_privileged.c.inc | 0 + .../{ => tcg}/insn_trans/trans_shift.c.inc | 0 + .../{ => tcg}/insn_trans/trans_vec.c.inc | 0 + target/loongarch/{ => tcg}/iocsr_helper.c | 0 + target/loongarch/tcg/meson.build | 19 +++++++++++++++++++ + target/loongarch/{ => tcg}/op_helper.c | 0 + target/loongarch/{ => tcg}/tlb_helper.c | 0 + target/loongarch/{ => tcg}/translate.c | 0 + target/loongarch/{ => tcg}/vec_helper.c | 0 + 24 files changed, 20 insertions(+), 14 deletions(-) + rename target/loongarch/{ => tcg}/constant_timer.c (100%) + rename target/loongarch/{ => tcg}/csr_helper.c (100%) + rename target/loongarch/{ => tcg}/fpu_helper.c (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_arith.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_atomic.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_bit.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_branch.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_extra.c.inc (100%) + rename target/loongarch/{ => 
tcg}/insn_trans/trans_farith.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fcmp.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fcnv.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fmemory.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fmov.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_memory.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_privileged.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_shift.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_vec.c.inc (100%) + rename target/loongarch/{ => tcg}/iocsr_helper.c (100%) + create mode 100644 target/loongarch/tcg/meson.build + rename target/loongarch/{ => tcg}/op_helper.c (100%) + rename target/loongarch/{ => tcg}/tlb_helper.c (100%) + rename target/loongarch/{ => tcg}/translate.c (100%) + rename target/loongarch/{ => tcg}/vec_helper.c (100%) + +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index b3a0fb12fb..e84e4c51f4 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -5,29 +5,16 @@ loongarch_ss.add(files( + 'cpu.c', + 'gdbstub.c', + )) +-loongarch_tcg_ss = ss.source_set() +-loongarch_tcg_ss.add(gen) +-loongarch_tcg_ss.add(files( +- 'fpu_helper.c', +- 'op_helper.c', +- 'translate.c', +- 'vec_helper.c', +-)) +-loongarch_tcg_ss.add(zlib) + + loongarch_system_ss = ss.source_set() + loongarch_system_ss.add(files( + 'loongarch-qmp-cmds.c', + 'machine.c', +- 'tlb_helper.c', +- 'constant_timer.c', +- 'csr_helper.c', +- 'iocsr_helper.c', + )) + + common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen]) + +-loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss]) ++subdir('tcg') + + target_arch += {'loongarch': loongarch_ss} + target_system_arch += {'loongarch': loongarch_system_ss} +diff --git a/target/loongarch/constant_timer.c b/target/loongarch/tcg/constant_timer.c +similarity 
index 100% +rename from target/loongarch/constant_timer.c +rename to target/loongarch/tcg/constant_timer.c +diff --git a/target/loongarch/csr_helper.c b/target/loongarch/tcg/csr_helper.c +similarity index 100% +rename from target/loongarch/csr_helper.c +rename to target/loongarch/tcg/csr_helper.c +diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c +similarity index 100% +rename from target/loongarch/fpu_helper.c +rename to target/loongarch/tcg/fpu_helper.c +diff --git a/target/loongarch/insn_trans/trans_arith.c.inc b/target/loongarch/tcg/insn_trans/trans_arith.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_arith.c.inc +rename to target/loongarch/tcg/insn_trans/trans_arith.c.inc +diff --git a/target/loongarch/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_atomic.c.inc +rename to target/loongarch/tcg/insn_trans/trans_atomic.c.inc +diff --git a/target/loongarch/insn_trans/trans_bit.c.inc b/target/loongarch/tcg/insn_trans/trans_bit.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_bit.c.inc +rename to target/loongarch/tcg/insn_trans/trans_bit.c.inc +diff --git a/target/loongarch/insn_trans/trans_branch.c.inc b/target/loongarch/tcg/insn_trans/trans_branch.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_branch.c.inc +rename to target/loongarch/tcg/insn_trans/trans_branch.c.inc +diff --git a/target/loongarch/insn_trans/trans_extra.c.inc b/target/loongarch/tcg/insn_trans/trans_extra.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_extra.c.inc +rename to target/loongarch/tcg/insn_trans/trans_extra.c.inc +diff --git a/target/loongarch/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_farith.c.inc +rename to 
target/loongarch/tcg/insn_trans/trans_farith.c.inc +diff --git a/target/loongarch/insn_trans/trans_fcmp.c.inc b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fcmp.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +diff --git a/target/loongarch/insn_trans/trans_fcnv.c.inc b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fcnv.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fcnv.c.inc +diff --git a/target/loongarch/insn_trans/trans_fmemory.c.inc b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fmemory.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fmemory.c.inc +diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/tcg/insn_trans/trans_fmov.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fmov.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fmov.c.inc +diff --git a/target/loongarch/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_memory.c.inc +rename to target/loongarch/tcg/insn_trans/trans_memory.c.inc +diff --git a/target/loongarch/insn_trans/trans_privileged.c.inc b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_privileged.c.inc +rename to target/loongarch/tcg/insn_trans/trans_privileged.c.inc +diff --git a/target/loongarch/insn_trans/trans_shift.c.inc b/target/loongarch/tcg/insn_trans/trans_shift.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_shift.c.inc +rename to target/loongarch/tcg/insn_trans/trans_shift.c.inc +diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc +similarity index 100% +rename from 
target/loongarch/insn_trans/trans_vec.c.inc +rename to target/loongarch/tcg/insn_trans/trans_vec.c.inc +diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/tcg/iocsr_helper.c +similarity index 100% +rename from target/loongarch/iocsr_helper.c +rename to target/loongarch/tcg/iocsr_helper.c +diff --git a/target/loongarch/tcg/meson.build b/target/loongarch/tcg/meson.build +new file mode 100644 +index 0000000000..1a3cd589fb +--- /dev/null ++++ b/target/loongarch/tcg/meson.build +@@ -0,0 +1,19 @@ ++if 'CONFIG_TCG' not in config_all ++ subdir_done() ++endif ++ ++loongarch_ss.add([zlib, gen]) ++ ++loongarch_ss.add(files( ++ 'fpu_helper.c', ++ 'op_helper.c', ++ 'translate.c', ++ 'vec_helper.c', ++)) ++ ++loongarch_system_ss.add(files( ++ 'constant_timer.c', ++ 'csr_helper.c', ++ 'iocsr_helper.c', ++ 'tlb_helper.c', ++)) +diff --git a/target/loongarch/op_helper.c b/target/loongarch/tcg/op_helper.c +similarity index 100% +rename from target/loongarch/op_helper.c +rename to target/loongarch/tcg/op_helper.c +diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c +similarity index 100% +rename from target/loongarch/tlb_helper.c +rename to target/loongarch/tcg/tlb_helper.c +diff --git a/target/loongarch/translate.c b/target/loongarch/tcg/translate.c +similarity index 100% +rename from target/loongarch/translate.c +rename to target/loongarch/tcg/translate.c +diff --git a/target/loongarch/vec_helper.c b/target/loongarch/tcg/vec_helper.c +similarity index 100% +rename from target/loongarch/vec_helper.c +rename to target/loongarch/tcg/vec_helper.c +-- +2.27.0 + diff --git a/target-m68k-Map-FPU-exceptions-to-FPSR-register.patch b/target-m68k-Map-FPU-exceptions-to-FPSR-register.patch new file mode 100644 index 0000000000000000000000000000000000000000..de9b79c30fcd435b6456cae0ae289b19462e6af9 --- /dev/null +++ b/target-m68k-Map-FPU-exceptions-to-FPSR-register.patch @@ -0,0 +1,218 @@ +From a8a621a06d54b987502d277f33021547d00fd133 Mon Sep 17 00:00:00 
2001 +From: Keith Packard +Date: Wed, 2 Aug 2023 20:52:31 -0700 +Subject: [PATCH] target/m68k: Map FPU exceptions to FPSR register + +Add helpers for reading/writing the 68881 FPSR register so that +changes in floating point exception state can be seen by the +application. + +Call these helpers in pre_load/post_load hooks to synchronize +exception state. + +Signed-off-by: Keith Packard +Reviewed-by: Richard Henderson +Message-Id: <20230803035231.429697-1-keithp@keithp.com> +Signed-off-by: Richard Henderson +(cherry picked from commit 5888357942da1fd5a50efb6e4a6af8b1a27a5af8) +Signed-off-by: zhujun2 +--- + target/m68k/cpu.c | 12 +++++-- + target/m68k/cpu.h | 3 +- + target/m68k/fpu_helper.c | 72 ++++++++++++++++++++++++++++++++++++++++ + target/m68k/helper.c | 4 +-- + target/m68k/helper.h | 2 ++ + target/m68k/translate.c | 4 +-- + 6 files changed, 90 insertions(+), 7 deletions(-) + +diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c +index 11c7e0a790..d95deaafcd 100644 +--- a/target/m68k/cpu.c ++++ b/target/m68k/cpu.c +@@ -396,12 +396,19 @@ static const VMStateDescription vmstate_freg = { + } + }; + +-static int fpu_post_load(void *opaque, int version) ++static int fpu_pre_save(void *opaque) + { + M68kCPU *s = opaque; + +- cpu_m68k_restore_fp_status(&s->env); ++ s->env.fpsr = cpu_m68k_get_fpsr(&s->env); ++ return 0; ++} ++ ++static int fpu_post_load(void *opaque, int version) ++{ ++ M68kCPU *s = opaque; + ++ cpu_m68k_set_fpsr(&s->env, s->env.fpsr); + return 0; + } + +@@ -410,6 +417,7 @@ const VMStateDescription vmmstate_fpu = { + .version_id = 1, + .minimum_version_id = 1, + .needed = fpu_needed, ++ .pre_save = fpu_pre_save, + .post_load = fpu_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(env.fpcr, M68kCPU), +diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h +index 6cfc696d2b..4d78da9d5f 100644 +--- a/target/m68k/cpu.h ++++ b/target/m68k/cpu.h +@@ -199,7 +199,8 @@ void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t); + void cpu_m68k_set_sr(CPUM68KState 
*env, uint32_t); + void cpu_m68k_restore_fp_status(CPUM68KState *env); + void cpu_m68k_set_fpcr(CPUM68KState *env, uint32_t val); +- ++uint32_t cpu_m68k_get_fpsr(CPUM68KState *env); ++void cpu_m68k_set_fpsr(CPUM68KState *env, uint32_t val); + + /* + * Instead of computing the condition codes after each m68k instruction, +diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c +index ab120b5f59..8314791f50 100644 +--- a/target/m68k/fpu_helper.c ++++ b/target/m68k/fpu_helper.c +@@ -164,6 +164,78 @@ void HELPER(set_fpcr)(CPUM68KState *env, uint32_t val) + cpu_m68k_set_fpcr(env, val); + } + ++/* Convert host exception flags to cpu_m68k form. */ ++static int cpu_m68k_exceptbits_from_host(int host_bits) ++{ ++ int target_bits = 0; ++ ++ if (host_bits & float_flag_invalid) { ++ target_bits |= 0x80; ++ } ++ if (host_bits & float_flag_overflow) { ++ target_bits |= 0x40; ++ } ++ if (host_bits & (float_flag_underflow | float_flag_output_denormal)) { ++ target_bits |= 0x20; ++ } ++ if (host_bits & float_flag_divbyzero) { ++ target_bits |= 0x10; ++ } ++ if (host_bits & float_flag_inexact) { ++ target_bits |= 0x08; ++ } ++ return target_bits; ++} ++ ++/* Convert cpu_m68k exception flags to target form. 
*/ ++static int cpu_m68k_exceptbits_to_host(int target_bits) ++{ ++ int host_bits = 0; ++ ++ if (target_bits & 0x80) { ++ host_bits |= float_flag_invalid; ++ } ++ if (target_bits & 0x40) { ++ host_bits |= float_flag_overflow; ++ } ++ if (target_bits & 0x20) { ++ host_bits |= float_flag_underflow; ++ } ++ if (target_bits & 0x10) { ++ host_bits |= float_flag_divbyzero; ++ } ++ if (target_bits & 0x08) { ++ host_bits |= float_flag_inexact; ++ } ++ return host_bits; ++} ++ ++uint32_t cpu_m68k_get_fpsr(CPUM68KState *env) ++{ ++ int host_flags = get_float_exception_flags(&env->fp_status); ++ int target_flags = cpu_m68k_exceptbits_from_host(host_flags); ++ int except = (env->fpsr & ~(0xf8)) | target_flags; ++ return except; ++} ++ ++uint32_t HELPER(get_fpsr)(CPUM68KState *env) ++{ ++ return cpu_m68k_get_fpsr(env); ++} ++ ++void cpu_m68k_set_fpsr(CPUM68KState *env, uint32_t val) ++{ ++ env->fpsr = val; ++ ++ int host_flags = cpu_m68k_exceptbits_to_host((int) env->fpsr); ++ set_float_exception_flags(host_flags, &env->fp_status); ++} ++ ++void HELPER(set_fpsr)(CPUM68KState *env, uint32_t val) ++{ ++ cpu_m68k_set_fpsr(env, val); ++} ++ + #define PREC_BEGIN(prec) \ + do { \ + FloatX80RoundPrec old = \ +diff --git a/target/m68k/helper.c b/target/m68k/helper.c +index 0a1544cd68..beab4b96bc 100644 +--- a/target/m68k/helper.c ++++ b/target/m68k/helper.c +@@ -118,7 +118,7 @@ static int m68k_fpu_gdb_get_reg(CPUM68KState *env, GByteArray *mem_buf, int n) + case 8: /* fpcontrol */ + return gdb_get_reg32(mem_buf, env->fpcr); + case 9: /* fpstatus */ +- return gdb_get_reg32(mem_buf, env->fpsr); ++ return gdb_get_reg32(mem_buf, cpu_m68k_get_fpsr(env)); + case 10: /* fpiar, not implemented */ + return gdb_get_reg32(mem_buf, 0); + } +@@ -137,7 +137,7 @@ static int m68k_fpu_gdb_set_reg(CPUM68KState *env, uint8_t *mem_buf, int n) + cpu_m68k_set_fpcr(env, ldl_p(mem_buf)); + return 4; + case 9: /* fpstatus */ +- env->fpsr = ldl_p(mem_buf); ++ cpu_m68k_set_fpsr(env, ldl_p(mem_buf)); + return 4; 
+ case 10: /* fpiar, not implemented */ + return 4; +diff --git a/target/m68k/helper.h b/target/m68k/helper.h +index 2bbe0dc032..95aa5e53bb 100644 +--- a/target/m68k/helper.h ++++ b/target/m68k/helper.h +@@ -54,6 +54,8 @@ DEF_HELPER_4(fsdiv, void, env, fp, fp, fp) + DEF_HELPER_4(fddiv, void, env, fp, fp, fp) + DEF_HELPER_4(fsgldiv, void, env, fp, fp, fp) + DEF_HELPER_FLAGS_3(fcmp, TCG_CALL_NO_RWG, void, env, fp, fp) ++DEF_HELPER_2(set_fpsr, void, env, i32) ++DEF_HELPER_1(get_fpsr, i32, env) + DEF_HELPER_FLAGS_2(set_fpcr, TCG_CALL_NO_RWG, void, env, i32) + DEF_HELPER_FLAGS_2(ftst, TCG_CALL_NO_RWG, void, env, fp) + DEF_HELPER_3(fconst, void, env, fp, i32) +diff --git a/target/m68k/translate.c b/target/m68k/translate.c +index 4a0b0b2703..f8eeb70379 100644 +--- a/target/m68k/translate.c ++++ b/target/m68k/translate.c +@@ -4686,7 +4686,7 @@ static void gen_load_fcr(DisasContext *s, TCGv res, int reg) + tcg_gen_movi_i32(res, 0); + break; + case M68K_FPSR: +- tcg_gen_ld_i32(res, tcg_env, offsetof(CPUM68KState, fpsr)); ++ gen_helper_get_fpsr(res, tcg_env); + break; + case M68K_FPCR: + tcg_gen_ld_i32(res, tcg_env, offsetof(CPUM68KState, fpcr)); +@@ -4700,7 +4700,7 @@ static void gen_store_fcr(DisasContext *s, TCGv val, int reg) + case M68K_FPIAR: + break; + case M68K_FPSR: +- tcg_gen_st_i32(val, tcg_env, offsetof(CPUM68KState, fpsr)); ++ gen_helper_set_fpsr(tcg_env, val); + break; + case M68K_FPCR: + gen_helper_set_fpcr(tcg_env, val); +-- +2.41.0.windows.1 + diff --git a/target-ppc-Fix-lxv-stxv-MSR-facility-check.patch b/target-ppc-Fix-lxv-stxv-MSR-facility-check.patch new file mode 100644 index 0000000000000000000000000000000000000000..1afd8e3268ea0951882a6e8ff73c351bbc8b7b9f --- /dev/null +++ b/target-ppc-Fix-lxv-stxv-MSR-facility-check.patch @@ -0,0 +1,49 @@ +From a8b171a0e5be721ee173a533f98594f62b0f0250 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Sun, 29 Sep 2024 07:07:36 -0400 +Subject: [PATCH] target/ppc: Fix lxv/stxv MSR facility check +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 2cc0e449d17310877fb28a942d4627ad22bb68ea + +The move to decodetree flipped the inequality test for the VEC / VSX +MSR facility check. + +This caused application crashes under Linux, where these facility +unavailable interrupts are used for lazy-switching of VEC/VSX register +sets. Getting the incorrect interrupt would result in wrong registers +being loaded, potentially overwriting live values and/or exposing +stale ones. + +Cc: qemu-stable@nongnu.org +Reported-by: Joel Stanley +Fixes: 70426b5bb738 ("target/ppc: moved stxvx and lxvx from legacy to decodtree") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1769 +Reviewed-by: Harsh Prateek Bora +Tested-by: Harsh Prateek Bora +Reviewed-by: Cédric Le Goater +Tested-by: Cédric Le Goater +Signed-off-by: Nicholas Piggin +Signed-off-by: qihao_yewu +--- + target/ppc/translate/vsx-impl.c.inc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc +index 6db87ab336..0266f09119 100644 +--- a/target/ppc/translate/vsx-impl.c.inc ++++ b/target/ppc/translate/vsx-impl.c.inc +@@ -2268,7 +2268,7 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, + + static bool do_lstxv_D(DisasContext *ctx, arg_D *a, bool store, bool paired) + { +- if (paired || a->rt >= 32) { ++ if (paired || a->rt < 32) { + REQUIRE_VSX(ctx); + } else { + REQUIRE_VECTOR(ctx); +-- +2.41.0.windows.1 + diff --git a/target-ppc-Fix-lxvx-stxvx-facility-check.patch b/target-ppc-Fix-lxvx-stxvx-facility-check.patch new file mode 100644 index 0000000000000000000000000000000000000000..45c7c5efd5cce0952277c4f36f6e1d61e8cdd0f6 --- /dev/null +++ b/target-ppc-Fix-lxvx-stxvx-facility-check.patch @@ -0,0 +1,63 @@ +From 67ce79a910ab02d8c1e08a9ebfa6c5aae2e9d5af Mon Sep 17 00:00:00 2001 +From: qihao_ss +Date: Sun, 29 Sep 2024 06:44:29 -0400 +Subject: [PATCH] target/ppc: Fix lxvx/stxvx
facility check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 8bded2e73e80823a67f730140788a3c5e60bf4b5 + +The XT check for the lxvx/stxvx instructions is currently +inverted. This was introduced during the move to decodetree. + +>From the ISA: + Chapter 7. Vector-Scalar Extension Facility + Load VSX Vector Indexed X-form + + lxvx XT,RA,RB + if TX=0 & MSR.VSX=0 then VSX_Unavailable() + if TX=1 & MSR.VEC=0 then Vector_Unavailable() + ... + Let XT be the value 32×TX + T. + +The code currently does the opposite: + + if (paired || a->rt >= 32) { + REQUIRE_VSX(ctx); + } else { + REQUIRE_VECTOR(ctx); + } + +This was already fixed for lxv/stxv at commit "2cc0e449d1 (target/ppc: +Fix lxv/stxv MSR facility check)", but the indexed forms were missed. + +Cc: qemu-stable@nongnu.org +Fixes: 70426b5bb7 ("target/ppc: moved stxvx and lxvx from legacy to decodtree") +Signed-off-by: Fabiano Rosas +Reviewed-by: Claudio Fontana +Acked-by: Ilya Leoshkevich +Reviewed-by: Fabiano Rosas +Message-ID: <20240911141651.6914-1-farosas@suse.de> +Signed-off-by: Richard Henderson +Signed-off-by: qihao_yewu +--- + target/ppc/translate/vsx-impl.c.inc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc +index 6db87ab336..a2020da9fd 100644 +--- a/target/ppc/translate/vsx-impl.c.inc ++++ b/target/ppc/translate/vsx-impl.c.inc +@@ -2292,7 +2292,7 @@ static bool do_lstxv_PLS_D(DisasContext *ctx, arg_PLS_D *a, + + static bool do_lstxv_X(DisasContext *ctx, arg_X *a, bool store, bool paired) + { +- if (paired || a->rt >= 32) { ++ if (paired || a->rt < 32) { + REQUIRE_VSX(ctx); + } else { + REQUIRE_VECTOR(ctx); +-- +2.41.0.windows.1 + diff --git a/target-ppc-Fix-migration-of-CPUs-with-TLB_EMB-TLB-ty.patch b/target-ppc-Fix-migration-of-CPUs-with-TLB_EMB-TLB-ty.patch new file mode 100644 index
0000000000000000000000000000000000000000..e1b94fe03fd2a74df848b2f520a00f589067f9ea --- /dev/null +++ b/target-ppc-Fix-migration-of-CPUs-with-TLB_EMB-TLB-ty.patch @@ -0,0 +1,55 @@ +From fa282d002c45e1cc1cca6a2541b75ab5889c8c01 Mon Sep 17 00:00:00 2001 +From: Arman Nabiev +Date: Thu, 22 Aug 2024 19:56:53 +0300 +Subject: [PATCH] target/ppc: Fix migration of CPUs with TLB_EMB TLB type + +In vmstate_tlbemb a cut-and-paste error meant we gave +this vmstate subsection the same "cpu/tlb6xx" name as +the vmstate_tlb6xx subsection. This breaks migration load +for any CPU using the TLB_EMB CPU type, because when we +see the "tlb6xx" name in the incoming data we try to +interpret it as a vmstate_tlb6xx subsection, which it +isn't the right format for: + + $ qemu-system-ppc -drive + if=none,format=qcow2,file=/home/petmay01/test-images/virt/dummy.qcow2 + -monitor stdio -M bamboo + QEMU 9.0.92 monitor - type 'help' for more information + (qemu) savevm foo + (qemu) loadvm foo + Missing section footer for cpu + Error: Error -22 while loading VM state + +Correct the incorrect vmstate section name. Since migration +for these CPU types was completely broken before, we don't +need to care that this is a migration compatibility break. + +This affects the PPC 405, 440, 460 and e200 CPU families. 
+ +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2522 +Reviewed-by: Peter Maydell +Signed-off-by: Arman Nabiev +Signed-off-by: Fabiano Rosas +(cherry picked from commit 203beb6f047467a4abfc8267c234393cea3f471c) +Signed-off-by: zhujun2 +--- + target/ppc/machine.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/ppc/machine.c b/target/ppc/machine.c +index 68cbdffecd..3e010f3a07 100644 +--- a/target/ppc/machine.c ++++ b/target/ppc/machine.c +@@ -621,7 +621,7 @@ static bool tlbemb_needed(void *opaque) + } + + static const VMStateDescription vmstate_tlbemb = { +- .name = "cpu/tlb6xx", ++ .name = "cpu/tlbemb", + .version_id = 1, + .minimum_version_id = 1, + .needed = tlbemb_needed, +-- +2.41.0.windows.1 + diff --git a/target-riscv-Avoid-bad-shift-in-riscv_cpu_do_interru.patch b/target-riscv-Avoid-bad-shift-in-riscv_cpu_do_interru.patch new file mode 100644 index 0000000000000000000000000000000000000000..45007fbd7df42dadfe6c9e171dcec3510a9cd0bc --- /dev/null +++ b/target-riscv-Avoid-bad-shift-in-riscv_cpu_do_interru.patch @@ -0,0 +1,62 @@ +From e52a2122cb1574723c7c8181ba751cc0ff37648e Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 12 Dec 2024 09:46:18 +0800 +Subject: [PATCH] target/riscv: Avoid bad shift in riscv_cpu_do_interrupt() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 5311599cdc48337f2f27b1b51a80d46d75b05ed0 + +In riscv_cpu_do_interrupt() we use the 'cause' value we got out of +cs->exception as a shift value. However this value can be larger +than 31, which means that "1 << cause" is undefined behaviour, +because we do the shift on an 'int' type.
+ +This causes the undefined behaviour sanitizer to complain +on one of the check-tcg tests: + +$ UBSAN_OPTIONS=print_stacktrace=1:abort_on_error=1:halt_on_error=1 ./build/clang/qemu-system-riscv64 -M virt -semihosting -display none -device loader,file=build/clang/tests/tcg/riscv64-softmmu/issue1060 +../../target/riscv/cpu_helper.c:1805:38: runtime error: shift exponent 63 is too large for 32-bit type 'int' + #0 0x55f2dc026703 in riscv_cpu_do_interrupt /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/clang/../../target/riscv/cpu_helper.c:1805:38 + #1 0x55f2dc3d170e in cpu_handle_exception /mnt/nvmedisk/linaro/qemu-from-laptop/qemu/build/clang/../../accel/tcg/cpu-exec.c:752:9 + +In this case cause is RISCV_EXCP_SEMIHOST, which is 0x3f. + +Use 1ULL instead to ensure that the shift is in range. + +Signed-off-by: Peter Maydell +Fixes: 1697837ed9 ("target/riscv: Add M-mode virtual interrupt and IRQ filtering support.") +Fixes: 40336d5b1d ("target/riscv: Add HS-mode virtual interrupt and IRQ filtering support.") +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Richard Henderson +Reviewed-by: Alistair Francis +Message-ID: <20241128103831.3452572-1-peter.maydell@linaro.org> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Jiao +--- + target/riscv/cpu_helper.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c +index e7e23b34f4..4d8f1248dd 100644 +--- a/target/riscv/cpu_helper.c ++++ b/target/riscv/cpu_helper.c +@@ -1644,10 +1644,10 @@ void riscv_cpu_do_interrupt(CPUState *cs) + bool async = !!(cs->exception_index & RISCV_EXCP_INT_FLAG); + target_ulong cause = cs->exception_index & RISCV_EXCP_INT_MASK; + uint64_t deleg = async ? 
env->mideleg : env->medeleg; +- bool s_injected = env->mvip & (1 << cause) & env->mvien && +- !(env->mip & (1 << cause)); +- bool vs_injected = env->hvip & (1 << cause) & env->hvien && +- !(env->mip & (1 << cause)); ++ bool s_injected = env->mvip & (1ULL << cause) & env->mvien && ++ !(env->mip & (1ULL << cause)); ++ bool vs_injected = env->hvip & (1ULL << cause) & env->hvien && ++ !(env->mip & (1ULL << cause)); + target_ulong tval = 0; + target_ulong tinst = 0; + target_ulong htval = 0; +-- +2.41.0.windows.1 + diff --git a/target-riscv-Fix-the-element-agnostic-function-probl.patch b/target-riscv-Fix-the-element-agnostic-function-probl.patch new file mode 100644 index 0000000000000000000000000000000000000000..6951aa9cc634518b278225555d866d1945b082a9 --- /dev/null +++ b/target-riscv-Fix-the-element-agnostic-function-probl.patch @@ -0,0 +1,60 @@ +From 194c3cadc1879ff4c3d2fc6c5f962ad751c83d9c Mon Sep 17 00:00:00 2001 +From: Huang Tao +Date: Mon, 25 Mar 2024 10:16:54 +0800 +Subject: [PATCH] target/riscv: Fix the element agnostic function problem + +In RVV and vcrypto instructions, the masked and tail elements are set to 1s +using vext_set_elems_1s function if the vma/vta bit is set. It is the element +agnostic policy. + +However, this function can't deal the big endian situation. This patch fixes +the problem by adding handling of such case. 
+ +Signed-off-by: Huang Tao +Suggested-by: Richard Henderson +Reviewed-by: LIU Zhiwei +Cc: qemu-stable +Message-ID: <20240325021654.6594-1-eric.huang@linux.alibaba.com> +Signed-off-by: Alistair Francis +(cherry picked from commit 75115d880c6d396f8a2d56aab8c12236d85a90e0) +Signed-off-by: zhujun2 +--- + target/riscv/vector_internals.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c +index 9cf5c17cde..be6eb040d2 100644 +--- a/target/riscv/vector_internals.c ++++ b/target/riscv/vector_internals.c +@@ -29,6 +29,28 @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, + if (tot - cnt == 0) { + return ; + } ++ ++ if (HOST_BIG_ENDIAN) { ++ /* ++ * Deal with the situation when the elements are inside ++ * only one uint64 block including setting the ++ * masked-off element. ++ */ ++ if (((tot - 1) ^ cnt) < 8) { ++ memset(base + H1(tot - 1), -1, tot - cnt); ++ return; ++ } ++ /* ++ * Otherwise, at least cross two uint64_t blocks. ++ * Set first unaligned block. ++ */ ++ if (cnt % 8 != 0) { ++ uint32_t j = ROUND_UP(cnt, 8); ++ memset(base + H1(j - 1), -1, j - cnt); ++ cnt = j; ++ } ++ /* Set other 64bit aligned blocks */ ++ } + memset(base + cnt, -1, tot - cnt); + } + +-- +2.41.0.windows.1 + diff --git a/target-riscv-Fix-vcompress-with-rvv_ta_all_1s.patch b/target-riscv-Fix-vcompress-with-rvv_ta_all_1s.patch new file mode 100644 index 0000000000000000000000000000000000000000..351d8d91e2851f830cbdc0e6451abca87b7e517f --- /dev/null +++ b/target-riscv-Fix-vcompress-with-rvv_ta_all_1s.patch @@ -0,0 +1,39 @@ +From 0d93daee2da62d0e86d99fd561d2a973c9634d1f Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 18 Nov 2024 22:32:53 -0500 +Subject: [PATCH] target/riscv: Fix vcompress with rvv_ta_all_1s + +cherry-pick from c128d39edeff337220fc536a3e935bcba01ecb49 + +vcompress packs vl or less fields into vd, so the tail starts after the +last packed field. 
This could be more clearly expressed in the ISA, +but for now this thread helps to explain it: + +https://github.com/riscv/riscv-v-spec/issues/796 + +Signed-off-by: Anton Blanchard +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Alistair Francis +Message-ID: <20241030043538.939712-1-antonb@tenstorrent.com> +Signed-off-by: Alistair Francis +Signed-off-by: qihao_yewu +--- + target/riscv/vector_helper.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c +index c1c3a4d1ea..42ffd3a68a 100644 +--- a/target/riscv/vector_helper.c ++++ b/target/riscv/vector_helper.c +@@ -5045,7 +5045,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + } \ + env->vstart = 0; \ + /* set tail elements to 1s */ \ +- vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ ++ vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \ + } + + /* Compress into vd elements of vs2 where vs1 is enabled */ +-- +2.41.0.windows.1 + diff --git a/target-riscv-SMBIOS-support-for-RISC-V-virt-machine.patch b/target-riscv-SMBIOS-support-for-RISC-V-virt-machine.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c96662c5bb968580c65fb0266e7b58b544f6be3 --- /dev/null +++ b/target-riscv-SMBIOS-support-for-RISC-V-virt-machine.patch @@ -0,0 +1,109 @@ +From 5f27d198462966c13dc60e32be48978ecc987698 Mon Sep 17 00:00:00 2001 +From: yechao-w +Date: Fri, 30 May 2025 09:12:20 +0800 +Subject: [PATCH] target/riscv: SMBIOS support for RISC-V virt machine + +commit ecf286478475d11ae4cdef7e52d9c8e1672f2868 upstream + +Generate SMBIOS tables for the RISC-V mach-virt. +Add CONFIG_SMBIOS=y to the RISC-V default config. +Set the default processor family in the type 4 table. + +The implementation is based on the corresponding ARM and Loongson code. 
+ +With the patch the following firmware tables are provided: + + etc/smbios/smbios-anchor + etc/smbios/smbios-tables + +Signed-off-by: Heinrich Schuchardt +Reviewed-by: Andrew Jones +Message-ID: <20240123184229.10415-4-heinrich.schuchardt@canonical.com> +Signed-off-by: Alistair Francis +--- + hw/riscv/Kconfig | 1 + + hw/riscv/virt.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 43 insertions(+) + +diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig +index b6a5eb4452..1e11ac9432 100644 +--- a/hw/riscv/Kconfig ++++ b/hw/riscv/Kconfig +@@ -41,6 +41,7 @@ config RISCV_VIRT + select RISCV_IMSIC + select SIFIVE_PLIC + select SIFIVE_TEST ++ select SMBIOS + select VIRTIO_MMIO + select FW_CFG_DMA + select PLATFORM_BUS +diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c +index d2eac24156..9b29ed1108 100644 +--- a/hw/riscv/virt.c ++++ b/hw/riscv/virt.c +@@ -36,6 +36,7 @@ + #include "hw/riscv/boot.h" + #include "hw/riscv/numa.h" + #include "kvm/kvm_riscv.h" ++#include "hw/firmware/smbios.h" + #include "hw/intc/riscv_aclint.h" + #include "hw/intc/riscv_aplic.h" + #include "hw/intc/riscv_imsic.h" +@@ -1249,6 +1250,45 @@ static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) + sysbus_mmio_get_region(sysbus, 0)); + } + ++static void virt_build_smbios(RISCVVirtState *s) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(s); ++ MachineState *ms = MACHINE(s); ++ uint8_t *smbios_tables, *smbios_anchor; ++ size_t smbios_tables_len, smbios_anchor_len; ++ struct smbios_phys_mem_area mem_array; ++ const char *product = "QEMU Virtual Machine"; ++ ++ if (kvm_enabled()) { ++ product = "KVM Virtual Machine"; ++ } ++ ++ smbios_set_defaults("QEMU", product, mc->name, false, ++ true, SMBIOS_ENTRY_POINT_TYPE_64); ++ ++ if (riscv_is_32bit(&s->soc[0])) { ++ smbios_set_default_processor_family(0x200); ++ } else { ++ smbios_set_default_processor_family(0x201); ++ } ++ ++ /* build the array of physical mem area from base_memmap */ ++ mem_array.address = 
s->memmap[VIRT_DRAM].base; ++ mem_array.length = ms->ram_size; ++ ++ smbios_get_tables(ms, &mem_array, 1, ++ &smbios_tables, &smbios_tables_len, ++ &smbios_anchor, &smbios_anchor_len, ++ &error_fatal); ++ ++ if (smbios_anchor) { ++ fw_cfg_add_file(s->fw_cfg, "etc/smbios/smbios-tables", ++ smbios_tables, smbios_tables_len); ++ fw_cfg_add_file(s->fw_cfg, "etc/smbios/smbios-anchor", ++ smbios_anchor, smbios_anchor_len); ++ } ++} ++ + static void virt_machine_done(Notifier *notifier, void *data) + { + RISCVVirtState *s = container_of(notifier, RISCVVirtState, +@@ -1337,6 +1377,8 @@ static void virt_machine_done(Notifier *notifier, void *data) + riscv_setup_direct_kernel(kernel_entry, fdt_load_addr); + } + ++ virt_build_smbios(s); ++ + if (virt_is_acpi_enabled(s)) { + virt_acpi_setup(s); + } +-- +2.33.0 + diff --git a/target-riscv-cpu.c-fix-Zvkb-extension-config.patch b/target-riscv-cpu.c-fix-Zvkb-extension-config.patch new file mode 100644 index 0000000000000000000000000000000000000000..3043991ce6f221ef8ac9101a9b27d29c26fb92af --- /dev/null +++ b/target-riscv-cpu.c-fix-Zvkb-extension-config.patch @@ -0,0 +1,43 @@ +From c7c526af0bb4de631e2e5f1d38518beb8fa5a8a4 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Wed, 5 Jun 2024 15:21:06 +0800 +Subject: [PATCH] target/riscv/cpu.c: fix Zvkb extension config +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from ff33b7a9699e977a050a1014c617a89da1bf8295 + +This code has a typo that writes zvkb to zvkg, causing users can't +enable zvkb through the config. This patch gets this fixed. 
+ +Signed-off-by: Yangyu Chen +Fixes: ea61ef7097d0 ("target/riscv: Move vector crypto extensions to riscv_cpu_extensions") +Reviewed-by: LIU Zhiwei +Reviewed-by: Alistair Francis +Reviewed-by: Max Chou +Reviewed-by:  Weiwei Li +Message-ID: +Cc: qemu-stable +Signed-off-by: Alistair Francis +Signed-off-by: qihao_yewu +--- + target/riscv/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c +index 83c7c0cf07..77cb59b8a1 100644 +--- a/target/riscv/cpu.c ++++ b/target/riscv/cpu.c +@@ -1359,7 +1359,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { + /* Vector cryptography extensions */ + MULTI_EXT_CFG_BOOL("zvbb", ext_zvbb, false), + MULTI_EXT_CFG_BOOL("zvbc", ext_zvbc, false), +- MULTI_EXT_CFG_BOOL("zvkb", ext_zvkg, false), ++ MULTI_EXT_CFG_BOOL("zvkb", ext_zvkb, false), + MULTI_EXT_CFG_BOOL("zvkg", ext_zvkg, false), + MULTI_EXT_CFG_BOOL("zvkned", ext_zvkned, false), + MULTI_EXT_CFG_BOOL("zvknha", ext_zvknha, false), +-- +2.41.0.windows.1 + diff --git a/target-riscv-csr.c-Fix-an-access-to-VXSAT.patch b/target-riscv-csr.c-Fix-an-access-to-VXSAT.patch new file mode 100644 index 0000000000000000000000000000000000000000..50e9e6ff2053f94245cc97f562f805e34bd27b35 --- /dev/null +++ b/target-riscv-csr.c-Fix-an-access-to-VXSAT.patch @@ -0,0 +1,52 @@ +From 6b1b8553ea3810e497d225d64e98dd6eac7b4e2c Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Tue, 5 Nov 2024 03:21:00 -0500 +Subject: [PATCH] target/riscv/csr.c: Fix an access to VXSAT + +cheery-pick from 5a60026cad4e9dba929cab4f63229e4b9110cf0a + +The register VXSAT should be RW only to the first bit. +The remaining bits should be 0. + +The RISC-V Instruction Set Manual Volume I: Unprivileged Architecture + +The vxsat CSR has a single read-write least-significant bit (vxsat[0]) +that indicates if a fixed-point instruction has had to saturate an output +value to fit into a destination format. Bits vxsat[XLEN-1:1] +should be written as zeros. 
+ +Signed-off-by: Evgenii Prokopiev +Reviewed-by: Daniel Henrique Barboza +Reviewed-by: Alistair Francis +Message-ID: <20241002084436.89347-1-evgenii.prokopiev@syntacore.com> +Signed-off-by: Alistair Francis +Signed-off-by: qihao_yewu +--- + target/riscv/csr.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/riscv/csr.c b/target/riscv/csr.c +index fde7ce1a53..d1bb7bc0d3 100644 +--- a/target/riscv/csr.c ++++ b/target/riscv/csr.c +@@ -704,7 +704,7 @@ static RISCVException write_vxrm(CPURISCVState *env, int csrno, + static RISCVException read_vxsat(CPURISCVState *env, int csrno, + target_ulong *val) + { +- *val = env->vxsat; ++ *val = env->vxsat & BIT(0); + return RISCV_EXCP_NONE; + } + +@@ -714,7 +714,7 @@ static RISCVException write_vxsat(CPURISCVState *env, int csrno, + #if !defined(CONFIG_USER_ONLY) + env->mstatus |= MSTATUS_VS; + #endif +- env->vxsat = val; ++ env->vxsat = val & BIT(0); + return RISCV_EXCP_NONE; + } + +-- +2.41.0.windows.1 + diff --git a/target-riscv-kvm-tolerate-KVM-disable-ext-errors.patch b/target-riscv-kvm-tolerate-KVM-disable-ext-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..c8f6257f9bfd7a1643ce9a4f8f8f672d51fcfe59 --- /dev/null +++ b/target-riscv-kvm-tolerate-KVM-disable-ext-errors.patch @@ -0,0 +1,69 @@ +From 33d8e65f37caa34bf0c18a3ecbaa48d3b706564b Mon Sep 17 00:00:00 2001 +From: Daniel Henrique Barboza +Date: Mon, 22 Apr 2024 14:14:25 -0300 +Subject: [PATCH] target/riscv/kvm: tolerate KVM disable ext errors + +Running a KVM guest using a 6.9-rc3 kernel, in a 6.8 host that has zkr +enabled, will fail with a kernel oops SIGILL right at the start. The +reason is that we can't expose zkr without implementing the SEED CSR. +Disabling zkr in the guest would be a workaround, but if the KVM doesn't +allow it we'll error out and never boot. + +In hindsight this is too strict. 
If we keep proceeding, despite not +disabling the extension in the KVM vcpu, we'll not add the extension in +the riscv,isa. The guest kernel will be unaware of the extension, i.e. +it doesn't matter if the KVM vcpu has it enabled underneath or not. So +it's ok to keep booting in this case. + +Change our current logic to not error out if we fail to disable an +extension in kvm_set_one_reg(), but show a warning and keep booting. It +is important to throw a warning because we must make the user aware that +the extension is still available in the vcpu, meaning that an +ill-behaved guest can ignore the riscv,isa settings and use the +extension. + +The case we're handling happens with an EINVAL error code. If we fail to +disable the extension in KVM for any other reason, error out. + +We'll also keep erroring out when we fail to enable an extension in KVM, +since adding the extension in riscv,isa at this point will cause a guest +malfunction because the extension isn't enabled in the vcpu. + +Suggested-by: Andrew Jones +Signed-off-by: Daniel Henrique Barboza +Reviewed-by: Andrew Jones +Cc: qemu-stable +Message-ID: <20240422171425.333037-2-dbarboza@ventanamicro.com> +Signed-off-by: Alistair Francis +(cherry picked from commit 1215d45b2aa97512a2867e401aa59f3d0c23cb23) +Signed-off-by: zhujun2 +--- + target/riscv/kvm/kvm-cpu.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c +index 45b6cf1cfa..b3dc2070f9 100644 +--- a/target/riscv/kvm/kvm-cpu.c ++++ b/target/riscv/kvm/kvm-cpu.c +@@ -369,10 +369,14 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) + reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg); + ret = kvm_set_one_reg(cs, id, &reg); + if (ret != 0) { +- error_report("Unable to %s extension %s in KVM, error %d", +- reg ? 
"enable" : "disable", +- multi_ext_cfg->name, ret); +- exit(EXIT_FAILURE); ++ if (!reg && ret == -EINVAL) { ++ warn_report("KVM cannot disable extension %s", ++ multi_ext_cfg->name); ++ } else { ++ error_report("Unable to enable extension %s in KVM, error %d", ++ multi_ext_cfg->name, ret); ++ exit(EXIT_FAILURE); ++ } + } + } + } +-- +2.41.0.windows.1 + diff --git a/target-riscv-vector_helper.c-fix-vmvr_v-memcpy-endia.patch b/target-riscv-vector_helper.c-fix-vmvr_v-memcpy-endia.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b6c5646dd1da0e10897d8300eea816ec6c232d3 --- /dev/null +++ b/target-riscv-vector_helper.c-fix-vmvr_v-memcpy-endia.patch @@ -0,0 +1,49 @@ +From 1c6b234766bae8c2b518cfd882e8907b831d8d03 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 11 Jan 2025 11:10:29 +0800 +Subject: [PATCH] target/riscv/vector_helper.c: fix 'vmvr_v' memcpy endianess + +cherry-pick from 768e7b329c0be22035da077fe76221dd0a47103b + +vmvr_v isn't handling the case where the host might be big endian and +the bytes to be copied aren't sequential. 
+ +Suggested-by: Richard Henderson +Fixes: f714361ed7 ("target/riscv: rvv-1.0: implement vstart CSR") +Signed-off-by: Daniel Henrique Barboza +Reviewed-by: Alistair Francis +Reviewed-by: LIU Zhiwei +Reviewed-by: Richard Henderson +Message-ID: <20240314175704.478276-4-dbarboza@ventanamicro.com> +Signed-off-by: Alistair Francis +Signed-off-by: gubin +--- + target/riscv/vector_helper.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c +index 42ffd3a68a..351842f66a 100644 +--- a/target/riscv/vector_helper.c ++++ b/target/riscv/vector_helper.c +@@ -5063,9 +5063,17 @@ void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) + uint32_t startb = env->vstart * sewb; + uint32_t i = startb; + ++ if (HOST_BIG_ENDIAN && i % 8 != 0) { ++ uint32_t j = ROUND_UP(i, 8); ++ memcpy((uint8_t *)vd + H1(j - 1), ++ (uint8_t *)vs2 + H1(j - 1), ++ j - i); ++ i = j; ++ } ++ + memcpy((uint8_t *)vd + H1(i), + (uint8_t *)vs2 + H1(i), +- maxsz - startb); ++ maxsz - i); + + env->vstart = 0; + } +-- +2.41.0.windows.1 + diff --git a/target-riscv-vector_helper.c-optimize-loops-in-ldst-.patch b/target-riscv-vector_helper.c-optimize-loops-in-ldst-.patch new file mode 100644 index 0000000000000000000000000000000000000000..5e608a2b8875cd398c7b76d0827478a94c46ccce --- /dev/null +++ b/target-riscv-vector_helper.c-optimize-loops-in-ldst-.patch @@ -0,0 +1,56 @@ +From a820983749a2d3eebcc36b5a3ae34436fd52db45 Mon Sep 17 00:00:00 2001 +From: gubin +Date: Sat, 11 Jan 2025 10:54:33 +0800 +Subject: [PATCH] target/riscv/vector_helper.c: optimize loops in ldst helpers + +cherry-pick from 0a11629c915f61df798919db51a18ffe4649cb65 + +Change the for loops in ldst helpers to do a single increment in the +counter, and assign it env->vstart, to avoid re-reading from vstart +every time. 
+ +Suggested-by: Richard Henderson +Signed-off-by: Daniel Henrique Barboza +Reviewed-by: Alistair Francis +Reviewed-by: Richard Henderson +Message-ID: <20240314175704.478276-11-dbarboza@ventanamicro.com> +Signed-off-by: Alistair Francis +Signed-off-by: gubin +--- + target/riscv/vector_helper.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c +index 42ffd3a68a..b5acf81cc0 100644 +--- a/target/riscv/vector_helper.c ++++ b/target/riscv/vector_helper.c +@@ -196,7 +196,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, + uint32_t esz = 1 << log2_esz; + uint32_t vma = vext_vma(desc); + +- for (i = env->vstart; i < env->vl; i++, env->vstart++) { ++ for (i = env->vstart; i < env->vl; env->vstart = ++i) { + k = 0; + while (k < nf) { + if (!vm && !vext_elem_mask(v0, i)) { +@@ -262,7 +262,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, + uint32_t esz = 1 << log2_esz; + + /* load bytes from guest memory */ +- for (i = env->vstart; i < evl; i++, env->vstart++) { ++ for (i = env->vstart; i < evl; env->vstart = ++i) { + k = 0; + while (k < nf) { + target_ulong addr = base + ((i * nf + k) << log2_esz); +@@ -376,7 +376,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, + uint32_t vma = vext_vma(desc); + + /* load bytes from guest memory */ +- for (i = env->vstart; i < env->vl; i++, env->vstart++) { ++ for (i = env->vstart; i < env->vl; env->vstart = ++i) { + k = 0; + while (k < nf) { + if (!vm && !vext_elem_mask(v0, i)) { +-- +2.41.0.windows.1 + diff --git a/target-riscv-vector_helper.c-set-vstart-0-in-GEN_VEX.patch b/target-riscv-vector_helper.c-set-vstart-0-in-GEN_VEX.patch new file mode 100644 index 0000000000000000000000000000000000000000..9ba58011d5e3cf58360cc9fb5bb673699237b04c --- /dev/null +++ b/target-riscv-vector_helper.c-set-vstart-0-in-GEN_VEX.patch @@ -0,0 +1,37 @@ +From a7209a19e2d730fed5f52fda44aaa24e8de8a81c Mon Sep 17 00:00:00 
2001 +From: gubin +Date: Sat, 11 Jan 2025 10:46:10 +0800 +Subject: [PATCH] target/riscv/vector_helper.c: set vstart = 0 in + GEN_VEXT_VSLIDEUP_VX() + +cherry-pick from d3646e31ce6d1e02e46e6eabdbc2e637c0cbece7 + +The helper isn't setting env->vstart = 0 after its execution, as it is +expected from every vector instruction that completes successfully. + +Signed-off-by: Daniel Henrique Barboza +Reviewed-by: Richard Henderson +Reviewed-by: Alistair Francis +Reviewed-by: LIU Zhiwei +Message-ID: <20240314175704.478276-2-dbarboza@ventanamicro.com> +Signed-off-by: Alistair Francis +Signed-off-by: gubin +--- + target/riscv/vector_helper.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c +index 42ffd3a68a..e69b68ba43 100644 +--- a/target/riscv/vector_helper.c ++++ b/target/riscv/vector_helper.c +@@ -4770,6 +4770,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + } \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ + } \ ++ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ + } +-- +2.41.0.windows.1 + diff --git a/target-s390x-Fix-a-typo-in-s390_cpu_class_init.patch b/target-s390x-Fix-a-typo-in-s390_cpu_class_init.patch new file mode 100644 index 0000000000000000000000000000000000000000..07f7fc11dbf65f7f599334f95bc58d2ba5a4d9f0 --- /dev/null +++ b/target-s390x-Fix-a-typo-in-s390_cpu_class_init.patch @@ -0,0 +1,39 @@ +From 52cc8f5a9ba854268a58402d351d2fd43dddb1b4 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 7 Apr 2025 17:54:20 -0400 +Subject: [PATCH] target/s390x: Fix a typo in s390_cpu_class_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 6a93b1c7b4cfa4f5e3c0b8a17177ce14aaa2346c + +Replace the comma at the end of the line by a semicolon. 
+ +Fixes: 41868f846d2 ("s390x/cpumodel: "host" and "qemu" as CPU subclasses") +Reviewed-by: Richard Henderson +Reviewed-by: Thomas Huth +Signed-off-by: Philippe Mathieu-Daudé +Message-ID: <20250324165356.39540-1-philmd@linaro.org> +Signed-off-by: Thomas Huth +Signed-off-by: qihao_yewu +--- + target/s390x/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index 6acfa1c91b..5e64f24cc2 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -350,7 +350,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) + device_class_set_parent_reset(dc, s390_cpu_reset_full, &scc->parent_reset); + + scc->reset = s390_cpu_reset; +- cc->class_by_name = s390_cpu_class_by_name, ++ cc->class_by_name = s390_cpu_class_by_name; + cc->has_work = s390_cpu_has_work; + cc->dump_state = s390_cpu_dump_state; + cc->query_cpu_fast = s390_query_cpu_fast; +-- +2.41.0.windows.1 + diff --git a/target-sparc-use-signed-denominator-in-sdiv-helper.patch b/target-sparc-use-signed-denominator-in-sdiv-helper.patch new file mode 100644 index 0000000000000000000000000000000000000000..50ab7ee15028c10dd7d96b849698b40ac3d6648c --- /dev/null +++ b/target-sparc-use-signed-denominator-in-sdiv-helper.patch @@ -0,0 +1,41 @@ +From a222f9c1eea20db470c55f534d85987df27a1654 Mon Sep 17 00:00:00 2001 +From: Xu Zheng +Date: Fri, 19 Jul 2024 22:45:21 +0800 +Subject: [PATCH] target/sparc: use signed denominator in sdiv helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The result has to be done with the signed denominator (b32) instead of +the unsigned value passed in argument (b). 
+ +cherry-pick from 6b4965373e561b77f91cfbdf41353635c9661358 +Fixes: 1326010322d6 ("target/sparc: Remove CC_OP_DIV") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2319 +Signed-off-by: Clément Chigot +Reviewed-by: Richard Henderson +Message-Id: <20240606144331.698361-1-chigot@adacore.com> +Signed-off-by: Richard Henderson +(cherry picked from commit 6b4965373e561b77f91cfbdf41353635c9661358) +Signed-off-by: Michael Tokarev +Signed-off-by: Xu Zheng +--- + target/sparc/helper.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/sparc/helper.c b/target/sparc/helper.c +index bd10b60e4b..8820c59e7c 100644 +--- a/target/sparc/helper.c ++++ b/target/sparc/helper.c +@@ -121,7 +121,7 @@ uint64_t helper_sdiv(CPUSPARCState *env, target_ulong a, target_ulong b) + return (uint32_t)(b32 < 0 ? INT32_MAX : INT32_MIN) | (-1ull << 32); + } + +- a64 /= b; ++ a64 /= b32; + r = a64; + if (unlikely(r != a64)) { + return (uint32_t)(a64 < 0 ? INT32_MIN : INT32_MAX) | (-1ull << 32); +-- +2.41.0.windows.1 + diff --git a/tcg-Allow-top-bit-of-SIMD_DATA_BITS-to-be-set-in-sim.patch b/tcg-Allow-top-bit-of-SIMD_DATA_BITS-to-be-set-in-sim.patch new file mode 100644 index 0000000000000000000000000000000000000000..28535595883614649dbdae076b6eddc171f83b21 --- /dev/null +++ b/tcg-Allow-top-bit-of-SIMD_DATA_BITS-to-be-set-in-sim.patch @@ -0,0 +1,69 @@ +From d0b24cfdeb8bd64fa55154d79574352be33ecc51 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 15 Nov 2024 17:25:15 +0000 +Subject: [PATCH] tcg: Allow top bit of SIMD_DATA_BITS to be set in simd_desc() + +In simd_desc() we create a SIMD descriptor from various pieces +including an arbitrary data value from the caller. We try to +sanitize these to make sure everything will fit: the 'data' value +needs to fit in the SIMD_DATA_BITS (== 22) sized field. 
However we +do that sanitizing with: + tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS)); + +This works for the case where the data is supposed to be considered +as a signed integer (which can then be returned via simd_data()). +However, some callers want to treat the data value as unsigned. + +Specifically, for the Arm SVE operations, make_svemte_desc() +assembles a data value as a collection of fields, and it needs to use +all 22 bits. Currently if MTE is enabled then its MTEDESC SIZEM1 +field may have the most significant bit set, and then it will trip +this assertion. + +Loosen the assertion so that we only check that the data value will +fit into the field in some way, either as a signed or as an unsigned +value. This means we will fail to detect some kinds of bug in the +callers, but we won't spuriously assert for intentional use of the +data field as unsigned. + +Cc: qemu-stable@nongnu.org +Fixes: db432672dc50e ("tcg: Add generic vector expanders") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2601 +Signed-off-by: Peter Maydell +Message-ID: <20241115172515.1229393-1-peter.maydell@linaro.org> +Reviewed-by: Richard Henderson +Signed-off-by: Richard Henderson +Signed-off-by: Zhongrui Tang +--- + tcg/tcg-op-gvec.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c +index bb88943f79..733b44f105 100644 +--- a/tcg/tcg-op-gvec.c ++++ b/tcg/tcg-op-gvec.c +@@ -88,7 +88,20 @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data) + uint32_t desc = 0; + + check_size_align(oprsz, maxsz, 0); +- tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS)); ++ ++ /* ++ * We want to check that 'data' will fit into SIMD_DATA_BITS. ++ * However, some callers want to treat the data as a signed ++ * value (which they can later get back with simd_data()) ++ * and some want to treat it as an unsigned value. 
++ * So here we assert only that the data will fit into the ++ * field in at least one way. This means that some invalid ++ * values from the caller will not be detected, e.g. if the ++ * caller wants to handle the value as a signed integer but ++ * incorrectly passes us 1 << (SIMD_DATA_BITS - 1). ++ */ ++ tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS) || ++ data == extract32(data, 0, SIMD_DATA_BITS)); + + oprsz = (oprsz / 8) - 1; + maxsz = (maxsz / 8) - 1; +-- +2.41.0.windows.1 + diff --git a/tcg-loongarch64-Fix-tcg_out_movi-vs-some-pcrel-point.patch b/tcg-loongarch64-Fix-tcg_out_movi-vs-some-pcrel-point.patch new file mode 100644 index 0000000000000000000000000000000000000000..67809a17bf0ddd171efb68d881e343c3a64dfcf2 --- /dev/null +++ b/tcg-loongarch64-Fix-tcg_out_movi-vs-some-pcrel-point.patch @@ -0,0 +1,77 @@ +From 6477ff9d89317a6124f3a46215b1567306b6ebe4 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Wed, 19 Jun 2024 05:41:13 +0000 +Subject: [PATCH] tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers + +Simplify the logic for two-part, 32-bit pc-relative addresses. +Rather than assume all such fit in int32_t, do some arithmetic +and assert a result, do some arithmetic first and then check +to see if the pieces are in range. 
+ +Cc: qemu-stable@nongnu.org +Fixes: dacc51720db ("tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi") +Reviewed-by: Song Gao +Reported-by: Song Gao +Signed-off-by: Richard Henderson +(cherry picked from commit 521d7fb3ebdf88112ed13556a93e3037742b9eb8) +Signed-off-by: zhujun2 +--- + tcg/loongarch64/tcg-target.c.inc | 32 +++++++++++++++----------------- + 1 file changed, 15 insertions(+), 17 deletions(-) + +diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc +index bab0a173a3..ad2690b90d 100644 +--- a/tcg/loongarch64/tcg-target.c.inc ++++ b/tcg/loongarch64/tcg-target.c.inc +@@ -365,8 +365,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + * back to the slow path. + */ + +- intptr_t pc_offset; +- tcg_target_long val_lo, val_hi, pc_hi, offset_hi; ++ intptr_t src_rx, pc_offset; + tcg_target_long hi12, hi32, hi52; + + /* Value fits in signed i32. */ +@@ -376,24 +375,23 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + } + + /* PC-relative cases. */ +- pc_offset = tcg_pcrel_diff(s, (void *)val); +- if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) { +- /* Single pcaddu2i. */ +- tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); +- return; ++ src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); ++ if ((val & 3) == 0) { ++ pc_offset = val - src_rx; ++ if (pc_offset == sextreg(pc_offset, 0, 22)) { ++ /* Single pcaddu2i. */ ++ tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2); ++ return; ++ } + } + +- if (pc_offset == (int32_t)pc_offset) { +- /* Offset within 32 bits; load with pcalau12i + ori. */ +- val_lo = sextreg(val, 0, 12); +- val_hi = val >> 12; +- pc_hi = (val - pc_offset) >> 12; +- offset_hi = val_hi - pc_hi; +- +- tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20)); +- tcg_out_opc_pcalau12i(s, rd, offset_hi); ++ pc_offset = (val >> 12) - (src_rx >> 12); ++ if (pc_offset == sextreg(pc_offset, 0, 20)) { ++ /* Load with pcalau12i + ori. 
*/ ++ tcg_target_long val_lo = val & 0xfff; ++ tcg_out_opc_pcalau12i(s, rd, pc_offset); + if (val_lo != 0) { +- tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff); ++ tcg_out_opc_ori(s, rd, rd, val_lo); + } + return; + } +-- +2.41.0.windows.1 + diff --git a/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch b/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch new file mode 100644 index 0000000000000000000000000000000000000000..28e2cf523f3eabc362a585cc6ac48cef43396ed8 --- /dev/null +++ b/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch @@ -0,0 +1,98 @@ +From f797e2713a94b48de59324d00c851d89f4438fc0 Mon Sep 17 00:00:00 2001 +From: Miguel Luis +Date: Fri, 3 Feb 2023 12:33:41 -0100 +Subject: [PATCH] tcg/mttcg: enable threads to unregister in tcg_ctxs[] + +[This patch is just for reference. It has problems as it does not takes care of +the TranslationBlocks and their assigned regions during CPU unrealize] + +When using TCG acceleration in a multi-threaded context each vCPU has its own +thread registered in tcg_ctxs[] upon creation and tcg_cur_ctxs stores the current +number of threads that got created. Although, the lack of a mechanism to +unregister these threads is a problem when exercising vCPU hotplug/unplug +due to the fact that tcg_cur_ctxs gets incremented everytime a vCPU gets +hotplugged but never gets decremented everytime a vCPU gets unplugged, therefore +breaking the assert stating tcg_cur_ctxs < tcg_max_ctxs after a certain amount +of vCPU hotplugs. 
+ +Suggested-by: Salil Mehta +[SM: Check Things To Do Section, https://lore.kernel.org/all/20200613213629.21984-1-salil.mehta@huawei.com/] +Signed-off-by: Miguel Luis +--- + accel/tcg/tcg-accel-ops-mttcg.c | 1 + + include/tcg/startup.h | 5 +++++ + tcg/tcg.c | 23 +++++++++++++++++++++++ + 3 files changed, 29 insertions(+) + +diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c +index fac80095bb..73866990ce 100644 +--- a/accel/tcg/tcg-accel-ops-mttcg.c ++++ b/accel/tcg/tcg-accel-ops-mttcg.c +@@ -122,6 +122,7 @@ static void *mttcg_cpu_thread_fn(void *arg) + qemu_mutex_unlock_iothread(); + rcu_remove_force_rcu_notifier(&force_rcu.notifier); + rcu_unregister_thread(); ++ tcg_unregister_thread(); + return NULL; + } + +diff --git a/include/tcg/startup.h b/include/tcg/startup.h +index f71305765c..c6cb1d92a7 100644 +--- a/include/tcg/startup.h ++++ b/include/tcg/startup.h +@@ -45,6 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus); + */ + void tcg_register_thread(void); + ++/** ++ * tcg_unregister_thread(): Unregister this thread with the TCG runtime ++ */ ++void tcg_unregister_thread(void); ++ + /** + * tcg_prologue_init(): Generate the code for the TCG prologue + * +diff --git a/tcg/tcg.c b/tcg/tcg.c +index 896a36caeb..61fcf8597d 100644 +--- a/tcg/tcg.c ++++ b/tcg/tcg.c +@@ -764,6 +764,14 @@ static void alloc_tcg_plugin_context(TCGContext *s) + #endif + } + ++static void free_tcg_plugin_context(TCGContext *s) ++{ ++#ifdef CONFIG_PLUGIN ++ g_ptr_array_unref(s->plugin_tb->insns); ++ g_free(s->plugin_tb); ++#endif ++} ++ + /* + * All TCG threads except the parent (i.e. 
the one that called tcg_context_init + * and registered the target's TCG globals) must register with this function +@@ -814,6 +822,21 @@ void tcg_register_thread(void) + + tcg_ctx = s; + } ++ ++void tcg_unregister_thread(void) ++{ ++ TCGContext *s = tcg_ctx; ++ unsigned int n; ++ ++ /* Unclaim an entry in tcg_ctxs */ ++ n = qatomic_fetch_dec(&tcg_cur_ctxs); ++ g_assert(n > 1); ++ qatomic_store_release(&tcg_ctxs[n - 1], 0); ++ ++ free_tcg_plugin_context(s); ++ ++ g_free(s); ++} + #endif /* !CONFIG_USER_ONLY */ + + /* pool based memory allocation */ +-- +2.27.0 + diff --git a/tcp_emu-Fix-oob-access.patch b/tcp_emu-Fix-oob-access.patch deleted file mode 100644 index 807dfef08e28fe33a65fede676bbb076f5d9e393..0000000000000000000000000000000000000000 --- a/tcp_emu-Fix-oob-access.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 585634894f511bc1821cef54494bf2d9abc109c9 Mon Sep 17 00:00:00 2001 -From: Samuel Thibault -Date: Tue, 14 Apr 2020 18:04:33 +0800 -Subject: [PATCH] tcp_emu: Fix oob access - -The main loop only checks for one available byte, while we sometimes -need two bytes. ---- - slirp/src/tcp_subr.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index d6dd133a..9c94c03a 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - break; - - case 5: -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - /* - * The difference between versions 1.0 and - * 2.0 is here. For future versions of -@@ -901,6 +904,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - /* This is the field containing the port - * number that RA-player is listening to. 
- */ -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; - if (lport < 6970) - lport += 256; /* don't know why */ --- -2.23.0 diff --git a/tcp_emu-fix-unsafe-snprintf-usages.patch b/tcp_emu-fix-unsafe-snprintf-usages.patch deleted file mode 100644 index 2f6850a60c2fb942ecc7ef15030686d3dd94aa9c..0000000000000000000000000000000000000000 --- a/tcp_emu-fix-unsafe-snprintf-usages.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 220a52fda279038d46c25d39a372154ff9b024d2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureauls?= -Date: Tue, 14 Apr 2020 19:06:35 +0800 -Subject: [PATCH] tcp_emu: fix unsafe snprintf() usages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Various calls to snprintf() assume that snprintf() returns "only" the -number of bytes written (excluding terminating NUL). - -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Before patch ce131029, if there isn't enough room in "m_data" for the -"DCC ..." message, we overflow "m_data". - -After the patch, if there isn't enough room for the same, we don't -overflow "m_data", but we set "m_len" out-of-bounds. The next time an -access is bounded by "m_len", we'll have a buffer overflow then. - -Use slirp_fmt*() to fix potential OOB memory access. 
-Reported-by: default avatarLaszlo Ersek -Signed-off-by: default avatarMarc-André Lureau -Reviewed-by: Samuel Thibault's avatarSamuel Thibault -Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> ---- - slirp/src/tcp_subr.c | 15 +++++++-------- - 1 file changed, 7 insertions(+), 8 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 019b637a..6c1b17bd 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - NTOHS(n1); - NTOHS(n2); - m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); -- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); -- assert(m->m_len < M_ROOM(m)); -+ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); - } else { - *eol = '\r'; - } -@@ -696,7 +695,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), - "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, - n5, n6, x == 7 ? buff : ""); - return 1; -@@ -732,7 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - - m->m_len = bptr - m->m_data; /* Adjust length */ - m->m_len += -- snprintf(bptr, M_FREEROOM(m), -+ slirp_fmt(bptr, M_FREEROOM(m), - "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", - n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); - -@@ -759,7 +758,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) - m->m_len = -- snprintf(m->m_data, M_ROOM(m), -+ slirp_fmt0(m->m_data, M_ROOM(m), - "%d", ntohs(so->so_fport)) + 1; - return 1; - -@@ -779,7 +778,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), - "DCC CHAT chat %lu %u%c\n", - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), 1); -@@ -791,7 +790,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - } - m->m_len = bptr - m->m_data; /* Adjust length */ - m->m_len += -- snprintf(bptr, M_FREEROOM(m), -+ slirp_fmt(bptr, M_FREEROOM(m), - "DCC SEND %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); -@@ -803,7 +802,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - } - m->m_len = bptr - m->m_data; /* Adjust length */ - m->m_len += -- snprintf(bptr, M_FREEROOM(m), -+ slirp_fmt(bptr, M_FREEROOM(m), - "DCC MOVE %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); --- -2.23.0 diff --git a/test-numa-Adjust-aarch64-numa-test.patch b/test-numa-Adjust-aarch64-numa-test.patch deleted file mode 100644 index 24145937724385b1ff8dd0bd280e5e62341ad659..0000000000000000000000000000000000000000 --- a/test-numa-Adjust-aarch64-numa-test.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 3ef97cc418d1061fc0ec70098270ce2d76005cc1 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Thu, 23 Apr 2020 20:54:18 +0800 -Subject: [PATCH] test/numa: Adjust aarch64 numa test - -We have supported topology for arm/virt in previous patch, which -changes the meaning of "thread-id", so we must modify test case. 
- -Signed-off-by: Keqian Zhu ---- - tests/numa-test.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - -diff --git a/tests/numa-test.c b/tests/numa-test.c -index 8de8581231..71cdd7b4f7 100644 ---- a/tests/numa-test.c -+++ b/tests/numa-test.c -@@ -231,17 +231,17 @@ static void aarch64_numa_cpu(const void *data) - QObject *e; - QTestState *qts; - -- cli = make_cli(data, "-smp 2 " -+ cli = make_cli(data, "-smp 2,cores=2 " - "-numa node,nodeid=0 -numa node,nodeid=1 " -- "-numa cpu,node-id=1,thread-id=0 " -- "-numa cpu,node-id=0,thread-id=1"); -+ "-numa cpu,node-id=1,core-id=0 " -+ "-numa cpu,node-id=0,core-id=1"); - qts = qtest_init(cli); - cpus = get_cpus(qts, &resp); - g_assert(cpus); - - while ((e = qlist_pop(cpus))) { - QDict *cpu, *props; -- int64_t thread, node; -+ int64_t core, node; - - cpu = qobject_to(QDict, e); - g_assert(qdict_haskey(cpu, "props")); -@@ -249,12 +249,12 @@ static void aarch64_numa_cpu(const void *data) - - g_assert(qdict_haskey(props, "node-id")); - node = qdict_get_int(props, "node-id"); -- g_assert(qdict_haskey(props, "thread-id")); -- thread = qdict_get_int(props, "thread-id"); -+ g_assert(qdict_haskey(props, "core-id")); -+ core = qdict_get_int(props, "core-id"); - -- if (thread == 0) { -+ if (core == 0) { - g_assert_cmpint(node, ==, 1); -- } else if (thread == 1) { -+ } else if (core == 1) { - g_assert_cmpint(node, ==, 0); - } else { - g_assert(false); --- -2.19.1 diff --git a/test-tpm-pass-optional-machine-options-to-swtpm-test.patch b/test-tpm-pass-optional-machine-options-to-swtpm-test.patch deleted file mode 100644 index fe7fd4ac907813d676cdf0c2a713e31279c29685..0000000000000000000000000000000000000000 --- a/test-tpm-pass-optional-machine-options-to-swtpm-test.patch +++ /dev/null @@ -1,187 +0,0 @@ -From c06a3ceacc1793bc1cfe5c2a6ed510c9aea8253d Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Thu, 13 Aug 2020 20:28:25 +0800 -Subject: [PATCH 17/19] test: tpm: pass optional machine options to swtpm test - 
functions - -We plan to use swtpm test functions on ARM for testing the -sysbus TPM-TIS device. However on ARM there is no default machine -type. So we need to explictly pass some machine options on startup. -Let's allow this by adding a new parameter to both swtpm test -functions and update all call sites. - -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Message-id: 20200305165149.618-9-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - tests/tpm-crb-swtpm-test.c | 5 +++-- - tests/tpm-tests.c | 10 ++++++---- - tests/tpm-tests.h | 5 +++-- - tests/tpm-tis-swtpm-test.c | 5 +++-- - tests/tpm-util.c | 8 ++++++-- - tests/tpm-util.h | 3 ++- - 6 files changed, 23 insertions(+), 13 deletions(-) - -diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c -index 2c4fb8ae..5228cb7a 100644 ---- a/tests/tpm-crb-swtpm-test.c -+++ b/tests/tpm-crb-swtpm-test.c -@@ -29,7 +29,8 @@ static void tpm_crb_swtpm_test(const void *data) - { - const TestState *ts = data; - -- tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_crb_transfer, "tpm-crb"); -+ tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_crb_transfer, -+ "tpm-crb", NULL); - } - - static void tpm_crb_swtpm_migration_test(const void *data) -@@ -37,7 +38,7 @@ static void tpm_crb_swtpm_migration_test(const void *data) - const TestState *ts = data; - - tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, ts->uri, -- tpm_util_crb_transfer, "tpm-crb"); -+ tpm_util_crb_transfer, "tpm-crb", NULL); - } - - int main(int argc, char **argv) -diff --git a/tests/tpm-tests.c b/tests/tpm-tests.c -index e640777a..d823bda8 100644 ---- a/tests/tpm-tests.c -+++ b/tests/tpm-tests.c -@@ -30,7 +30,7 @@ tpm_test_swtpm_skip(void) - } - - void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx, -- const char *ifmodel) -+ const char *ifmodel, const char *machine_options) - { - char *args = NULL; - QTestState *s; -@@ -47,10 +47,11 @@ void tpm_test_swtpm_test(const char *src_tpm_path, 
tx_func *tx, - g_assert_true(succ); - - args = g_strdup_printf( -+ "%s " - "-chardev socket,id=chr,path=%s " - "-tpmdev emulator,id=dev,chardev=chr " - "-device %s,tpmdev=dev", -- addr->u.q_unix.path, ifmodel); -+ machine_options ? : "", addr->u.q_unix.path, ifmodel); - - s = qtest_start(args); - g_free(args); -@@ -78,7 +79,8 @@ void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx, - void tpm_test_swtpm_migration_test(const char *src_tpm_path, - const char *dst_tpm_path, - const char *uri, tx_func *tx, -- const char *ifmodel) -+ const char *ifmodel, -+ const char *machine_options) - { - gboolean succ; - GPid src_tpm_pid, dst_tpm_pid; -@@ -100,7 +102,7 @@ void tpm_test_swtpm_migration_test(const char *src_tpm_path, - - tpm_util_migration_start_qemu(&src_qemu, &dst_qemu, - src_tpm_addr, dst_tpm_addr, uri, -- ifmodel); -+ ifmodel, machine_options); - - tpm_util_startup(src_qemu, tx); - tpm_util_pcrextend(src_qemu, tx); -diff --git a/tests/tpm-tests.h b/tests/tpm-tests.h -index b97688fe..a5df35ab 100644 ---- a/tests/tpm-tests.h -+++ b/tests/tpm-tests.h -@@ -16,11 +16,12 @@ - #include "tpm-util.h" - - void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx, -- const char *ifmodel); -+ const char *ifmodel, const char *machine_options); - - void tpm_test_swtpm_migration_test(const char *src_tpm_path, - const char *dst_tpm_path, - const char *uri, tx_func *tx, -- const char *ifmodel); -+ const char *ifmodel, -+ const char *machine_options); - - #endif /* TESTS_TPM_TESTS_H */ -diff --git a/tests/tpm-tis-swtpm-test.c b/tests/tpm-tis-swtpm-test.c -index 9f58a3a9..9470f157 100644 ---- a/tests/tpm-tis-swtpm-test.c -+++ b/tests/tpm-tis-swtpm-test.c -@@ -29,7 +29,8 @@ static void tpm_tis_swtpm_test(const void *data) - { - const TestState *ts = data; - -- tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_tis_transfer, "tpm-tis"); -+ tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_tis_transfer, -+ "tpm-tis", NULL); - } - - static void tpm_tis_swtpm_migration_test(const 
void *data) -@@ -37,7 +38,7 @@ static void tpm_tis_swtpm_migration_test(const void *data) - const TestState *ts = data; - - tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, ts->uri, -- tpm_util_tis_transfer, "tpm-tis"); -+ tpm_util_tis_transfer, "tpm-tis", NULL); - } - - int main(int argc, char **argv) -diff --git a/tests/tpm-util.c b/tests/tpm-util.c -index e08b1376..7ecdae2f 100644 ---- a/tests/tpm-util.c -+++ b/tests/tpm-util.c -@@ -258,23 +258,27 @@ void tpm_util_migration_start_qemu(QTestState **src_qemu, - SocketAddress *src_tpm_addr, - SocketAddress *dst_tpm_addr, - const char *miguri, -- const char *ifmodel) -+ const char *ifmodel, -+ const char *machine_options) - { - char *src_qemu_args, *dst_qemu_args; - - src_qemu_args = g_strdup_printf( -+ "%s " - "-chardev socket,id=chr,path=%s " - "-tpmdev emulator,id=dev,chardev=chr " - "-device %s,tpmdev=dev ", -- src_tpm_addr->u.q_unix.path, ifmodel); -+ machine_options ? : "", src_tpm_addr->u.q_unix.path, ifmodel); - - *src_qemu = qtest_init(src_qemu_args); - - dst_qemu_args = g_strdup_printf( -+ "%s " - "-chardev socket,id=chr,path=%s " - "-tpmdev emulator,id=dev,chardev=chr " - "-device %s,tpmdev=dev " - "-incoming %s", -+ machine_options ? 
: "", - dst_tpm_addr->u.q_unix.path, - ifmodel, miguri); - -diff --git a/tests/tpm-util.h b/tests/tpm-util.h -index 5755698a..15e39249 100644 ---- a/tests/tpm-util.h -+++ b/tests/tpm-util.h -@@ -44,7 +44,8 @@ void tpm_util_migration_start_qemu(QTestState **src_qemu, - SocketAddress *src_tpm_addr, - SocketAddress *dst_tpm_addr, - const char *miguri, -- const char *ifmodel); -+ const char *ifmodel, -+ const char *machine_options); - - void tpm_util_wait_for_migration_complete(QTestState *who); - --- -2.23.0 - diff --git a/test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch b/test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch deleted file mode 100644 index fe33c8f4bd99eba304dc696d70a5126c559cd052..0000000000000000000000000000000000000000 --- a/test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch +++ /dev/null @@ -1,226 +0,0 @@ -From 2d28c0edddeaee5e4aa6e8c6b109776cddc1c4e4 Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Thu, 13 Aug 2020 21:37:23 +0800 -Subject: [PATCH 19/19] test: tpm-tis: Add Sysbus TPM-TIS device test - -The tests themselves are the same as the ISA device ones. -Only the main() changes as the tpm-tis-device device gets -instantiated. Also the base address of the device is not -0xFED40000 anymore but matches the base address of the -ARM virt platform bus. 
- -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Message-id: 20200305165149.618-11-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - tests/Makefile.include | 5 ++ - tests/tpm-tis-device-swtpm-test.c | 76 +++++++++++++++++++++++++++ - tests/tpm-tis-device-test.c | 87 +++++++++++++++++++++++++++++++ - 3 files changed, 168 insertions(+) - create mode 100644 tests/tpm-tis-device-swtpm-test.c - create mode 100644 tests/tpm-tis-device-test.c - -diff --git a/tests/Makefile.include b/tests/Makefile.include -index 950b32a2..d6de4e10 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -263,6 +263,8 @@ check-qtest-arm-y += tests/boot-serial-test$(EXESUF) - check-qtest-arm-y += tests/hexloader-test$(EXESUF) - check-qtest-arm-$(CONFIG_PFLASH_CFI02) += tests/pflash-cfi02-test$(EXESUF) - -+check-qtest-aarch64-$(CONFIG_TPM_TIS_SYSBUS) += tpm-tis-device-test -+check-qtest-aarch64-$(CONFIG_TPM_TIS_SYSBUS) += tpm-tis-device-swtpm-test - check-qtest-aarch64-y = tests/numa-test$(EXESUF) - check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) - check-qtest-aarch64-y += tests/migration-test$(EXESUF) -@@ -667,7 +669,10 @@ tests/tpm-crb-swtpm-test$(EXESUF): tests/tpm-crb-swtpm-test.o tests/tpm-emu.o \ - tests/tpm-crb-test$(EXESUF): tests/tpm-crb-test.o tests/tpm-emu.o $(test-io-obj-y) - tests/tpm-tis-swtpm-test$(EXESUF): tests/tpm-tis-swtpm-test.o tests/tpm-emu.o \ - tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y) -+tests/tpm-tis-device-swtpm-test$(EXESUF): tests/tpm-tis-device-swtpm-test.o tests/tpm-emu.o \ -+ tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y) - tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-tis-util.o tests/tpm-emu.o $(test-io-obj-y) -+tests/tpm-tis-device-test$(EXESUF): tests/tpm-tis-device-test.o tests/tpm-tis-util.o tests/tpm-emu.o $(test-io-obj-y) - tests/test-io-channel-file$(EXESUF): tests/test-io-channel-file.o \ - tests/io-channel-helpers.o $(test-io-obj-y) - 
tests/test-io-channel-tls$(EXESUF): tests/test-io-channel-tls.o \ -diff --git a/tests/tpm-tis-device-swtpm-test.c b/tests/tpm-tis-device-swtpm-test.c -new file mode 100644 -index 00000000..7b200351 ---- /dev/null -+++ b/tests/tpm-tis-device-swtpm-test.c -@@ -0,0 +1,76 @@ -+/* -+ * QTest testcase for Sysbus TPM TIS talking to external swtpm and swtpm -+ * migration -+ * -+ * Copyright (c) 2018 IBM Corporation -+ * with parts borrowed from migration-test.c that is: -+ * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates -+ * -+ * Authors: -+ * Stefan Berger -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include -+ -+#include "libqtest.h" -+#include "qemu/module.h" -+#include "tpm-tests.h" -+#include "hw/acpi/tpm.h" -+ -+uint64_t tpm_tis_base_addr = 0xc000000; -+#define MACHINE_OPTIONS "-machine virt,gic-version=max -accel tcg" -+ -+typedef struct TestState { -+ char *src_tpm_path; -+ char *dst_tpm_path; -+ char *uri; -+} TestState; -+ -+static void tpm_tis_swtpm_test(const void *data) -+{ -+ const TestState *ts = data; -+ -+ tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_tis_transfer, -+ "tpm-tis-device", MACHINE_OPTIONS); -+} -+ -+static void tpm_tis_swtpm_migration_test(const void *data) -+{ -+ const TestState *ts = data; -+ -+ tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, ts->uri, -+ tpm_util_tis_transfer, "tpm-tis-device", -+ MACHINE_OPTIONS); -+} -+ -+int main(int argc, char **argv) -+{ -+ int ret; -+ TestState ts = { 0 }; -+ -+ ts.src_tpm_path = g_dir_make_tmp("qemu-tpm-tis-device-swtpm-test.XXXXXX", -+ NULL); -+ ts.dst_tpm_path = g_dir_make_tmp("qemu-tpm-tis-device-swtpm-test.XXXXXX", -+ NULL); -+ ts.uri = g_strdup_printf("unix:%s/migsocket", ts.src_tpm_path); -+ -+ module_call_init(MODULE_INIT_QOM); -+ g_test_init(&argc, &argv, NULL); -+ -+ qtest_add_data_func("/tpm/tis-swtpm/test", &ts, 
tpm_tis_swtpm_test); -+ qtest_add_data_func("/tpm/tis-swtpm-migration/test", &ts, -+ tpm_tis_swtpm_migration_test); -+ ret = g_test_run(); -+ -+ g_rmdir(ts.dst_tpm_path); -+ g_free(ts.dst_tpm_path); -+ g_rmdir(ts.src_tpm_path); -+ g_free(ts.src_tpm_path); -+ g_free(ts.uri); -+ -+ return ret; -+} -diff --git a/tests/tpm-tis-device-test.c b/tests/tpm-tis-device-test.c -new file mode 100644 -index 00000000..63ed3644 ---- /dev/null -+++ b/tests/tpm-tis-device-test.c -@@ -0,0 +1,87 @@ -+/* -+ * QTest testcase for SYSBUS TPM TIS -+ * -+ * Copyright (c) 2018 Red Hat, Inc. -+ * Copyright (c) 2018 IBM Corporation -+ * -+ * Authors: -+ * Marc-André Lureau -+ * Stefan Berger -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include -+ -+#include "io/channel-socket.h" -+#include "libqtest-single.h" -+#include "qemu/module.h" -+#include "tpm-emu.h" -+#include "tpm-util.h" -+#include "tpm-tis-util.h" -+ -+/* -+ * As the Sysbus tpm-tis-device is instantiated on the ARM virt -+ * platform bus and it is the only sysbus device dynamically -+ * instantiated, it gets plugged at its base address -+ */ -+uint64_t tpm_tis_base_addr = 0xc000000; -+ -+int main(int argc, char **argv) -+{ -+ char *tmp_path = g_dir_make_tmp("qemu-tpm-tis-device-test.XXXXXX", NULL); -+ GThread *thread; -+ TestState test; -+ char *args; -+ int ret; -+ -+ module_call_init(MODULE_INIT_QOM); -+ g_test_init(&argc, &argv, NULL); -+ -+ test.addr = g_new0(SocketAddress, 1); -+ test.addr->type = SOCKET_ADDRESS_TYPE_UNIX; -+ test.addr->u.q_unix.path = g_build_filename(tmp_path, "sock", NULL); -+ g_mutex_init(&test.data_mutex); -+ g_cond_init(&test.data_cond); -+ test.data_cond_signal = false; -+ -+ thread = g_thread_new(NULL, tpm_emu_ctrl_thread, &test); -+ tpm_emu_test_wait_cond(&test); -+ -+ args = g_strdup_printf( -+ "-machine virt,gic-version=max -accel tcg " -+ "-chardev 
socket,id=chr,path=%s " -+ "-tpmdev emulator,id=dev,chardev=chr " -+ "-device tpm-tis-device,tpmdev=dev", -+ test.addr->u.q_unix.path); -+ qtest_start(args); -+ -+ qtest_add_data_func("/tpm-tis/test_check_localities", &test, -+ tpm_tis_test_check_localities); -+ -+ qtest_add_data_func("/tpm-tis/test_check_access_reg", &test, -+ tpm_tis_test_check_access_reg); -+ -+ qtest_add_data_func("/tpm-tis/test_check_access_reg_seize", &test, -+ tpm_tis_test_check_access_reg_seize); -+ -+ qtest_add_data_func("/tpm-tis/test_check_access_reg_release", &test, -+ tpm_tis_test_check_access_reg_release); -+ -+ qtest_add_data_func("/tpm-tis/test_check_transmit", &test, -+ tpm_tis_test_check_transmit); -+ -+ ret = g_test_run(); -+ -+ qtest_end(); -+ -+ g_thread_join(thread); -+ g_unlink(test.addr->u.q_unix.path); -+ qapi_free_SocketAddress(test.addr); -+ g_rmdir(tmp_path); -+ g_free(tmp_path); -+ g_free(args); -+ return ret; -+} --- -2.23.0 - diff --git a/test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch b/test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch deleted file mode 100644 index 4c7be00a5f1d015d5da4fcf4791e6a175f0ff9c7..0000000000000000000000000000000000000000 --- a/test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch +++ /dev/null @@ -1,1044 +0,0 @@ -From c8ed2a1fbe306ecbfb5c7d4156ae81c029829d95 Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Thu, 13 Aug 2020 20:56:54 +0800 -Subject: [PATCH 18/19] test: tpm-tis: Get prepared to share tests between ISA - and sysbus devices - -ISA and sysbus TPM-TIS devices will share their tests. Only -the main() will change (instantiation option is different). -Also the base address of the TPM-TIS device is going to be -different. on x86 it is located at 0xFED40000 while on ARM -it can be located at any location, discovered through the -device tree description. - -So we put shared test functions in a new object module. -Each test needs to set tpm_tis_base_addr global variable. 
- -Also take benefit of this move to fix "block comments using -a leading */ on a separate line" checkpatch warnings. - -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Message-id: 20200305165149.618-10-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - tests/Makefile.include | 2 +- - tests/tpm-crb-swtpm-test.c | 4 + - tests/tpm-crb-test.c | 3 + - tests/tpm-tis-swtpm-test.c | 3 + - tests/tpm-tis-test.c | 414 +--------------------------------- - tests/tpm-tis-util.c | 451 +++++++++++++++++++++++++++++++++++++ - tests/tpm-tis-util.h | 23 ++ - tests/tpm-util.c | 3 - - tests/tpm-util.h | 5 + - 9 files changed, 493 insertions(+), 415 deletions(-) - create mode 100644 tests/tpm-tis-util.c - create mode 100644 tests/tpm-tis-util.h - -diff --git a/tests/Makefile.include b/tests/Makefile.include -index c151de64..950b32a2 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -667,7 +667,7 @@ tests/tpm-crb-swtpm-test$(EXESUF): tests/tpm-crb-swtpm-test.o tests/tpm-emu.o \ - tests/tpm-crb-test$(EXESUF): tests/tpm-crb-test.o tests/tpm-emu.o $(test-io-obj-y) - tests/tpm-tis-swtpm-test$(EXESUF): tests/tpm-tis-swtpm-test.o tests/tpm-emu.o \ - tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y) --tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-emu.o $(test-io-obj-y) -+tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-tis-util.o tests/tpm-emu.o $(test-io-obj-y) - tests/test-io-channel-file$(EXESUF): tests/test-io-channel-file.o \ - tests/io-channel-helpers.o $(test-io-obj-y) - tests/test-io-channel-tls$(EXESUF): tests/test-io-channel-tls.o \ -diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c -index 5228cb7a..55fdb565 100644 ---- a/tests/tpm-crb-swtpm-test.c -+++ b/tests/tpm-crb-swtpm-test.c -@@ -18,6 +18,10 @@ - #include "libqtest.h" - #include "qemu/module.h" - #include "tpm-tests.h" -+#include "hw/acpi/tpm.h" -+ -+/* Not used but needed for linking */ -+uint64_t tpm_tis_base_addr = 
TPM_TIS_ADDR_BASE; - - typedef struct TestState { - char *src_tpm_path; -diff --git a/tests/tpm-crb-test.c b/tests/tpm-crb-test.c -index a139caa5..32695810 100644 ---- a/tests/tpm-crb-test.c -+++ b/tests/tpm-crb-test.c -@@ -19,6 +19,9 @@ - #include "qemu/module.h" - #include "tpm-emu.h" - -+/* Not used but needed for linking */ -+uint64_t tpm_tis_base_addr = TPM_TIS_ADDR_BASE; -+ - #define TPM_CMD "\x80\x01\x00\x00\x00\x0c\x00\x00\x01\x44\x00\x00" - - static void tpm_crb_test(const void *data) -diff --git a/tests/tpm-tis-swtpm-test.c b/tests/tpm-tis-swtpm-test.c -index 9470f157..90131cb3 100644 ---- a/tests/tpm-tis-swtpm-test.c -+++ b/tests/tpm-tis-swtpm-test.c -@@ -18,6 +18,9 @@ - #include "libqtest.h" - #include "qemu/module.h" - #include "tpm-tests.h" -+#include "hw/acpi/tpm.h" -+ -+uint64_t tpm_tis_base_addr = TPM_TIS_ADDR_BASE; - - typedef struct TestState { - char *src_tpm_path; -diff --git a/tests/tpm-tis-test.c b/tests/tpm-tis-test.c -index 92a7e95a..8042de13 100644 ---- a/tests/tpm-tis-test.c -+++ b/tests/tpm-tis-test.c -@@ -1,5 +1,5 @@ - /* -- * QTest testcase for TPM TIS -+ * QTest testcase for ISA TPM TIS - * - * Copyright (c) 2018 Red Hat, Inc. - * Copyright (c) 2018 IBM Corporation -@@ -20,417 +20,9 @@ - #include "libqtest.h" - #include "qemu/module.h" - #include "tpm-emu.h" -+#include "tpm-tis-util.h" - --#define TIS_REG(LOCTY, REG) \ -- (TPM_TIS_ADDR_BASE + ((LOCTY) << 12) + REG) -- --#define DEBUG_TIS_TEST 0 -- --#define DPRINTF(fmt, ...) 
do { \ -- if (DEBUG_TIS_TEST) { \ -- printf(fmt, ## __VA_ARGS__); \ -- } \ --} while (0) -- --#define DPRINTF_ACCESS \ -- DPRINTF("%s: %d: locty=%d l=%d access=0x%02x pending_request_flag=0x%x\n", \ -- __func__, __LINE__, locty, l, access, pending_request_flag) -- --#define DPRINTF_STS \ -- DPRINTF("%s: %d: sts = 0x%08x\n", __func__, __LINE__, sts) -- --static const uint8_t TPM_CMD[12] = -- "\x80\x01\x00\x00\x00\x0c\x00\x00\x01\x44\x00\x00"; -- --static void tpm_tis_test_check_localities(const void *data) --{ -- uint8_t locty; -- uint8_t access; -- uint32_t ifaceid; -- uint32_t capability; -- uint32_t didvid; -- uint32_t rid; -- -- for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES; locty++) { -- access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- capability = readl(TIS_REG(locty, TPM_TIS_REG_INTF_CAPABILITY)); -- g_assert_cmpint(capability, ==, TPM_TIS_CAPABILITIES_SUPPORTED2_0); -- -- ifaceid = readl(TIS_REG(locty, TPM_TIS_REG_INTERFACE_ID)); -- g_assert_cmpint(ifaceid, ==, TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0); -- -- didvid = readl(TIS_REG(locty, TPM_TIS_REG_DID_VID)); -- g_assert_cmpint(didvid, !=, 0); -- g_assert_cmpint(didvid, !=, 0xffffffff); -- -- rid = readl(TIS_REG(locty, TPM_TIS_REG_RID)); -- g_assert_cmpint(rid, !=, 0); -- g_assert_cmpint(rid, !=, 0xffffffff); -- } --} -- --static void tpm_tis_test_check_access_reg(const void *data) --{ -- uint8_t locty; -- uint8_t access; -- -- /* do not test locality 4 (hw only) */ -- for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* request use of locality */ -- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -- -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, 
TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* release access */ -- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), -- TPM_TIS_ACCESS_ACTIVE_LOCALITY); -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- } --} -- --/* -- * Test case for seizing access by a higher number locality -- */ --static void tpm_tis_test_check_access_reg_seize(const void *data) --{ -- int locty, l; -- uint8_t access; -- uint8_t pending_request_flag; -- -- /* do not test locality 4 (hw only) */ -- for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { -- pending_request_flag = 0; -- -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* request use of locality */ -- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* lower localities cannot seize access */ -- for (l = 0; l < locty; l++) { -- /* lower locality is not active */ -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* try to request use from 'l' */ -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -- -- /* requesting use from 'l' was not possible; -- we must see REQUEST_USE and possibly PENDING_REQUEST */ -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_REQUEST_USE | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* 
locality 'locty' must be unchanged; -- we must see PENDING_REQUEST */ -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- TPM_TIS_ACCESS_PENDING_REQUEST | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* try to seize from 'l' */ -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); -- /* seize from 'l' was not possible */ -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_REQUEST_USE | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* locality 'locty' must be unchanged */ -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- TPM_TIS_ACCESS_PENDING_REQUEST | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* on the next loop we will have a PENDING_REQUEST flag -- set for locality 'l' */ -- pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; -- } -- -- /* higher localities can 'seize' access but not 'request use'; -- note: this will activate first l+1, then l+2 etc. 
*/ -- for (l = locty + 1; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { -- /* try to 'request use' from 'l' */ -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -- -- /* requesting use from 'l' was not possible; we should see -- REQUEST_USE and may see PENDING_REQUEST */ -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_REQUEST_USE | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* locality 'l-1' must be unchanged; we should always -- see PENDING_REQUEST from 'l' requesting access */ -- access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- TPM_TIS_ACCESS_PENDING_REQUEST | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* try to seize from 'l' */ -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); -- -- /* seize from 'l' was possible */ -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* l - 1 should show that it has BEEN_SEIZED */ -- access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_BEEN_SEIZED | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* clear the BEEN_SEIZED flag and make sure it's gone */ -- writeb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS), -- TPM_TIS_ACCESS_BEEN_SEIZED); -- -- access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- } -- -- /* PENDING_REQUEST will not be set if locty = 0 since all localities -- were active; in case of locty = 1, 
locality 0 will be active -- but no PENDING_REQUEST anywhere */ -- if (locty <= 1) { -- pending_request_flag = 0; -- } -- -- /* release access from l - 1; this activates locty - 1 */ -- l--; -- -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- -- DPRINTF("%s: %d: relinquishing control on l = %d\n", -- __func__, __LINE__, l); -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), -- TPM_TIS_ACCESS_ACTIVE_LOCALITY); -- -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- for (l = locty - 1; l >= 0; l--) { -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* release this locality */ -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), -- TPM_TIS_ACCESS_ACTIVE_LOCALITY); -- -- if (l == 1) { -- pending_request_flag = 0; -- } -- } -- -- /* no locality may be active now */ -- for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- } -- } --} -- --/* -- * Test case for getting access when higher number locality relinquishes access -- */ --static void tpm_tis_test_check_access_reg_release(const void *data) --{ -- int locty, l; -- uint8_t access; -- uint8_t pending_request_flag; -- -- /* do not test locality 4 (hw only) */ -- for (locty = TPM_TIS_NUM_LOCALITIES - 2; locty >= 0; locty--) { -- pending_request_flag = 0; -- -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* request use of locality */ -- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), 
TPM_TIS_ACCESS_REQUEST_USE); -- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- /* request use of all other localities */ -- for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { -- if (l == locty) { -- continue; -- } -- /* request use of locality 'l' -- we MUST see REQUEST USE and -- may see PENDING_REQUEST */ -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_REQUEST_USE | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; -- } -- /* release locality 'locty' */ -- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), -- TPM_TIS_ACCESS_ACTIVE_LOCALITY); -- /* highest locality should now be active; release it and make sure the -- next higest locality is active afterwards */ -- for (l = TPM_TIS_NUM_LOCALITIES - 2; l >= 0; l--) { -- if (l == locty) { -- continue; -- } -- /* 'l' should be active now */ -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- /* 'l' relinquishes access */ -- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), -- TPM_TIS_ACCESS_ACTIVE_LOCALITY); -- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -- DPRINTF_ACCESS; -- if (l == 1 || (locty <= 1 && l == 2)) { -- pending_request_flag = 0; -- } -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- pending_request_flag | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- } -- } --} -- --/* -- * Test case for transmitting packets -- */ --static void tpm_tis_test_check_transmit(const void *data) --{ -- const TestState *s = data; -- uint8_t access; -- 
uint32_t sts; -- uint16_t bcount; -- size_t i; -- -- /* request use of locality 0 */ -- writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -- access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); -- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -- TPM_TIS_ACCESS_ACTIVE_LOCALITY | -- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -- -- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -- DPRINTF_STS; -- -- g_assert_cmpint(sts & 0xff, ==, 0); -- g_assert_cmpint(sts & TPM_TIS_STS_TPM_FAMILY_MASK, ==, -- TPM_TIS_STS_TPM_FAMILY2_0); -- -- bcount = (sts >> 8) & 0xffff; -- g_assert_cmpint(bcount, >=, 128); -- -- writel(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_COMMAND_READY); -- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -- DPRINTF_STS; -- g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_COMMAND_READY); -- -- /* transmit command */ -- for (i = 0; i < sizeof(TPM_CMD); i++) { -- writeb(TIS_REG(0, TPM_TIS_REG_DATA_FIFO), TPM_CMD[i]); -- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -- DPRINTF_STS; -- if (i < sizeof(TPM_CMD) - 1) { -- g_assert_cmpint(sts & 0xff, ==, -- TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); -- } else { -- g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_VALID); -- } -- g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); -- } -- /* start processing */ -- writeb(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_TPM_GO); -- -- uint64_t end_time = g_get_monotonic_time() + 50 * G_TIME_SPAN_SECOND; -- do { -- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -- if ((sts & TPM_TIS_STS_DATA_AVAILABLE) != 0) { -- break; -- } -- } while (g_get_monotonic_time() < end_time); -- -- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -- DPRINTF_STS; -- g_assert_cmpint(sts & 0xff, == , -- TPM_TIS_STS_VALID | TPM_TIS_STS_DATA_AVAILABLE); -- bcount = (sts >> 8) & 0xffff; -- -- /* read response */ -- uint8_t tpm_msg[sizeof(struct tpm_hdr)]; -- g_assert_cmpint(sizeof(tpm_msg), ==, bcount); -- -- for (i = 0; i < sizeof(tpm_msg); i++) { -- tpm_msg[i] = readb(TIS_REG(0, TPM_TIS_REG_DATA_FIFO)); -- sts = 
readl(TIS_REG(0, TPM_TIS_REG_STS)); -- DPRINTF_STS; -- if (sts & TPM_TIS_STS_DATA_AVAILABLE) { -- g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); -- } -- } -- g_assert_cmpmem(tpm_msg, sizeof(tpm_msg), s->tpm_msg, sizeof(*s->tpm_msg)); -- -- /* relinquish use of locality 0 */ -- writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_ACTIVE_LOCALITY); -- access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); --} -+uint64_t tpm_tis_base_addr = TPM_TIS_ADDR_BASE; - - int main(int argc, char **argv) - { -diff --git a/tests/tpm-tis-util.c b/tests/tpm-tis-util.c -new file mode 100644 -index 00000000..9aff503f ---- /dev/null -+++ b/tests/tpm-tis-util.c -@@ -0,0 +1,451 @@ -+/* -+ * QTest testcase for TPM TIS: common test functions used for both -+ * the ISA and SYSBUS devices -+ * -+ * Copyright (c) 2018 Red Hat, Inc. -+ * Copyright (c) 2018 IBM Corporation -+ * -+ * Authors: -+ * Marc-André Lureau -+ * Stefan Berger -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include -+ -+#include "hw/acpi/tpm.h" -+#include "io/channel-socket.h" -+#include "libqtest.h" -+#include "qemu/module.h" -+#include "tpm-emu.h" -+#include "tpm-util.h" -+#include "tpm-tis-util.h" -+ -+#define DEBUG_TIS_TEST 0 -+ -+#define DPRINTF(fmt, ...) 
do { \ -+ if (DEBUG_TIS_TEST) { \ -+ printf(fmt, ## __VA_ARGS__); \ -+ } \ -+} while (0) -+ -+#define DPRINTF_ACCESS \ -+ DPRINTF("%s: %d: locty=%d l=%d access=0x%02x pending_request_flag=0x%x\n", \ -+ __func__, __LINE__, locty, l, access, pending_request_flag) -+ -+#define DPRINTF_STS \ -+ DPRINTF("%s: %d: sts = 0x%08x\n", __func__, __LINE__, sts) -+ -+static const uint8_t TPM_CMD[12] = -+ "\x80\x01\x00\x00\x00\x0c\x00\x00\x01\x44\x00\x00"; -+ -+void tpm_tis_test_check_localities(const void *data) -+{ -+ uint8_t locty; -+ uint8_t access; -+ uint32_t ifaceid; -+ uint32_t capability; -+ uint32_t didvid; -+ uint32_t rid; -+ -+ for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES; locty++) { -+ access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ capability = readl(TIS_REG(locty, TPM_TIS_REG_INTF_CAPABILITY)); -+ g_assert_cmpint(capability, ==, TPM_TIS_CAPABILITIES_SUPPORTED2_0); -+ -+ ifaceid = readl(TIS_REG(locty, TPM_TIS_REG_INTERFACE_ID)); -+ g_assert_cmpint(ifaceid, ==, TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0); -+ -+ didvid = readl(TIS_REG(locty, TPM_TIS_REG_DID_VID)); -+ g_assert_cmpint(didvid, !=, 0); -+ g_assert_cmpint(didvid, !=, 0xffffffff); -+ -+ rid = readl(TIS_REG(locty, TPM_TIS_REG_RID)); -+ g_assert_cmpint(rid, !=, 0); -+ g_assert_cmpint(rid, !=, 0xffffffff); -+ } -+} -+ -+void tpm_tis_test_check_access_reg(const void *data) -+{ -+ uint8_t locty; -+ uint8_t access; -+ -+ /* do not test locality 4 (hw only) */ -+ for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* request use of locality */ -+ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -+ -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | 
-+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* release access */ -+ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ } -+} -+ -+/* -+ * Test case for seizing access by a higher number locality -+ */ -+void tpm_tis_test_check_access_reg_seize(const void *data) -+{ -+ int locty, l; -+ uint8_t access; -+ uint8_t pending_request_flag; -+ -+ /* do not test locality 4 (hw only) */ -+ for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { -+ pending_request_flag = 0; -+ -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* request use of locality */ -+ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* lower localities cannot seize access */ -+ for (l = 0; l < locty; l++) { -+ /* lower locality is not active */ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* try to request use from 'l' */ -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -+ -+ /* -+ * requesting use from 'l' was not possible; -+ * we must see REQUEST_USE and possibly PENDING_REQUEST -+ */ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_REQUEST_USE | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* -+ * locality 'locty' must be 
unchanged; -+ * we must see PENDING_REQUEST -+ */ -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ TPM_TIS_ACCESS_PENDING_REQUEST | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* try to seize from 'l' */ -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); -+ /* seize from 'l' was not possible */ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_REQUEST_USE | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* locality 'locty' must be unchanged */ -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ TPM_TIS_ACCESS_PENDING_REQUEST | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* -+ * on the next loop we will have a PENDING_REQUEST flag -+ * set for locality 'l' -+ */ -+ pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; -+ } -+ -+ /* -+ * higher localities can 'seize' access but not 'request use'; -+ * note: this will activate first l+1, then l+2 etc. 
-+ */ -+ for (l = locty + 1; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { -+ /* try to 'request use' from 'l' */ -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -+ -+ /* -+ * requesting use from 'l' was not possible; we should see -+ * REQUEST_USE and may see PENDING_REQUEST -+ */ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_REQUEST_USE | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* -+ * locality 'l-1' must be unchanged; we should always -+ * see PENDING_REQUEST from 'l' requesting access -+ */ -+ access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ TPM_TIS_ACCESS_PENDING_REQUEST | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* try to seize from 'l' */ -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); -+ -+ /* seize from 'l' was possible */ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* l - 1 should show that it has BEEN_SEIZED */ -+ access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_BEEN_SEIZED | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* clear the BEEN_SEIZED flag and make sure it's gone */ -+ writeb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS), -+ TPM_TIS_ACCESS_BEEN_SEIZED); -+ -+ access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ } -+ -+ /* -+ * PENDING_REQUEST will not be set if locty = 0 since all localities -+ * were 
active; in case of locty = 1, locality 0 will be active -+ * but no PENDING_REQUEST anywhere -+ */ -+ if (locty <= 1) { -+ pending_request_flag = 0; -+ } -+ -+ /* release access from l - 1; this activates locty - 1 */ -+ l--; -+ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ -+ DPRINTF("%s: %d: relinquishing control on l = %d\n", -+ __func__, __LINE__, l); -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ for (l = locty - 1; l >= 0; l--) { -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* release this locality */ -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ -+ if (l == 1) { -+ pending_request_flag = 0; -+ } -+ } -+ -+ /* no locality may be active now */ -+ for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ } -+ } -+} -+ -+/* -+ * Test case for getting access when higher number locality relinquishes access -+ */ -+void tpm_tis_test_check_access_reg_release(const void *data) -+{ -+ int locty, l; -+ uint8_t access; -+ uint8_t pending_request_flag; -+ -+ /* do not test locality 4 (hw only) */ -+ for (locty = TPM_TIS_NUM_LOCALITIES - 2; locty >= 0; locty--) { -+ pending_request_flag = 0; -+ -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* request use of locality */ -+ writeb(TIS_REG(locty, 
TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -+ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ /* request use of all other localities */ -+ for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { -+ if (l == locty) { -+ continue; -+ } -+ /* -+ * request use of locality 'l' -- we MUST see REQUEST USE and -+ * may see PENDING_REQUEST -+ */ -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_REQUEST_USE | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; -+ } -+ /* release locality 'locty' */ -+ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ /* -+ * highest locality should now be active; release it and make sure the -+ * next higest locality is active afterwards -+ */ -+ for (l = TPM_TIS_NUM_LOCALITIES - 2; l >= 0; l--) { -+ if (l == locty) { -+ continue; -+ } -+ /* 'l' should be active now */ -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ /* 'l' relinquishes access */ -+ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); -+ DPRINTF_ACCESS; -+ if (l == 1 || (locty <= 1 && l == 2)) { -+ pending_request_flag = 0; -+ } -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ pending_request_flag | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ } -+ } -+} -+ -+/* -+ * Test case for transmitting packets -+ */ -+void tpm_tis_test_check_transmit(const void *data) -+{ -+ const 
TestState *s = data; -+ uint8_t access; -+ uint32_t sts; -+ uint16_t bcount; -+ size_t i; -+ -+ /* request use of locality 0 */ -+ writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); -+ access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); -+ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY | -+ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); -+ -+ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -+ DPRINTF_STS; -+ -+ g_assert_cmpint(sts & 0xff, ==, 0); -+ g_assert_cmpint(sts & TPM_TIS_STS_TPM_FAMILY_MASK, ==, -+ TPM_TIS_STS_TPM_FAMILY2_0); -+ -+ bcount = (sts >> 8) & 0xffff; -+ g_assert_cmpint(bcount, >=, 128); -+ -+ writel(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_COMMAND_READY); -+ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -+ DPRINTF_STS; -+ g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_COMMAND_READY); -+ -+ /* transmit command */ -+ for (i = 0; i < sizeof(TPM_CMD); i++) { -+ writeb(TIS_REG(0, TPM_TIS_REG_DATA_FIFO), TPM_CMD[i]); -+ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -+ DPRINTF_STS; -+ if (i < sizeof(TPM_CMD) - 1) { -+ g_assert_cmpint(sts & 0xff, ==, -+ TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); -+ } else { -+ g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_VALID); -+ } -+ g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); -+ } -+ /* start processing */ -+ writeb(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_TPM_GO); -+ -+ uint64_t end_time = g_get_monotonic_time() + 50 * G_TIME_SPAN_SECOND; -+ do { -+ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -+ if ((sts & TPM_TIS_STS_DATA_AVAILABLE) != 0) { -+ break; -+ } -+ } while (g_get_monotonic_time() < end_time); -+ -+ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -+ DPRINTF_STS; -+ g_assert_cmpint(sts & 0xff, == , -+ TPM_TIS_STS_VALID | TPM_TIS_STS_DATA_AVAILABLE); -+ bcount = (sts >> 8) & 0xffff; -+ -+ /* read response */ -+ uint8_t tpm_msg[sizeof(struct tpm_hdr)]; -+ g_assert_cmpint(sizeof(tpm_msg), ==, bcount); -+ -+ for (i = 0; i < sizeof(tpm_msg); i++) { -+ tpm_msg[i] = readb(TIS_REG(0, 
TPM_TIS_REG_DATA_FIFO)); -+ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); -+ DPRINTF_STS; -+ if (sts & TPM_TIS_STS_DATA_AVAILABLE) { -+ g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); -+ } -+ } -+ g_assert_cmpmem(tpm_msg, sizeof(tpm_msg), s->tpm_msg, sizeof(*s->tpm_msg)); -+ -+ /* relinquish use of locality 0 */ -+ writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); -+} -diff --git a/tests/tpm-tis-util.h b/tests/tpm-tis-util.h -new file mode 100644 -index 00000000..d10efe86 ---- /dev/null -+++ b/tests/tpm-tis-util.h -@@ -0,0 +1,23 @@ -+/* -+ * QTest TPM TIS: Common test functions used for both the -+ * ISA and SYSBUS devices -+ * -+ * Copyright (c) 2018 IBM Corporation -+ * -+ * Authors: -+ * Stefan Berger -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#ifndef TESTS_TPM_TIS_UTIL_H -+#define TESTS_TPM_TIS_UTIL_H -+ -+void tpm_tis_test_check_localities(const void *data); -+void tpm_tis_test_check_access_reg(const void *data); -+void tpm_tis_test_check_access_reg_seize(const void *data); -+void tpm_tis_test_check_access_reg_release(const void *data); -+void tpm_tis_test_check_transmit(const void *data); -+ -+#endif /* TESTS_TPM_TIS_UTIL_H */ -diff --git a/tests/tpm-util.c b/tests/tpm-util.c -index 7ecdae2f..34efae8f 100644 ---- a/tests/tpm-util.c -+++ b/tests/tpm-util.c -@@ -19,9 +19,6 @@ - #include "tpm-util.h" - #include "qapi/qmp/qdict.h" - --#define TIS_REG(LOCTY, REG) \ -- (TPM_TIS_ADDR_BASE + ((LOCTY) << 12) + REG) -- - void tpm_util_crb_transfer(QTestState *s, - const unsigned char *req, size_t req_size, - unsigned char *rsp, size_t rsp_size) -diff --git a/tests/tpm-util.h b/tests/tpm-util.h -index 15e39249..3b97d690 100644 ---- a/tests/tpm-util.h -+++ b/tests/tpm-util.h -@@ -15,6 +15,11 @@ - - #include "io/channel-socket.h" - -+extern uint64_t tpm_tis_base_addr; -+ -+#define TIS_REG(LOCTY, 
REG) \ -+ (tpm_tis_base_addr + ((LOCTY) << 12) + REG) -+ - typedef void (tx_func)(QTestState *s, - const unsigned char *req, size_t req_size, - unsigned char *rsp, size_t rsp_size); --- -2.23.0 - diff --git a/tests-Add-bios-tests-to-arm-virt.patch b/tests-Add-bios-tests-to-arm-virt.patch deleted file mode 100644 index 025afb506017f9bc1c6fdb26df35c9534a8f3672..0000000000000000000000000000000000000000 --- a/tests-Add-bios-tests-to-arm-virt.patch +++ /dev/null @@ -1,86 +0,0 @@ -From abbcc35ccb22d81d69a28dc66b5f5d94e673a25e Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:33 +0100 -Subject: [PATCH] tests: Add bios tests to arm/virt - -This adds numamem and memhp tests for arm/virt platform. - -Signed-off-by: Shameer Kolothum -Reviewed-by: Igor Mammedov -Message-Id: <20190918130633.4872-12-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - tests/bios-tables-test.c | 49 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 49 insertions(+) - -diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c -index 53a91a8067..5e177b7155 100644 ---- a/tests/bios-tables-test.c -+++ b/tests/bios-tables-test.c -@@ -874,6 +874,53 @@ static void test_acpi_piix4_tcg_dimm_pxm(void) - test_acpi_tcg_dimm_pxm(MACHINE_PC); - } - -+static void test_acpi_virt_tcg_memhp(void) -+{ -+ test_data data = { -+ .machine = "virt", -+ .accel = "tcg", -+ .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", -+ .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", -+ .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", -+ .ram_start = 0x40000000ULL, -+ .scan_len = 256ULL * 1024 * 1024, -+ }; -+ -+ data.variant = ".memhp"; -+ test_acpi_one(" -cpu cortex-a57" -+ " -m 256M,slots=3,maxmem=1G" -+ " -object memory-backend-ram,id=ram0,size=128M" -+ " -object memory-backend-ram,id=ram1,size=128M" -+ " -numa node,memdev=ram0 -numa node,memdev=ram1" -+ " -numa dist,src=0,dst=1,val=21", -+ 
&data); -+ -+ free_test_data(&data); -+ -+} -+ -+static void test_acpi_virt_tcg_numamem(void) -+{ -+ test_data data = { -+ .machine = "virt", -+ .accel = "tcg", -+ .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", -+ .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", -+ .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", -+ .ram_start = 0x40000000ULL, -+ .scan_len = 128ULL * 1024 * 1024, -+ }; -+ -+ data.variant = ".numamem"; -+ test_acpi_one(" -cpu cortex-a57" -+ " -object memory-backend-ram,id=ram0,size=128M" -+ " -numa node,memdev=ram0", -+ &data); -+ -+ free_test_data(&data); -+ -+} -+ - static void test_acpi_virt_tcg(void) - { - test_data data = { -@@ -920,6 +967,8 @@ int main(int argc, char *argv[]) - qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); - } else if (strcmp(arch, "aarch64") == 0) { - qtest_add_func("acpi/virt", test_acpi_virt_tcg); -+ qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); -+ qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); - } - ret = g_test_run(); - boot_sector_cleanup(disk); --- -2.19.1 diff --git a/tests-Disable-filemonitor-testcase.patch b/tests-Disable-filemonitor-testcase.patch new file mode 100644 index 0000000000000000000000000000000000000000..a12778e836e6f24397d8258a73dc509120cb9d7b --- /dev/null +++ b/tests-Disable-filemonitor-testcase.patch @@ -0,0 +1,32 @@ +From bad33579c56b73d56e0b220c98faad7893609b85 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 18 Mar 2024 10:21:04 +0800 +Subject: [PATCH] tests: Disable filemonitor testcase + +Since filemonitor testcase requires that host kernel being a LTS version, +we cannot guarantee that on OBS system. Lets disable it by default. 
+ +Signed-off-by: Ying Fang +Signed-off-by: Jinhao Gao +Signed-off-by: Yuan Zhang +--- + tests/unit/meson.build | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/tests/unit/meson.build b/tests/unit/meson.build +index a05d471090..598ba41bb9 100644 +--- a/tests/unit/meson.build ++++ b/tests/unit/meson.build +@@ -142,9 +142,6 @@ if have_system + 'test-vmstate': [migration, io], + 'test-yank': ['socket-helpers.c', qom, io, chardev] + } +- if config_host_data.get('CONFIG_INOTIFY1') +- tests += {'test-util-filemonitor': []} +- endif + + # Some tests: test-char, test-qdev-global-props, and test-qga, + # are not runnable under TSan due to a known issue. +-- +2.27.0 + diff --git a/tests-Disalbe-filemonitor-testcase.patch b/tests-Disalbe-filemonitor-testcase.patch deleted file mode 100644 index b389299e35dd49154f6e660ee3d66237b15ec58b..0000000000000000000000000000000000000000 --- a/tests-Disalbe-filemonitor-testcase.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 4f1eaa63065594276c11958e963377a09668d44b Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Thu, 6 Aug 2020 10:05:00 +0800 -Subject: [PATCH] tests: Disalbe filemonitor testcase - -Since filemonitor testcase requires that host kernel being a LTS version, -we cannot guarantee that on OBS system. Let's disable it by default. 
- -Signed-of-by: Ying Fang - -diff --git a/tests/Makefile.include b/tests/Makefile.include -index d8cf00c1..f3273ad3 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -117,7 +117,6 @@ ifneq (,$(findstring qemu-ga,$(TOOLS))) - check-unit-$(call land,$(CONFIG_LINUX),$(CONFIG_VIRTIO_SERIAL)) += tests/test-qga$(EXESUF) - endif - check-unit-y += tests/test-timed-average$(EXESUF) --check-unit-$(CONFIG_INOTIFY1) += tests/test-util-filemonitor$(EXESUF) - check-unit-y += tests/test-util-sockets$(EXESUF) - check-unit-$(CONFIG_BLOCK) += tests/test-authz-simple$(EXESUF) - check-unit-$(CONFIG_BLOCK) += tests/test-authz-list$(EXESUF) -@@ -654,8 +653,6 @@ tests/test-crypto-tlssession$(EXESUF): tests/test-crypto-tlssession.o \ - tests/crypto-tls-x509-helpers.o tests/pkix_asn1_tab.o \ - tests/crypto-tls-psk-helpers.o \ - $(test-crypto-obj-y) --tests/test-util-filemonitor$(EXESUF): tests/test-util-filemonitor.o \ -- $(test-util-obj-y) - tests/test-util-sockets$(EXESUF): tests/test-util-sockets.o \ - tests/socket-helpers.o $(test-util-obj-y) - tests/test-authz-simple$(EXESUF): tests/test-authz-simple.o $(test-authz-obj-y) --- -2.23.0 - diff --git a/tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch b/tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch deleted file mode 100644 index e739883feb04d14f33a97a2a0b6690ac6c5ccc24..0000000000000000000000000000000000000000 --- a/tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 27e2533e43f0ab2b8a60f1902f58f8752581ea9f Mon Sep 17 00:00:00 2001 -From: Shameer Kolothum -Date: Wed, 18 Sep 2019 14:06:32 +0100 -Subject: [PATCH] tests: Update ACPI tables list for upcoming arm/virt tests - -This is in preparation to add numamem and memhp tests to -arm/virt platform. The bios-tables-test-allowed-diff.h -is updated with a list of expected ACPI tables that needs to be -present in tests/data/acpi/virt folder. 
- -Signed-off-by: Shameer Kolothum -Message-Id: <20190918130633.4872-11-shameerali.kolothum.thodi@huawei.com> -Acked-by: Peter Maydell -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Igor Mammedov ---- - tests/bios-tables-test-allowed-diff.h | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h -index 32a401ae35..3776dd2f3d 100644 ---- a/tests/bios-tables-test-allowed-diff.h -+++ b/tests/bios-tables-test-allowed-diff.h -@@ -1,4 +1,17 @@ - /* List of comma-separated changed AML files to ignore */ - "tests/data/acpi/virt/DSDT", -+"tests/data/acpi/virt/APIC.memhp", -+"tests/data/acpi/virt/APIC.numamem", - "tests/data/acpi/virt/DSDT.memhp", - "tests/data/acpi/virt/DSDT.numamem", -+"tests/data/acpi/virt/FACP.memhp", -+"tests/data/acpi/virt/FACP.numamem", -+"tests/data/acpi/virt/GTDT.memhp", -+"tests/data/acpi/virt/GTDT.numamem", -+"tests/data/acpi/virt/MCFG.memhp", -+"tests/data/acpi/virt/MCFG.numamem", -+"tests/data/acpi/virt/SLIT.memhp", -+"tests/data/acpi/virt/SPCR.memhp", -+"tests/data/acpi/virt/SPCR.numamem", -+"tests/data/acpi/virt/SRAT.memhp", -+"tests/data/acpi/virt/SRAT.numamem", --- -2.19.1 diff --git a/tests-Wait-for-migration-completion-on-destination-Q.patch b/tests-Wait-for-migration-completion-on-destination-Q.patch new file mode 100644 index 0000000000000000000000000000000000000000..f65d4f86e1b9233b82e3dea9b9fcb105af14a97e --- /dev/null +++ b/tests-Wait-for-migration-completion-on-destination-Q.patch @@ -0,0 +1,44 @@ +From c4d618ea0dc507084d9c1e2b61e58691a73c2cf4 Mon Sep 17 00:00:00 2001 +From: Susanooo +Date: Thu, 24 Oct 2024 10:10:34 +0800 +Subject: [PATCH] tests: Wait for migration completion on destination QEMU to + avoid failures +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Rather than waiting for the completion of migration on the source side, +wait for it on the 
destination QEMU side to avoid accessing the TPM TIS +memory mapped registers before QEMU could restore their state. This +error condition could be triggered on busy systems where the destination +QEMU did not have enough time to restore the TIS state while the test case +was already reading its registers. The test case was for example reading +the STS register and received an unexpected value (0xffffffff), which +led to a segmentation fault later on due to trying to read 0xffff bytes +from the TIS into a buffer. + +Cc: +Reported-by: Fabiano Rosas +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Stefan Berger +Signed-off-by: zhangchujun +--- + tests/qtest/tpm-tests.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qtest/tpm-tests.c b/tests/qtest/tpm-tests.c +index fb94496bbd..197714f8d9 100644 +--- a/tests/qtest/tpm-tests.c ++++ b/tests/qtest/tpm-tests.c +@@ -114,7 +114,7 @@ void tpm_test_swtpm_migration_test(const char *src_tpm_path, + sizeof(tpm_pcrread_resp)); + + tpm_util_migrate(src_qemu, uri); +- tpm_util_wait_for_migration_complete(src_qemu); ++ tpm_util_wait_for_migration_complete(dst_qemu); + + tpm_util_pcrread(dst_qemu, tx, tpm_pcrread_resp, + sizeof(tpm_pcrread_resp)); +-- +2.41.0.windows.1 + diff --git a/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch b/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6cd45895335b8bbfdcba149b7c30692b9afb0e7 --- /dev/null +++ b/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch @@ -0,0 +1,62 @@ +From cecec52ca38fa98a821c2a833e71a5fae1cc735d Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 20:10:51 +0800 +Subject: [PATCH] tests/acpi: Update expected ACPI tables for vcpu hotplug + +Update the ACPI tables for vcpu hotplug. 
+ +Signed-off-by: Keqian Zhu +--- + tests/qtest/bios-tables-test-allowed-diff.h | 40 ------------------ + 1 files changed, 40 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index c7406e395a..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,41 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/pc/DSDT", +-"tests/data/acpi/pc/DSDT.acpierst", +-"tests/data/acpi/pc/DSDT.acpihmat", +-"tests/data/acpi/pc/DSDT.bridge", +-"tests/data/acpi/pc/DSDT.cphp", +-"tests/data/acpi/pc/DSDT.dimmpxm", +-"tests/data/acpi/pc/DSDT.hpbridge", +-"tests/data/acpi/pc/DSDT.hpbrroot", +-"tests/data/acpi/pc/DSDT.ipmikcs", +-"tests/data/acpi/pc/DSDT.memhp", +-"tests/data/acpi/pc/DSDT.nohpet", +-"tests/data/acpi/pc/DSDT.numamem", +-"tests/data/acpi/pc/DSDT.roothp", +-"tests/data/acpi/q35/DSDT", +-"tests/data/acpi/q35/DSDT.acpierst", +-"tests/data/acpi/q35/DSDT.acpihmat", +-"tests/data/acpi/q35/DSDT.acpihmat-noinitiator", +-"tests/data/acpi/q35/DSDT.applesmc", +-"tests/data/acpi/q35/DSDT.bridge", +-"tests/data/acpi/q35/DSDT.cphp", +-"tests/data/acpi/q35/DSDT.cxl", +-"tests/data/acpi/q35/DSDT.dimmpxm", +-"tests/data/acpi/q35/DSDT.ipmibt", +-"tests/data/acpi/q35/DSDT.ipmismbus", +-"tests/data/acpi/q35/DSDT.ivrs", +-"tests/data/acpi/q35/DSDT.memhp", +-"tests/data/acpi/q35/DSDT.mmio64", +-"tests/data/acpi/q35/DSDT.multi-bridge", +-"tests/data/acpi/q35/DSDT.noacpihp", +-"tests/data/acpi/q35/DSDT.nohpet", +-"tests/data/acpi/q35/DSDT.numamem", +-"tests/data/acpi/q35/DSDT.pvpanic-isa", +-"tests/data/acpi/q35/DSDT.tis.tpm12", +-"tests/data/acpi/q35/DSDT.tis.tpm2", +-"tests/data/acpi/q35/DSDT.viot", +-"tests/data/acpi/virt/DSDT", +-"tests/data/acpi/virt/DSDT.acpihmatvirt", +-"tests/data/acpi/virt/DSDT.memhp", +-"tests/data/acpi/virt/DSDT.pxb", +-"tests/data/acpi/virt/DSDT.topology", +\ No newline at end of file +-- +2.27.0 + 
diff --git a/tests-acpi-add-empty-files.patch b/tests-acpi-add-empty-files.patch deleted file mode 100644 index 46e51c0de0e0ba84b6edf01fb62e3005acd37697..0000000000000000000000000000000000000000 --- a/tests-acpi-add-empty-files.patch +++ /dev/null @@ -1,88 +0,0 @@ -From c943416df54931cea8b19183fd7c4f2dbd86ec72 Mon Sep 17 00:00:00 2001 -From: "Michael S. Tsirkin" -Date: Sun, 29 Sep 2019 10:54:12 -0400 -Subject: [PATCH] tests/acpi: add empty files - -Needed to make tests pass. Will replace with actual files. - -Signed-off-by: Michael S. Tsirkin ---- - tests/data/acpi/virt/APIC.memhp | 0 - tests/data/acpi/virt/APIC.numamem | 0 - tests/data/acpi/virt/DSDT.memhp | 0 - tests/data/acpi/virt/DSDT.numamem | 0 - tests/data/acpi/virt/FACP.memhp | 0 - tests/data/acpi/virt/FACP.numamem | 0 - tests/data/acpi/virt/GTDT.memhp | 0 - tests/data/acpi/virt/GTDT.numamem | 0 - tests/data/acpi/virt/MCFG.memhp | 0 - tests/data/acpi/virt/MCFG.numamem | 0 - tests/data/acpi/virt/SLIT.memhp | 0 - tests/data/acpi/virt/SPCR.memhp | 0 - tests/data/acpi/virt/SPCR.numamem | 0 - tests/data/acpi/virt/SRAT.memhp | 0 - tests/data/acpi/virt/SRAT.numamem | 0 - 15 files changed, 0 insertions(+), 0 deletions(-) - create mode 100644 tests/data/acpi/virt/APIC.memhp - create mode 100644 tests/data/acpi/virt/APIC.numamem - create mode 100644 tests/data/acpi/virt/DSDT.memhp - create mode 100644 tests/data/acpi/virt/DSDT.numamem - create mode 100644 tests/data/acpi/virt/FACP.memhp - create mode 100644 tests/data/acpi/virt/FACP.numamem - create mode 100644 tests/data/acpi/virt/GTDT.memhp - create mode 100644 tests/data/acpi/virt/GTDT.numamem - create mode 100644 tests/data/acpi/virt/MCFG.memhp - create mode 100644 tests/data/acpi/virt/MCFG.numamem - create mode 100644 tests/data/acpi/virt/SLIT.memhp - create mode 100644 tests/data/acpi/virt/SPCR.memhp - create mode 100644 tests/data/acpi/virt/SPCR.numamem - create mode 100644 tests/data/acpi/virt/SRAT.memhp - create mode 100644 tests/data/acpi/virt/SRAT.numamem 
- -diff --git a/tests/data/acpi/virt/APIC.memhp b/tests/data/acpi/virt/APIC.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/APIC.numamem b/tests/data/acpi/virt/APIC.numamem -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/DSDT.memhp b/tests/data/acpi/virt/DSDT.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/DSDT.numamem b/tests/data/acpi/virt/DSDT.numamem -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/FACP.memhp b/tests/data/acpi/virt/FACP.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/FACP.numamem b/tests/data/acpi/virt/FACP.numamem -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/GTDT.memhp b/tests/data/acpi/virt/GTDT.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/GTDT.numamem b/tests/data/acpi/virt/GTDT.numamem -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/MCFG.memhp b/tests/data/acpi/virt/MCFG.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/MCFG.numamem b/tests/data/acpi/virt/MCFG.numamem -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/SLIT.memhp b/tests/data/acpi/virt/SLIT.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/SPCR.memhp b/tests/data/acpi/virt/SPCR.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/SPCR.numamem b/tests/data/acpi/virt/SPCR.numamem -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/SRAT.memhp b/tests/data/acpi/virt/SRAT.memhp -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tests/data/acpi/virt/SRAT.numamem b/tests/data/acpi/virt/SRAT.numamem -new file mode 100644 -index 
0000000000..e69de29bb2 --- -2.19.1 diff --git a/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch b/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch new file mode 100644 index 0000000000000000000000000000000000000000..67d9c444d4f5069d6eeb2a4ee6f4e4cce374ea88 --- /dev/null +++ b/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch @@ -0,0 +1,62 @@ +From 6cfe9afcaceb7d9fb7d54f08b2362fc654b54d12 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 17:23:18 +0800 +Subject: [PATCH] tests/acpi/bios-tables-test: Allow changes to virt/DSDT file + +Prepare to change of cpu aml. + +Signed-off-by: Keqian Zhu +--- + tests/qtest/bios-tables-test-allowed-diff.h | 40 +++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..c7406e395a 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,41 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/pc/DSDT", ++"tests/data/acpi/pc/DSDT.acpierst", ++"tests/data/acpi/pc/DSDT.acpihmat", ++"tests/data/acpi/pc/DSDT.bridge", ++"tests/data/acpi/pc/DSDT.cphp", ++"tests/data/acpi/pc/DSDT.dimmpxm", ++"tests/data/acpi/pc/DSDT.hpbridge", ++"tests/data/acpi/pc/DSDT.hpbrroot", ++"tests/data/acpi/pc/DSDT.ipmikcs", ++"tests/data/acpi/pc/DSDT.memhp", ++"tests/data/acpi/pc/DSDT.nohpet", ++"tests/data/acpi/pc/DSDT.numamem", ++"tests/data/acpi/pc/DSDT.roothp", ++"tests/data/acpi/q35/DSDT", ++"tests/data/acpi/q35/DSDT.acpierst", ++"tests/data/acpi/q35/DSDT.acpihmat", ++"tests/data/acpi/q35/DSDT.acpihmat-noinitiator", ++"tests/data/acpi/q35/DSDT.applesmc", ++"tests/data/acpi/q35/DSDT.bridge", ++"tests/data/acpi/q35/DSDT.cphp", ++"tests/data/acpi/q35/DSDT.cxl", ++"tests/data/acpi/q35/DSDT.dimmpxm", ++"tests/data/acpi/q35/DSDT.ipmibt", ++"tests/data/acpi/q35/DSDT.ipmismbus", ++"tests/data/acpi/q35/DSDT.ivrs", 
++"tests/data/acpi/q35/DSDT.memhp", ++"tests/data/acpi/q35/DSDT.mmio64", ++"tests/data/acpi/q35/DSDT.multi-bridge", ++"tests/data/acpi/q35/DSDT.noacpihp", ++"tests/data/acpi/q35/DSDT.nohpet", ++"tests/data/acpi/q35/DSDT.numamem", ++"tests/data/acpi/q35/DSDT.pvpanic-isa", ++"tests/data/acpi/q35/DSDT.tis.tpm12", ++"tests/data/acpi/q35/DSDT.tis.tpm2", ++"tests/data/acpi/q35/DSDT.viot", ++"tests/data/acpi/virt/DSDT", ++"tests/data/acpi/virt/DSDT.acpihmatvirt", ++"tests/data/acpi/virt/DSDT.memhp", ++"tests/data/acpi/virt/DSDT.pxb", ++"tests/data/acpi/virt/DSDT.topology", +\ No newline at end of file +-- +2.27.0 + diff --git a/tests-allow-empty-expected-files.patch b/tests-allow-empty-expected-files.patch deleted file mode 100644 index 615fb2121d3ffd93d926fe4f3e9623ffce16eb88..0000000000000000000000000000000000000000 --- a/tests-allow-empty-expected-files.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 2ab0636e0c8fcb8b5b1b222f0d5ae7f4dfc663c5 Mon Sep 17 00:00:00 2001 -From: "Michael S. Tsirkin" -Date: Sat, 5 Oct 2019 17:09:17 -0400 -Subject: [PATCH] tests: allow empty expected files - -An empty expected file is a handy way to seed the files -without creating merge conflicts. - -Signed-off-by: Michael S. Tsirkin ---- - tests/bios-tables-test.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c -index a356ac3489..53a91a8067 100644 ---- a/tests/bios-tables-test.c -+++ b/tests/bios-tables-test.c -@@ -334,7 +334,10 @@ try_again: - g_assert(ret); - g_assert_no_error(error); - g_assert(exp_sdt.aml); -- g_assert(exp_sdt.aml_len); -+ if (!exp_sdt.aml_len) { -+ fprintf(stderr, "Warning! 
zero length expected file '%s'\n", -+ aml_file); -+ } - - g_array_append_val(exp_tables, exp_sdt); - } --- -2.19.1 diff --git a/tests-allow-filtering-crypto-cipher-benchmark-tests.patch b/tests-allow-filtering-crypto-cipher-benchmark-tests.patch deleted file mode 100644 index 51f6b70461b0ade8ff80b2f8ac0302546593228f..0000000000000000000000000000000000000000 --- a/tests-allow-filtering-crypto-cipher-benchmark-tests.patch +++ /dev/null @@ -1,56 +0,0 @@ -From c2a6b4b3204aef2efc39f1b59bc110b54ca24587 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 15 Oct 2019 11:19:29 +0100 -Subject: [PATCH] tests: allow filtering crypto cipher benchmark tests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add support for specifying a cipher mode and chunk size as argv to -filter which combinations are benchmarked. For example to only -benchmark XTS mode with 512 byte chunks: - - ./tests/benchmark-crypto-cipher xts 512 - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefano Garzarella -Signed-off-by: Daniel P. 
Berrangé ---- - tests/benchmark-crypto-cipher.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - -diff --git a/tests/benchmark-crypto-cipher.c b/tests/benchmark-crypto-cipher.c -index cb6b7200a5..53032334ec 100644 ---- a/tests/benchmark-crypto-cipher.c -+++ b/tests/benchmark-crypto-cipher.c -@@ -163,15 +163,26 @@ static void test_cipher_speed_xts_aes_256(const void *opaque) - - int main(int argc, char **argv) - { -+ char *alg = NULL; -+ char *size = NULL; - g_test_init(&argc, &argv, NULL); - g_assert(qcrypto_init(NULL) == 0); - - #define ADD_TEST(mode, cipher, keysize, chunk) \ -- g_test_add_data_func( \ -+ if ((!alg || g_str_equal(alg, #mode)) && \ -+ (!size || g_str_equal(size, #chunk))) \ -+ g_test_add_data_func( \ - "/crypto/cipher/" #mode "-" #cipher "-" #keysize "/chunk-" #chunk, \ - (void *)chunk, \ - test_cipher_speed_ ## mode ## _ ## cipher ## _ ## keysize) - -+ if (argc >= 2) { -+ alg = argv[1]; -+ } -+ if (argc >= 3) { -+ size = argv[2]; -+ } -+ - #define ADD_TESTS(chunk) \ - do { \ - ADD_TEST(ecb, aes, 128, chunk); \ --- -2.27.0 - diff --git a/tests-avocado-fix-typo-in-replay_linux.patch b/tests-avocado-fix-typo-in-replay_linux.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a59496350738cf28d84cc85ac56b81adf497129 --- /dev/null +++ b/tests-avocado-fix-typo-in-replay_linux.patch @@ -0,0 +1,36 @@ +From 0f62625a0f8b6244203fbd2838b8e9c29efc2eea Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 17 Oct 2024 10:49:54 +0800 +Subject: [PATCH] tests/avocado: fix typo in replay_linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 2d8508bbab39bf342fe80e73c0b528eb3960fa37 + +Reviewed-by: Pavel Dovgalyuk +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Signed-off-by: Alex Bennée +Message-Id: <20231211091346.14616-3-alex.bennee@linaro.org> +Signed-off-by: Zhang Jiao +--- + tests/avocado/replay_linux.py | 2 +- + 1 file changed, 1 
insertion(+), 1 deletion(-) + +diff --git a/tests/avocado/replay_linux.py b/tests/avocado/replay_linux.py +index 270ccc1eae..e95bff3299 100644 +--- a/tests/avocado/replay_linux.py ++++ b/tests/avocado/replay_linux.py +@@ -94,7 +94,7 @@ def launch_and_wait(self, record, args, shift): + else: + vm.event_wait('SHUTDOWN', self.timeout) + vm.wait() +- logger.info('successfully fihished the replay') ++ logger.info('successfully finished the replay') + elapsed = time.time() - start_time + logger.info('elapsed time %.2f sec' % elapsed) + return elapsed +-- +2.41.0.windows.1 + diff --git a/tests-benchmark-crypto-with-fixed-data-size-not-time.patch b/tests-benchmark-crypto-with-fixed-data-size-not-time.patch deleted file mode 100644 index 8841294a8a43877948bd2c74228794aafdaf0114..0000000000000000000000000000000000000000 --- a/tests-benchmark-crypto-with-fixed-data-size-not-time.patch +++ /dev/null @@ -1,150 +0,0 @@ -From c151519a7f5c08dde9a32534bc485588a5793967 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Thu, 17 Oct 2019 14:22:19 +0100 -Subject: [PATCH] tests: benchmark crypto with fixed data size, not time period -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently the crypto benchmarks are processing data in varying chunk -sizes, over a fixed time period. This turns out to be a terrible idea -because with small chunk sizes the overhead of checking the elapsed -time on each loop iteration masks the true performance. - -Benchmarking over a fixed data size avoids the loop running any system -calls which can interfere with the performance measurements. 
- -Before this change - -Enc chunk 512 bytes 2283.47 MB/sec Dec chunk 512 bytes 2236.23 MB/sec OK -Enc chunk 4096 bytes 2744.97 MB/sec Dec chunk 4096 bytes 2614.71 MB/sec OK -Enc chunk 16384 bytes 2777.53 MB/sec Dec chunk 16384 bytes 2678.44 MB/sec OK -Enc chunk 65536 bytes 2809.34 MB/sec Dec chunk 65536 bytes 2699.47 MB/sec OK - -After this change - -Enc chunk 512 bytes 2058.22 MB/sec Dec chunk 512 bytes 2030.11 MB/sec OK -Enc chunk 4096 bytes 2699.27 MB/sec Dec chunk 4096 bytes 2573.78 MB/sec OK -Enc chunk 16384 bytes 2748.52 MB/sec Dec chunk 16384 bytes 2653.76 MB/sec OK -Enc chunk 65536 bytes 2814.08 MB/sec Dec chunk 65536 bytes 2712.74 MB/sec OK - -The actual crypto performance hasn't changed, which shows how -significant the mis-measurement has been for small data sizes. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefano Garzarella -Signed-off-by: Daniel P. Berrangé ---- - tests/benchmark-crypto-cipher.c | 26 ++++++++++++++------------ - tests/benchmark-crypto-hash.c | 17 +++++++++-------- - 2 files changed, 23 insertions(+), 20 deletions(-) - -diff --git a/tests/benchmark-crypto-cipher.c b/tests/benchmark-crypto-cipher.c -index 67fdf8c31d..cb6b7200a5 100644 ---- a/tests/benchmark-crypto-cipher.c -+++ b/tests/benchmark-crypto-cipher.c -@@ -21,11 +21,12 @@ static void test_cipher_speed(size_t chunk_size, - { - QCryptoCipher *cipher; - Error *err = NULL; -- double total = 0.0; - uint8_t *key = NULL, *iv = NULL; - uint8_t *plaintext = NULL, *ciphertext = NULL; - size_t nkey; - size_t niv; -+ const size_t total = 2 * GiB; -+ size_t remain; - - if (!qcrypto_cipher_supports(alg, mode)) { - return; -@@ -58,33 +59,34 @@ static void test_cipher_speed(size_t chunk_size, - &err) == 0); - - g_test_timer_start(); -- do { -+ remain = total; -+ while (remain) { - g_assert(qcrypto_cipher_encrypt(cipher, - plaintext, - ciphertext, - chunk_size, - &err) == 0); -- total += chunk_size; -- } while (g_test_timer_elapsed() < 1.0); -+ remain -= chunk_size; -+ } -+ 
g_test_timer_elapsed(); - -- total /= MiB; - g_print("Enc chunk %zu bytes ", chunk_size); -- g_print("%.2f MB/sec ", total / g_test_timer_last()); -+ g_print("%.2f MB/sec ", (double)total / MiB / g_test_timer_last()); - -- total = 0.0; - g_test_timer_start(); -- do { -+ remain = total; -+ while (remain) { - g_assert(qcrypto_cipher_decrypt(cipher, - plaintext, - ciphertext, - chunk_size, - &err) == 0); -- total += chunk_size; -- } while (g_test_timer_elapsed() < 1.0); -+ remain -= chunk_size; -+ } -+ g_test_timer_elapsed(); - -- total /= MiB; - g_print("Dec chunk %zu bytes ", chunk_size); -- g_print("%.2f MB/sec ", total / g_test_timer_last()); -+ g_print("%.2f MB/sec ", (double)total / MiB / g_test_timer_last()); - - qcrypto_cipher_free(cipher); - g_free(plaintext); -diff --git a/tests/benchmark-crypto-hash.c b/tests/benchmark-crypto-hash.c -index 9b6f7a9155..7f659f7323 100644 ---- a/tests/benchmark-crypto-hash.c -+++ b/tests/benchmark-crypto-hash.c -@@ -20,7 +20,8 @@ static void test_hash_speed(const void *opaque) - size_t chunk_size = (size_t)opaque; - uint8_t *in = NULL, *out = NULL; - size_t out_len = 0; -- double total = 0.0; -+ const size_t total = 2 * GiB; -+ size_t remain; - struct iovec iov; - int ret; - -@@ -31,20 +32,20 @@ static void test_hash_speed(const void *opaque) - iov.iov_len = chunk_size; - - g_test_timer_start(); -- do { -+ remain = total; -+ while (remain) { - ret = qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256, - &iov, 1, &out, &out_len, - NULL); - g_assert(ret == 0); - -- total += chunk_size; -- } while (g_test_timer_elapsed() < 5.0); -+ remain -= chunk_size; -+ } -+ g_test_timer_elapsed(); - -- total /= MiB; - g_print("sha256: "); -- g_print("Testing chunk_size %zu bytes ", chunk_size); -- g_print("done: %.2f MB in %.2f secs: ", total, g_test_timer_last()); -- g_print("%.2f MB/sec\n", total / g_test_timer_last()); -+ g_print("Hash %zu GB chunk size %zu bytes ", total / GiB, chunk_size); -+ g_print("%.2f MB/sec ", (double)total / MiB / 
g_test_timer_last()); - - g_free(out); - g_free(in); --- -2.27.0 - diff --git a/tests-bios-tables-test-Rename-smbios-type-4-related-.patch b/tests-bios-tables-test-Rename-smbios-type-4-related-.patch new file mode 100644 index 0000000000000000000000000000000000000000..6622fc9d05f520b8d78f8730b0a692acd1b1ccf2 --- /dev/null +++ b/tests-bios-tables-test-Rename-smbios-type-4-related-.patch @@ -0,0 +1,97 @@ +From b59b75fc9f7ed73323179305363f0c2e00613863 Mon Sep 17 00:00:00 2001 +From: Zhao Liu +Date: Tue, 28 Nov 2023 00:02:02 +0800 +Subject: [PATCH] tests: bios-tables-test: Rename smbios type 4 related test + functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In fact, type4-count, core-count, core-count2, thread-count and +thread-count2 are tested with KVM not TCG. + +Rename these test functions to reflect KVM base instead of TCG. + +Signed-off-by: Zhao Liu +Message-Id: <20231127160202.1037290-1-zhao1.liu@linux.intel.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Igor Mammedov +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + tests/qtest/bios-tables-test.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index fe6a9a8563..21811a1ab5 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1015,7 +1015,7 @@ static void test_acpi_q35_tcg(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_type4_count(void) ++static void test_acpi_q35_kvm_type4_count(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1031,7 +1031,7 @@ static void test_acpi_q35_tcg_type4_count(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_core_count(void) ++static void test_acpi_q35_kvm_core_count(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1048,7 +1048,7 @@ static void test_acpi_q35_tcg_core_count(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_core_count2(void) ++static void test_acpi_q35_kvm_core_count2(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1065,7 +1065,7 @@ static void test_acpi_q35_tcg_core_count2(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_thread_count(void) ++static void test_acpi_q35_kvm_thread_count(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1082,7 +1082,7 @@ static void test_acpi_q35_tcg_thread_count(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_thread_count2(void) ++static void test_acpi_q35_kvm_thread_count2(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -2262,15 +2262,15 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic); + qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); + qtest_add_func("acpi/q35/type4-count", +- test_acpi_q35_tcg_type4_count); ++ test_acpi_q35_kvm_type4_count); + qtest_add_func("acpi/q35/core-count", +- test_acpi_q35_tcg_core_count); ++ test_acpi_q35_kvm_core_count); + 
qtest_add_func("acpi/q35/core-count2", +- test_acpi_q35_tcg_core_count2); ++ test_acpi_q35_kvm_core_count2); + qtest_add_func("acpi/q35/thread-count", +- test_acpi_q35_tcg_thread_count); ++ test_acpi_q35_kvm_thread_count); + qtest_add_func("acpi/q35/thread-count2", +- test_acpi_q35_tcg_thread_count2); ++ test_acpi_q35_kvm_thread_count2); + } + if (qtest_has_device("virtio-iommu-pci")) { + qtest_add_func("acpi/q35/viot", test_acpi_q35_viot); +-- +2.27.0 + diff --git a/tests-bios-tables-test-disable-this-testcase.patch b/tests-bios-tables-test-disable-this-testcase.patch deleted file mode 100644 index 993fee935546735c16bfe9a30ff856ac135f4d53..0000000000000000000000000000000000000000 --- a/tests-bios-tables-test-disable-this-testcase.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 0814ef80cdf212c68b73fc1fbad4eeece3560ef9 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 15 Apr 2020 19:52:09 +0800 -Subject: [PATCH] tests/bios-tables-test: disable this testcase - -We will change ARM virt ACPI FACP and PPTT table in order to -support CPU topology information presentation. However our -change make this testcase fail since we changed the table -totally and we cannot apply patch with rpmbuild system. 
- -Signed-off-by: Ying Fang ---- - tests/Makefile.include | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/tests/Makefile.include b/tests/Makefile.include -index fd7fdb86..d8cf00c1 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -164,7 +164,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) - check-qtest-i386-y += tests/ahci-test$(EXESUF) - check-qtest-i386-y += tests/hd-geo-test$(EXESUF) - check-qtest-i386-y += tests/boot-order-test$(EXESUF) --check-qtest-i386-y += tests/bios-tables-test$(EXESUF) -+# check-qtest-i386-y += tests/bios-tables-test$(EXESUF) - check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) - check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-i386-y += tests/rtc-test$(EXESUF) -@@ -269,7 +269,7 @@ check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) - check-qtest-aarch64-y += tests/migration-test$(EXESUF) - # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make test unconditional - ifneq ($(ARCH),arm) --check-qtest-aarch64-y += tests/bios-tables-test$(EXESUF) -+#check-qtest-aarch64-y += tests/bios-tables-test$(EXESUF) - endif - - check-qtest-microblazeel-y += $(check-qtest-microblaze-y) -@@ -783,7 +783,7 @@ tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o - tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o - tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) - tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) --tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ -+#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ - tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) - tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) - tests/microbit-test$(EXESUF): tests/microbit-test.o --- -2.23.0 diff --git a/tests-bump-QOS_PATH_MAX_ELEMENT_SIZE-again.patch b/tests-bump-QOS_PATH_MAX_ELEMENT_SIZE-again.patch new file mode 100644 index 
0000000000000000000000000000000000000000..3124074cb8393433d279d3c3f33abc5c18dbcb09 --- /dev/null +++ b/tests-bump-QOS_PATH_MAX_ELEMENT_SIZE-again.patch @@ -0,0 +1,43 @@ +From 8c7e606ff2e59df7be719b13f28fe629414fcb30 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Alex=20Benn=C3=A9e?= +Date: Tue, 5 Mar 2024 12:09:37 +0000 +Subject: [PATCH] tests: bump QOS_PATH_MAX_ELEMENT_SIZE again +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We "fixed" a bug with LTO builds with 100c459f194 (tests/qtest: bump +up QOS_PATH_MAX_ELEMENT_SIZE) but it seems it has triggered again. + +The array is sized according to the maximum anticipated length of a +path on the graph. However, the worst case for a depth-first search is +to push all nodes on the graph. So it's not really LTO, it depends on +the ordering of the constructors. + +Lets be more assertive raising QOS_PATH_MAX_ELEMENT_SIZE to make it go +away again. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1186 (again) +Reviewed-by: Thomas Huth +Signed-off-by: Alex Bennée +Message-Id: <20240305121005.3528075-2-alex.bennee@linaro.org> +--- + tests/qtest/libqos/qgraph.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qtest/libqos/qgraph.h b/tests/qtest/libqos/qgraph.h +index 287022a67c..1b5de02e7b 100644 +--- a/tests/qtest/libqos/qgraph.h ++++ b/tests/qtest/libqos/qgraph.h +@@ -24,7 +24,7 @@ + #include "libqos-malloc.h" + + /* maximum path length */ +-#define QOS_PATH_MAX_ELEMENT_SIZE 64 ++#define QOS_PATH_MAX_ELEMENT_SIZE 128 + + typedef struct QOSGraphObject QOSGraphObject; + typedef struct QOSGraphNode QOSGraphNode; +-- +2.41.0.windows.1 + diff --git a/tests-data-acpi-Update-DSDT-acpi-tables.patch b/tests-data-acpi-Update-DSDT-acpi-tables.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc52330a4c343dcb9f5cbfd768667ad6cceb6ccc --- /dev/null +++ b/tests-data-acpi-Update-DSDT-acpi-tables.patch @@ -0,0 +1,70 @@ +From 
4a065d0fbbe159dfbc073e4480434d6889b7c5a4 Mon Sep 17 00:00:00 2001 +From: caijian +Date: Mon, 31 Mar 2025 15:03:02 +0800 +Subject: [PATCH] tests/data/acpi: Update DSDT acpi tables + +- * Disassembly of tests/data/acpi/virt/DSDT, Fri Mar 28 16:43:04 2025 ++ * Disassembly of /tmp/aml-1KF432, Fri Mar 28 16:43:04 2025 + * + * Original Table Header: + * Signature "DSDT" + * Length 0x000016B6 (5814) + * Revision 0x02 +- * Checksum 0x46 ++ * Checksum 0x47 + * OEM ID "BOCHS " + * OEM Table ID "BXPC " + * OEM Revision 0x00000001 (1) + * Compiler ID "BXPC" + * Compiler Version 0x00000001 (1) + */ + DefinitionBlock ("", "DSDT", 2, "BOCHS ", "BXPC ", 0x00000001) +@@ -2090,33 +2090,33 @@ + } + Else + { + CDW1 |= 0x04 + Return (Arg3) + } + } + + Method (_DSM, 4, NotSerialized) // _DSM: Device-Specific Method + { + If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d") /* Device Labeling Interface */)) + { + If ((Arg2 == Zero)) + { + Return (Buffer (One) + { +- 0x01 // . ++ 0x00 // . + }) + } + } + + Return (Buffer (One) + { + 0x00 + }) + } + +Signed-off-by: caijian +--- + tests/qtest/bios-tables-test-allowed-diff.h | 6 ------ + 1 files changed, 6 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index e4a94bb8bd..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,7 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/microvm/DSDT.pcie", +-"tests/data/acpi/virt/DSDT", +-"tests/data/acpi/virt/DSDT.acpihmatvirt", +-"tests/data/acpi/virt/DSDT.memhp", +-"tests/data/acpi/virt/DSDT.pxb", +-"tests/data/acpi/virt/DSDT.topology", +-- +2.41.0.windows.1 + diff --git a/tests-data-acpi-virt-Update-IORT-acpi-table.patch b/tests-data-acpi-virt-Update-IORT-acpi-table.patch new file mode 100644 index 0000000000000000000000000000000000000000..d509ccc4570e9fc300f6d99427f342aae199983a --- /dev/null +++ 
b/tests-data-acpi-virt-Update-IORT-acpi-table.patch @@ -0,0 +1,76 @@ +From bf12438e93f2d55aac6245f6a9f77f51b6fd2d8a Mon Sep 17 00:00:00 2001 +From: caijian +Date: Mon, 31 Mar 2025 15:06:24 +0800 +Subject: [PATCH] tests/data/acpi/virt: Update IORT acpi table + +- * Disassembly of tests/data/acpi/virt/IORT, Fri Mar 28 18:05:37 2025 ++ * Disassembly of /tmp/aml-9R3932, Fri Mar 28 18:05:37 2025 + * + * ACPI Data Table [IORT] + * + * Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue + */ + + [000h 0000 4] Signature : "IORT" [IO Remapping Table] + [004h 0004 4] Table Length : 00000080 +-[008h 0008 1] Revision : 03 +-[009h 0009 1] Checksum : B3 ++[008h 0008 1] Revision : 05 ++[009h 0009 1] Checksum : AE + [00Ah 0010 6] Oem ID : "BOCHS " + [010h 0016 8] Oem Table ID : "BXPC " + [018h 0024 4] Oem Revision : 00000001 + [01Ch 0028 4] Asl Compiler ID : "BXPC" + [020h 0032 4] Asl Compiler Revision : 00000001 +@@ -45,32 +45,32 @@ + [058h 0088 4] Cache Coherency : 00000001 + [05Ch 0092 1] Hints (decoded below) : 00 + Transient : 0 + Write Allocate : 0 + Read Allocate : 0 + Override : 0 + [05Dh 0093 2] Reserved : 0000 + [05Fh 0095 1] Memory Flags (decoded below) : 03 + Coherency : 1 + Device Attribute : 1 + [060h 0096 4] ATS Attribute : 00000000 + [064h 0100 4] PCI Segment Number : 00000000 + [068h 0104 1] Memory Size Limit : 40 + [069h 0105 3] Reserved : 000000 + + [06Ch 0108 4] Input base : 00000000 +-[070h 0112 4] ID Count : 0000FFFF ++[070h 0112 4] ID Count : 00010000 + [074h 0116 4] Output Base : 00000000 + [078h 0120 4] Output Reference : 00000030 + [07Ch 0124 4] Flags (decoded below) : 00000000 + Single Mapping : 0 + + Raw Table Data: Length 128 (0x80) + +- 0000: 49 4F 52 54 80 00 00 00 03 B3 42 4F 43 48 53 20 // IORT......BOCHS ++ 0000: 49 4F 52 54 80 00 00 00 05 AE 42 4F 43 48 53 20 // IORT......BOCHS + 0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43 // BXPC ....BXPC + 0020: 01 00 00 00 02 00 00 00 30 00 00 00 00 00 00 00 // ........0....... 
+ 0030: 00 18 00 01 00 00 00 00 00 00 00 00 00 00 00 00 // ................ + 0040: 01 00 00 00 00 00 00 00 02 38 00 03 01 00 00 00 // .........8...... + 0050: 01 00 00 00 24 00 00 00 01 00 00 00 00 00 00 03 // ....$........... + 0060: 00 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 // ........@....... +- 0070: FF FF 00 00 00 00 00 00 30 00 00 00 00 00 00 00 // ........0....... ++ 0070: 00 00 01 00 00 00 00 00 30 00 00 00 00 00 00 00 // ........0....... + +Signed-off-by: caijian +--- + tests/qtest/bios-tables-test-allowed-diff.h | 1 - + 1 files changed, 1 deletion(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 9a5a923d6b..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,2 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/virt/IORT", +-- +2.41.0.windows.1 + diff --git a/tests-docker-update-debian-i686-and-mipsel-images-to.patch b/tests-docker-update-debian-i686-and-mipsel-images-to.patch new file mode 100644 index 0000000000000000000000000000000000000000..a06421b24027e13ab99a74d39766ef6624955de9 --- /dev/null +++ b/tests-docker-update-debian-i686-and-mipsel-images-to.patch @@ -0,0 +1,154 @@ +From ea21c12b545ad6eecded5f34472d3f226f5a2e15 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Alex=20Benn=C3=A9e?= +Date: Tue, 10 Sep 2024 18:38:52 +0100 +Subject: [PATCH] tests/docker: update debian i686 and mipsel images to + bookworm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Whatever issues there were which stopped these being updates when the +rest were have now been resolved. However mips64el continues to be +broken so don't update it here. 
+ +Reviewed-by: Pierrick Bouvier +Reviewed-by: Richard Henderson +Signed-off-by: Alex Bennée +Message-Id: <20240910173900.4154726-3-alex.bennee@linaro.org> +(cherry picked from commit 19d2111059c87d3f58349f27b9be9dee81fc1681) +Signed-off-by: zhujun2 +--- + tests/docker/dockerfiles/debian-i686-cross.docker | 10 ++++------ + tests/docker/dockerfiles/debian-mipsel-cross.docker | 10 ++++------ + tests/lcitool/refresh | 4 ++-- + 3 files changed, 10 insertions(+), 14 deletions(-) + +diff --git a/tests/docker/dockerfiles/debian-i686-cross.docker b/tests/docker/dockerfiles/debian-i686-cross.docker +index 3fc4e15acd..e1c8e2b494 100644 +--- a/tests/docker/dockerfiles/debian-i686-cross.docker ++++ b/tests/docker/dockerfiles/debian-i686-cross.docker +@@ -1,10 +1,10 @@ + # THIS FILE WAS AUTO-GENERATED + # +-# $ lcitool dockerfile --layers all --cross-arch i686 debian-11 qemu ++# $ lcitool dockerfile --layers all --cross-arch i686 debian-12 qemu + # + # https://gitlab.com/libvirt/libvirt-ci + +-FROM docker.io/library/debian:11-slim ++FROM docker.io/library/debian:12-slim + + RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update && \ +@@ -47,16 +47,15 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ + python3-opencv \ + python3-pillow \ + python3-pip \ +- python3-setuptools \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ +- python3-wheel \ + python3-yaml \ + rpm2cpio \ + sed \ + socat \ + sparse \ ++ swtpm \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ +@@ -67,8 +66,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ + sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ + dpkg-reconfigure locales + +-RUN /usr/bin/pip3 install tomli +- + ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" + ENV LANG "en_US.UTF-8" + ENV MAKE "/usr/bin/make" +@@ -145,6 +142,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ + libvdeplug-dev:i386 \ + libvirglrenderer-dev:i386 \ + libvte-2.91-dev:i386 \ ++ libxdp-dev:i386 \ + libzstd-dev:i386 \ + 
nettle-dev:i386 \ + systemtap-sdt-dev:i386 \ +diff --git a/tests/docker/dockerfiles/debian-mipsel-cross.docker b/tests/docker/dockerfiles/debian-mipsel-cross.docker +index 5fcd641f15..79ce4ae503 100644 +--- a/tests/docker/dockerfiles/debian-mipsel-cross.docker ++++ b/tests/docker/dockerfiles/debian-mipsel-cross.docker +@@ -1,10 +1,10 @@ + # THIS FILE WAS AUTO-GENERATED + # +-# $ lcitool dockerfile --layers all --cross-arch mipsel debian-11 qemu ++# $ lcitool dockerfile --layers all --cross-arch mipsel debian-12 qemu + # + # https://gitlab.com/libvirt/libvirt-ci + +-FROM docker.io/library/debian:11-slim ++FROM docker.io/library/debian:12-slim + + RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update && \ +@@ -47,16 +47,15 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ + python3-opencv \ + python3-pillow \ + python3-pip \ +- python3-setuptools \ + python3-sphinx \ + python3-sphinx-rtd-theme \ + python3-venv \ +- python3-wheel \ + python3-yaml \ + rpm2cpio \ + sed \ + socat \ + sparse \ ++ swtpm \ + tar \ + tesseract-ocr \ + tesseract-ocr-eng \ +@@ -67,8 +66,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ + sed -Ei 's,^# (en_US\.UTF-8 .*)$,\1,' /etc/locale.gen && \ + dpkg-reconfigure locales + +-RUN /usr/bin/pip3 install tomli +- + ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers" + ENV LANG "en_US.UTF-8" + ENV MAKE "/usr/bin/make" +@@ -143,6 +140,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ + libvdeplug-dev:mipsel \ + libvirglrenderer-dev:mipsel \ + libvte-2.91-dev:mipsel \ ++ libxdp-dev:mipsel \ + libzstd-dev:mipsel \ + nettle-dev:mipsel \ + systemtap-sdt-dev:mipsel \ +diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh +index 0c93557ad6..42ed7eba1d 100755 +--- a/tests/lcitool/refresh ++++ b/tests/lcitool/refresh +@@ -159,7 +159,7 @@ try: + trailer=cross_build("arm-linux-gnueabihf-", + "arm-softmmu,arm-linux-user")) + +- generate_dockerfile("debian-i686-cross", "debian-11", ++ generate_dockerfile("debian-i686-cross", "debian-12", + 
cross="i686", + trailer=cross_build("x86_64-linux-gnu-", + "x86_64-softmmu," +@@ -171,7 +171,7 @@ try: + trailer=cross_build("mips64el-linux-gnuabi64-", + "mips64el-softmmu,mips64el-linux-user")) + +- generate_dockerfile("debian-mipsel-cross", "debian-11", ++ generate_dockerfile("debian-mipsel-cross", "debian-12", + cross="mipsel", + trailer=cross_build("mipsel-linux-gnu-", + "mipsel-softmmu,mipsel-linux-user")) +-- +2.41.0.windows.1 + diff --git a/tests-document-how-to-update-acpi-tables.patch b/tests-document-how-to-update-acpi-tables.patch deleted file mode 100644 index c961069b6e77c2a193b34d606466f04c7b059611..0000000000000000000000000000000000000000 --- a/tests-document-how-to-update-acpi-tables.patch +++ /dev/null @@ -1,53 +0,0 @@ -From d9642ad522d34f0d803a87654a2c258baf1070dd Mon Sep 17 00:00:00 2001 -From: "Michael S. Tsirkin" -Date: Sat, 5 Oct 2019 17:25:55 -0400 -Subject: [PATCH] tests: document how to update acpi tables - -Looks like no one understands how to do it. -Document the process. - -Signed-off-by: Michael S. Tsirkin ---- - tests/bios-tables-test.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c -index 5e177b7155..d47ee9be99 100644 ---- a/tests/bios-tables-test.c -+++ b/tests/bios-tables-test.c -@@ -10,6 +10,33 @@ - * See the COPYING file in the top-level directory. - */ - -+/* -+ * How to add or update the tests: -+ * Contributor: -+ * 1. add empty files for new tables, if any, under tests/data/acpi -+ * 2. list any changed files in tests/bios-tables-test-allowed-diff.h -+ * 3. commit the above *before* making changes that affect the tables -+ * Maintainer: -+ * After 1-3 above tests will pass but ignore differences with the expected files. -+ * You will also notice that tests/bios-tables-test-allowed-diff.h lists -+ * a bunch of files. This is your hint that you need to do the below: -+ * 4. 
Run -+ * make check V=1 -+ * this will produce a bunch of warnings about differences -+ * beween actual and expected ACPI tables. If you have IASL installed, -+ * they will also be disassembled so you can look at the disassembled -+ * output. If not - disassemble them yourself in any way you like. -+ * Look at the differences - make sure they make sense and match what the -+ * changes you are merging are supposed to do. -+ * -+ * 5. From build directory, run: -+ * $(SRC_PATH)/tests/data/acpi/rebuild-expected-aml.sh -+ * 6. Now commit any changes. -+ * 7. Before doing a pull request, make sure tests/bios-tables-test-allowed-diff.h -+ * is empty - this will ensure following changes to ACPI tables will -+ * be noticed. -+ */ -+ - #include "qemu/osdep.h" - #include - #include "qemu-common.h" --- -2.19.1 diff --git a/tests-libqos-Add-loongarch-virt-machine-node.patch b/tests-libqos-Add-loongarch-virt-machine-node.patch new file mode 100644 index 0000000000000000000000000000000000000000..31abdbcf06230a8a121800b2390ce906aba877ff --- /dev/null +++ b/tests-libqos-Add-loongarch-virt-machine-node.patch @@ -0,0 +1,183 @@ +From 254957f2de480901a063759d762d4b1eca5b5bb0 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Tue, 28 May 2024 16:20:53 +0800 +Subject: [PATCH 28/78] tests/libqos: Add loongarch virt machine node + +Add loongarch virt machine to the graph. It is a modified copy of +the existing riscv virtmachine in riscv-virt-machine.c + +It contains a generic-pcihost controller, and an extra function +loongarch_config_qpci_bus() to configure GPEX pci host controller +information, such as ecam and pio_base addresses. + +Also hotplug handle checking about TYPE_VIRTIO_IOMMU_PCI device is +added on loongarch virt machine, since virtio_mmu_pci device requires +it. 
+ +Signed-off-by: Bibo Mao +Acked-by: Thomas Huth +Message-Id: <20240528082053.938564-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +Signed-off-by: Xianglai Li +--- + hw/loongarch/virt.c | 2 + + tests/qtest/libqos/loongarch-virt-machine.c | 114 ++++++++++++++++++++ + tests/qtest/libqos/meson.build | 1 + + 3 files changed, 117 insertions(+) + create mode 100644 tests/qtest/libqos/loongarch-virt-machine.c + +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 11ba879e52..f7874bccf9 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -47,6 +47,7 @@ + #include "sysemu/tpm.h" + #include "sysemu/block-backend.h" + #include "hw/block/flash.h" ++#include "hw/virtio/virtio-iommu.h" + #include "qemu/error-report.h" + + static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms) +@@ -1302,6 +1303,7 @@ static HotplugHandler *virt_get_hotplug_handler(MachineState *machine, + MachineClass *mc = MACHINE_GET_CLASS(machine); + + if (device_is_dynamic_sysbus(mc, dev) || ++ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || + memhp_type_supported(dev)) { + return HOTPLUG_HANDLER(machine); + } +diff --git a/tests/qtest/libqos/loongarch-virt-machine.c b/tests/qtest/libqos/loongarch-virt-machine.c +new file mode 100644 +index 0000000000..c12089c015 +--- /dev/null ++++ b/tests/qtest/libqos/loongarch-virt-machine.c +@@ -0,0 +1,114 @@ ++/* ++ * libqos driver framework ++ * ++ * Copyright (c) 2018 Emanuele Giuseppe Esposito ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License version 2.1 as published by the Free Software Foundation. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, see ++ */ ++ ++#include "qemu/osdep.h" ++#include "../libqtest.h" ++#include "qemu/module.h" ++#include "libqos-malloc.h" ++#include "qgraph.h" ++#include "virtio-mmio.h" ++#include "generic-pcihost.h" ++#include "hw/pci/pci_regs.h" ++ ++#define LOONGARCH_PAGE_SIZE 0x1000 ++#define LOONGARCH_VIRT_RAM_ADDR 0x100000 ++#define LOONGARCH_VIRT_RAM_SIZE 0xFF00000 ++ ++#define LOONGARCH_VIRT_PIO_BASE 0x18000000 ++#define LOONGARCH_VIRT_PCIE_PIO_OFFSET 0x4000 ++#define LOONGARCH_VIRT_PCIE_PIO_LIMIT 0x10000 ++#define LOONGARCH_VIRT_PCIE_ECAM_BASE 0x20000000 ++#define LOONGARCH_VIRT_PCIE_MMIO32_BASE 0x40000000 ++#define LOONGARCH_VIRT_PCIE_MMIO32_LIMIT 0x80000000 ++ ++typedef struct QVirtMachine QVirtMachine; ++ ++struct QVirtMachine { ++ QOSGraphObject obj; ++ QGuestAllocator alloc; ++ QVirtioMMIODevice virtio_mmio; ++ QGenericPCIHost bridge; ++}; ++ ++static void virt_destructor(QOSGraphObject *obj) ++{ ++ QVirtMachine *machine = (QVirtMachine *) obj; ++ alloc_destroy(&machine->alloc); ++} ++ ++static void *virt_get_driver(void *object, const char *interface) ++{ ++ QVirtMachine *machine = object; ++ if (!g_strcmp0(interface, "memory")) { ++ return &machine->alloc; ++ } ++ ++ fprintf(stderr, "%s not present in loongarch/virtio\n", interface); ++ g_assert_not_reached(); ++} ++ ++static QOSGraphObject *virt_get_device(void *obj, const char *device) ++{ ++ QVirtMachine *machine = obj; ++ if (!g_strcmp0(device, "generic-pcihost")) { ++ return &machine->bridge.obj; ++ } else if (!g_strcmp0(device, "virtio-mmio")) { ++ return &machine->virtio_mmio.obj; ++ } ++ ++ fprintf(stderr, "%s not present in loongarch/virt\n", device); ++ g_assert_not_reached(); ++} ++ ++static void loongarch_config_qpci_bus(QGenericPCIBus *qpci) ++{ ++ qpci->gpex_pio_base = LOONGARCH_VIRT_PIO_BASE; ++ qpci->bus.pio_alloc_ptr = LOONGARCH_VIRT_PCIE_PIO_OFFSET; ++ qpci->bus.pio_limit = 
LOONGARCH_VIRT_PCIE_PIO_LIMIT; ++ qpci->bus.mmio_alloc_ptr = LOONGARCH_VIRT_PCIE_MMIO32_BASE; ++ qpci->bus.mmio_limit = LOONGARCH_VIRT_PCIE_MMIO32_LIMIT; ++ qpci->ecam_alloc_ptr = LOONGARCH_VIRT_PCIE_ECAM_BASE; ++} ++ ++static void *qos_create_machine_loongarch_virt(QTestState *qts) ++{ ++ QVirtMachine *machine = g_new0(QVirtMachine, 1); ++ ++ alloc_init(&machine->alloc, 0, ++ LOONGARCH_VIRT_RAM_ADDR, ++ LOONGARCH_VIRT_RAM_ADDR + LOONGARCH_VIRT_RAM_SIZE, ++ LOONGARCH_PAGE_SIZE); ++ ++ qos_create_generic_pcihost(&machine->bridge, qts, &machine->alloc); ++ loongarch_config_qpci_bus(&machine->bridge.pci); ++ ++ machine->obj.get_device = virt_get_device; ++ machine->obj.get_driver = virt_get_driver; ++ machine->obj.destructor = virt_destructor; ++ return machine; ++} ++ ++static void virt_machine_register_nodes(void) ++{ ++ qos_node_create_machine_args("loongarch64/virt", ++ qos_create_machine_loongarch_virt, ++ " -cpu la464"); ++ qos_node_contains("loongarch64/virt", "generic-pcihost", NULL); ++} ++ ++libqos_init(virt_machine_register_nodes); +diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build +index 90aae42a22..482c9b2aab 100644 +--- a/tests/qtest/libqos/meson.build ++++ b/tests/qtest/libqos/meson.build +@@ -60,6 +60,7 @@ libqos_srcs = files( + 'arm-xilinx-zynq-a9-machine.c', + 'ppc64_pseries-machine.c', + 'x86_64_pc-machine.c', ++ 'loongarch-virt-machine.c', + ) + + if have_virtfs +-- +2.39.1 + diff --git a/tests-migration-Add-integration-test-for-qatzip-comp.patch b/tests-migration-Add-integration-test-for-qatzip-comp.patch new file mode 100644 index 0000000000000000000000000000000000000000..681124eb6f7e7c4e04eeccc60ef2f89a2ad4697e --- /dev/null +++ b/tests-migration-Add-integration-test-for-qatzip-comp.patch @@ -0,0 +1,76 @@ +From 049442961f30f504475a7cb4b4c02043a7fb3c04 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 30 Aug 2024 16:27:22 -0700 +Subject: [92/99] tests/migration: Add integration test for 'qatzip' + compression 
method + +commit afe166d4e8bc33bc448cd573b55d0ac094187d48 upstream. + +Adds an integration test for 'qatzip'. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-6-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 7ecf4ce9a5..3385ca1f15 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2582,6 +2582,18 @@ test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, + } + #endif /* CONFIG_ZSTD */ + ++#ifdef CONFIG_QATZIP ++static void * ++test_migrate_precopy_tcp_multifd_qatzip_start(QTestState *from, ++ QTestState *to) ++{ ++ migrate_set_parameter_int(from, "multifd-qatzip-level", 2); ++ migrate_set_parameter_int(to, "multifd-qatzip-level", 2); ++ ++ return test_migrate_precopy_tcp_multifd_start_common(from, to, "qatzip"); ++} ++#endif ++ + #ifdef CONFIG_QPL + static void * + test_migrate_precopy_tcp_multifd_qpl_start(QTestState *from, +@@ -2634,6 +2646,17 @@ static void test_multifd_tcp_zstd(void) + } + #endif + ++#ifdef CONFIG_QATZIP ++static void test_multifd_tcp_qatzip(void) ++{ ++ MigrateCommon args = { ++ .listen_uri = "defer", ++ .start_hook = test_migrate_precopy_tcp_multifd_qatzip_start, ++ }; ++ test_precopy_common(&args); ++} ++#endif ++ + #ifdef CONFIG_QPL + static void test_multifd_tcp_qpl(void) + { +@@ -3531,6 +3554,10 @@ int main(int argc, char **argv) + migration_test_add("/migration/multifd/tcp/plain/zstd", + test_multifd_tcp_zstd); + #endif ++#ifdef CONFIG_QATZIP ++ migration_test_add("/migration/multifd/tcp/plain/qatzip", ++ test_multifd_tcp_qatzip); ++#endif + #ifdef CONFIG_QPL + migration_test_add("/migration/multifd/tcp/plain/qpl", + test_multifd_tcp_qpl); +-- +2.33.0 + diff --git 
a/tests-migration-Set-compression-level-in-migration-t.patch b/tests-migration-Set-compression-level-in-migration-t.patch new file mode 100644 index 0000000000000000000000000000000000000000..566c10d4088b8e0c4c9371c0599b7575827d24a3 --- /dev/null +++ b/tests-migration-Set-compression-level-in-migration-t.patch @@ -0,0 +1,49 @@ +From 51191c9239aee8a25428fef53fe99589e1aca711 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 1 Mar 2024 03:59:01 +0000 +Subject: [63/99] tests/migration: Set compression level in migration tests + +commit 2b571432314ab42da742fbb578f4174166ecd7f5 upstream. + +Adds calls to set compression level for `zstd` and `zlib` migration +tests, just to make sure that the calls work. + +Signed-off-by: Bryan Zhang +Link: https://lore.kernel.org/r/20240301035901.4006936-3-bryan.zhang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 13888be898..0ac5e7ddc9 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2560,6 +2560,13 @@ static void * + test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from, + QTestState *to) + { ++ /* ++ * Overloading this test to also check that set_parameter does not error. ++ * This is also done in the tests for the other compression methods. 
++ */ ++ migrate_set_parameter_int(from, "multifd-zlib-level", 2); ++ migrate_set_parameter_int(to, "multifd-zlib-level", 2); ++ + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zlib"); + } + +@@ -2568,6 +2575,9 @@ static void * + test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, + QTestState *to) + { ++ migrate_set_parameter_int(from, "multifd-zstd-level", 2); ++ migrate_set_parameter_int(to, "multifd-zstd-level", 2); ++ + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zstd"); + } + #endif /* CONFIG_ZSTD */ +-- +2.33.0 + diff --git a/tests-migration-test-add-qpl-compression-test.patch b/tests-migration-test-add-qpl-compression-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c5904e5a79c2f3f76de9b48e84678dc3bf23bd8 --- /dev/null +++ b/tests-migration-test-add-qpl-compression-test.patch @@ -0,0 +1,80 @@ +From 3b4704d5856f383244b0c2a1e6c180cdcc672eb0 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:10 +0800 +Subject: [79/99] tests/migration-test: add qpl compression test + +commit 08b82d207d138173ddd334c91b387213508a6e13 upstream. + +add qpl to compression method test for multifd migration + +the qpl compression supports software path and hardware +path(IAA device), and the hardware path is used first by +default. If the hardware path is unavailable, it will +automatically fallback to the software path for testing. 
+ +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Peter Xu +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas + + Conflicts: + tests/qtest/migration-test.c +[jz: resolve simple context conflict] +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 0ac5e7ddc9..16cb7993b3 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2582,6 +2582,15 @@ test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, + } + #endif /* CONFIG_ZSTD */ + ++#ifdef CONFIG_QPL ++static void * ++test_migrate_precopy_tcp_multifd_qpl_start(QTestState *from, ++ QTestState *to) ++{ ++ return test_migrate_precopy_tcp_multifd_start_common(from, to, "qpl"); ++} ++#endif /* CONFIG_QPL */ ++ + static void test_multifd_tcp_none(void) + { + MigrateCommon args = { +@@ -2617,6 +2626,17 @@ static void test_multifd_tcp_zstd(void) + } + #endif + ++#ifdef CONFIG_QPL ++static void test_multifd_tcp_qpl(void) ++{ ++ MigrateCommon args = { ++ .listen_uri = "defer", ++ .start_hook = test_migrate_precopy_tcp_multifd_qpl_start, ++ }; ++ test_precopy_common(&args); ++} ++#endif ++ + #ifdef CONFIG_GNUTLS + static void * + test_migrate_multifd_tcp_tls_psk_start_match(QTestState *from, +@@ -3492,6 +3512,10 @@ int main(int argc, char **argv) + migration_test_add("/migration/multifd/tcp/plain/zstd", + test_multifd_tcp_zstd); + #endif ++#ifdef CONFIG_QPL ++ migration_test_add("/migration/multifd/tcp/plain/qpl", ++ test_multifd_tcp_qpl); ++#endif + #ifdef CONFIG_GNUTLS + migration_test_add("/migration/multifd/tcp/tls/psk/match", + test_multifd_tcp_tls_psk_match); +-- +2.33.0 + diff --git a/tests-migration-test-add-uadk-compression-test.patch b/tests-migration-test-add-uadk-compression-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc516626a944d970cc0a5c6b0508417cc4f0a971 --- 
/dev/null +++ b/tests-migration-test-add-uadk-compression-test.patch @@ -0,0 +1,66 @@ +From 76db600f67d72fdb24d794954c85a902968f71ea Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:10 +0100 +Subject: [86/99] tests/migration-test: add uadk compression test + +commit c519caa825f5eba6e204bed5a464df167a5421d0 upstream. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 16cb7993b3..7ecf4ce9a5 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2590,6 +2590,14 @@ test_migrate_precopy_tcp_multifd_qpl_start(QTestState *from, + return test_migrate_precopy_tcp_multifd_start_common(from, to, "qpl"); + } + #endif /* CONFIG_QPL */ ++#ifdef CONFIG_UADK ++static void * ++test_migrate_precopy_tcp_multifd_uadk_start(QTestState *from, ++ QTestState *to) ++{ ++ return test_migrate_precopy_tcp_multifd_start_common(from, to, "uadk"); ++} ++#endif /* CONFIG_UADK */ + + static void test_multifd_tcp_none(void) + { +@@ -2637,6 +2645,17 @@ static void test_multifd_tcp_qpl(void) + } + #endif + ++#ifdef CONFIG_UADK ++static void test_multifd_tcp_uadk(void) ++{ ++ MigrateCommon args = { ++ .listen_uri = "defer", ++ .start_hook = test_migrate_precopy_tcp_multifd_uadk_start, ++ }; ++ test_precopy_common(&args); ++} ++#endif ++ + #ifdef CONFIG_GNUTLS + static void * + test_migrate_multifd_tcp_tls_psk_start_match(QTestState *from, +@@ -3516,6 +3535,10 @@ int main(int argc, char **argv) + migration_test_add("/migration/multifd/tcp/plain/qpl", + test_multifd_tcp_qpl); + #endif ++#ifdef CONFIG_UADK ++ migration_test_add("/migration/multifd/tcp/plain/uadk", ++ test_multifd_tcp_uadk); ++#endif + #ifdef CONFIG_GNUTLS + migration_test_add("/migration/multifd/tcp/tls/psk/match", + 
test_multifd_tcp_tls_psk_match); +-- +2.33.0 + diff --git a/tests-qemu-iotests-resolved-the-problem-that-the-108.patch b/tests-qemu-iotests-resolved-the-problem-that-the-108.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4a3c54f52e349b6d5cb2fd66b8af8dd0387e8ce --- /dev/null +++ b/tests-qemu-iotests-resolved-the-problem-that-the-108.patch @@ -0,0 +1,31 @@ +From d95cbdd8738d61b8bc7c9a1541dade42c1f48314 Mon Sep 17 00:00:00 2001 +From: adttil <2429917001@qq.com> +Date: Thu, 1 Feb 2024 21:53:58 +0800 +Subject: [PATCH] tests/qemu-iotests: resolved the problem that the 108 test + cases in the container fail + +The loop device cannot be created in the compilation environment of the +container. Therefore, a judgment condition is added to the +initialization variable loopdev to check whether loop-control exists. + +Signed-off-by: Adttil <2429917001@qq.com> +--- + tests/qemu-iotests/108 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 54e935acf2..a6fe261265 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -55,7 +55,7 @@ _supported_os Linux + _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file + + # This test either needs sudo -n losetup or FUSE exports to work +-if sudo -n losetup &>/dev/null; then ++if test -c "/dev/loop-control" && sudo -n losetup &>/dev/null; then + loopdev=true + else + loopdev=false +-- +2.27.0 + diff --git a/tests-qtest-Allow-DSDT-acpi-tables-to-change.patch b/tests-qtest-Allow-DSDT-acpi-tables-to-change.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e3356c5387828ae5fa0232d594e341461d6f465 --- /dev/null +++ b/tests-qtest-Allow-DSDT-acpi-tables-to-change.patch @@ -0,0 +1,27 @@ +From ea23e4215b332446d4964769d004f7a11caba00b Mon Sep 17 00:00:00 2001 +From: caijian +Date: Mon, 31 Mar 2025 15:02:37 +0800 +Subject: [PATCH] tests/qtest: Allow DSDT acpi tables to change + +List all DSDT 
files and allow them to change. + +Signed-off-by: caijian +--- + tests/qtest/bios-tables-test-allowed-diff.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..e4a94bb8bd 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,7 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/microvm/DSDT.pcie", ++"tests/data/acpi/virt/DSDT", ++"tests/data/acpi/virt/DSDT.acpihmatvirt", ++"tests/data/acpi/virt/DSDT.memhp", ++"tests/data/acpi/virt/DSDT.pxb", ++"tests/data/acpi/virt/DSDT.topology", +-- +2.41.0.windows.1 + diff --git a/tests-qtest-Allow-IORT-acpi-table-to-change.patch b/tests-qtest-Allow-IORT-acpi-table-to-change.patch new file mode 100644 index 0000000000000000000000000000000000000000..22bbe28f530de1bfb837468eba5ea89f168bf69a --- /dev/null +++ b/tests-qtest-Allow-IORT-acpi-table-to-change.patch @@ -0,0 +1,22 @@ +From ca17fd9b9e608e0a6e8a948ccf46fa020c12f510 Mon Sep 17 00:00:00 2001 +From: caijian +Date: Mon, 31 Mar 2025 15:06:13 +0800 +Subject: [PATCH] tests/qtest: Allow IORT acpi table to change + +List changed IORT file and allow it to change. 
+ +Signed-off-by: caijian +--- + tests/qtest/bios-tables-test-allowed-diff.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..9a5a923d6b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,2 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/IORT", +-- +2.41.0.windows.1 + diff --git a/tests-qtest-Re-enable-multifd-cancel-test.patch b/tests-qtest-Re-enable-multifd-cancel-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..a62956eb590470fa91ccf1184a7026c5bfea7d61 --- /dev/null +++ b/tests-qtest-Re-enable-multifd-cancel-test.patch @@ -0,0 +1,43 @@ +From eea4f476c2c35e4153637d5efe25ce308c2aaa55 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Wed, 11 Oct 2023 15:46:04 -0300 +Subject: [14/99] tests/qtest: Re-enable multifd cancel test + +commit 75b1f88cd2dd5eeb1fd817a2f3a291c2670f9c50 upstream. + +We've found the source of flakiness in this test, so re-enable it. + +Reviewed-by: Juan Quintela +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20230606144551.24367-4-farosas@suse.de +[peterx: rebase to 2a61a6964c, to use migration_test_add()] +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 470b06bbb4..13888be898 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -3474,14 +3474,8 @@ int main(int argc, char **argv) + } + migration_test_add("/migration/multifd/tcp/plain/none", + test_multifd_tcp_none); +- /* +- * This test is flaky and sometimes fails in CI and otherwise: +- * don't run unless user opts in via environment variable. 
+- */ +- if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- migration_test_add("/migration/multifd/tcp/plain/cancel", +- test_multifd_tcp_cancel); +- } ++ migration_test_add("/migration/multifd/tcp/plain/cancel", ++ test_multifd_tcp_cancel); + migration_test_add("/migration/multifd/tcp/plain/zlib", + test_multifd_tcp_zlib); + #ifdef CONFIG_ZSTD +-- +2.33.0 + diff --git a/tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch b/tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch new file mode 100644 index 0000000000000000000000000000000000000000..a0de622f1091a6efa7047f5d82920dc63174fe97 --- /dev/null +++ b/tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch @@ -0,0 +1,88 @@ +From d78a7031877a343563200e875c4ef2d71522f1d0 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:43 -0300 +Subject: [12/99] tests/qtest/migration: Add a wrapper to print test names + +commit e33b6712dba206547a313a6f2608b0fd967ee558 upstream. + +Our usage of gtest results in us losing the very basic functionality +of "knowing which test failed". The issue is that gtest only prints +test names ("paths" in gtest parlance) once the test has finished, but +we use asserts in the tests and crash gtest itself before it can print +anything. We also use a final abort when the result of g_test_run is +not 0. + +Depending on how the test failed/broke we can see the function that +trigged the abort, which may be representative of the test, but it +could also just be some generic function. + +We have been relying on the primitive method of looking at the name of +the previous successful test and then looking at the code to figure +out which test should have come next. + +Add a wrapper to the test registration that does the job of printing +the test name before running. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-7-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-helpers.c | 32 ++++++++++++++++++++++++++++++++ + tests/qtest/migration-helpers.h | 1 + + 2 files changed, 33 insertions(+) + +diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c +index f1106128a9..164e09c299 100644 +--- a/tests/qtest/migration-helpers.c ++++ b/tests/qtest/migration-helpers.c +@@ -298,3 +298,35 @@ char *resolve_machine_version(const char *alias, const char *var1, + + return find_common_machine_version(machine_name, var1, var2); + } ++ ++typedef struct { ++ char *name; ++ void (*func)(void); ++} MigrationTest; ++ ++static void migration_test_destroy(gpointer data) ++{ ++ MigrationTest *test = (MigrationTest *)data; ++ ++ g_free(test->name); ++ g_free(test); ++} ++ ++static void migration_test_wrapper(const void *data) ++{ ++ MigrationTest *test = (MigrationTest *)data; ++ ++ g_test_message("Running /%s%s", qtest_get_arch(), test->name); ++ test->func(); ++} ++ ++void migration_test_add(const char *path, void (*fn)(void)) ++{ ++ MigrationTest *test = g_new0(MigrationTest, 1); ++ ++ test->func = fn; ++ test->name = g_strdup(path); ++ ++ qtest_add_data_func_full(path, test, migration_test_wrapper, ++ migration_test_destroy); ++} +diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h +index e31dc85cc7..0d9a02edc7 100644 +--- a/tests/qtest/migration-helpers.h ++++ b/tests/qtest/migration-helpers.h +@@ -47,4 +47,5 @@ char *find_common_machine_version(const char *mtype, const char *var1, + const char *var2); + char *resolve_machine_version(const char *alias, const char *var1, + const char *var2); ++void migration_test_add(const char *path, void (*fn)(void)); + #endif /* MIGRATION_HELPERS_H */ +-- +2.33.0 + diff --git a/tests-qtest-migration-Print-migration-incoming-error.patch 
b/tests-qtest-migration-Print-migration-incoming-error.patch new file mode 100644 index 0000000000000000000000000000000000000000..e205c82f1e69196f41902c7615c875e9acbf47da --- /dev/null +++ b/tests-qtest-migration-Print-migration-incoming-error.patch @@ -0,0 +1,39 @@ +From 20c8c77ba5e362b1bfada691b2242648d3626d5d Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:42 -0300 +Subject: [11/99] tests/qtest/migration: Print migration incoming errors + +commit 679a7382a389875c0f7835a1a409ebf4859f8410 upstream. + +We're currently just asserting when incoming migration fails. Let's +print the error message from QMP as well. + +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-6-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-helpers.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c +index 24fb7b3525..f1106128a9 100644 +--- a/tests/qtest/migration-helpers.c ++++ b/tests/qtest/migration-helpers.c +@@ -118,6 +118,12 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...) 
+ + rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}", + args); ++ ++ if (!qdict_haskey(rsp, "return")) { ++ g_autoptr(GString) s = qobject_to_json_pretty(QOBJECT(rsp), true); ++ g_test_message("%s", s->str); ++ } ++ + g_assert(qdict_haskey(rsp, "return")); + qobject_unref(rsp); + +-- +2.33.0 + diff --git a/tests-qtest-migration-Use-the-new-migration_test_add.patch b/tests-qtest-migration-Use-the-new-migration_test_add.patch new file mode 100644 index 0000000000000000000000000000000000000000..7fb81eca3be1c5ec008c1617e02d0a00602223fe --- /dev/null +++ b/tests-qtest-migration-Use-the-new-migration_test_add.patch @@ -0,0 +1,308 @@ +From a26a1ea993f48dbccd0fee3812b7535531b1cc14 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:44 -0300 +Subject: [13/99] tests/qtest/migration: Use the new migration_test_add + +commit 6f0771de903bb7623dc85bbf9f94f641979daaaa upstream. + +Replace the tests registration with the new function that prints tests +names. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-8-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + tests/qtest/migration-test.c +[jz: resolve context conflicts due to live-suspend which is not backported] +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 202 ++++++++++++++++++----------------- + 1 file changed, 104 insertions(+), 98 deletions(-) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 0fbaa6a90f..470b06bbb4 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -3339,62 +3339,64 @@ int main(int argc, char **argv) + module_call_init(MODULE_INIT_QOM); + + if (has_uffd) { +- qtest_add_func("/migration/postcopy/plain", test_postcopy); +- qtest_add_func("/migration/postcopy/recovery/plain", +- test_postcopy_recovery); +- qtest_add_func("/migration/postcopy/preempt/plain", test_postcopy_preempt); +- qtest_add_func("/migration/postcopy/preempt/recovery/plain", +- test_postcopy_preempt_recovery); ++ migration_test_add("/migration/postcopy/plain", test_postcopy); ++ migration_test_add("/migration/postcopy/recovery/plain", ++ test_postcopy_recovery); ++ migration_test_add("/migration/postcopy/preempt/plain", ++ test_postcopy_preempt); ++ migration_test_add("/migration/postcopy/preempt/recovery/plain", ++ test_postcopy_preempt_recovery); + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/postcopy/compress/plain", +- test_postcopy_compress); +- qtest_add_func("/migration/postcopy/recovery/compress/plain", +- test_postcopy_recovery_compress); ++ migration_test_add("/migration/postcopy/compress/plain", ++ test_postcopy_compress); ++ migration_test_add("/migration/postcopy/recovery/compress/plain", ++ test_postcopy_recovery_compress); + } + #ifndef _WIN32 +- qtest_add_func("/migration/postcopy/recovery/double-failures", +- test_postcopy_recovery_double_fail); ++ 
migration_test_add("/migration/postcopy/recovery/double-failures", ++ test_postcopy_recovery_double_fail); + #endif /* _WIN32 */ +- + } + +- qtest_add_func("/migration/bad_dest", test_baddest); ++ migration_test_add("/migration/bad_dest", test_baddest); + #ifndef _WIN32 + if (!g_str_equal(arch, "s390x")) { +- qtest_add_func("/migration/analyze-script", test_analyze_script); ++ migration_test_add("/migration/analyze-script", test_analyze_script); + } + #endif +- qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain); +- qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle); ++ migration_test_add("/migration/precopy/unix/plain", ++ test_precopy_unix_plain); ++ migration_test_add("/migration/precopy/unix/xbzrle", ++ test_precopy_unix_xbzrle); + /* + * Compression fails from time to time. + * Put test here but don't enable it until everything is fixed. + */ + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/precopy/unix/compress/wait", +- test_precopy_unix_compress); +- qtest_add_func("/migration/precopy/unix/compress/nowait", +- test_precopy_unix_compress_nowait); ++ migration_test_add("/migration/precopy/unix/compress/wait", ++ test_precopy_unix_compress); ++ migration_test_add("/migration/precopy/unix/compress/nowait", ++ test_precopy_unix_compress_nowait); + } + +- qtest_add_func("/migration/precopy/file", +- test_precopy_file); +- qtest_add_func("/migration/precopy/file/offset", +- test_precopy_file_offset); +- qtest_add_func("/migration/precopy/file/offset/bad", +- test_precopy_file_offset_bad); ++ migration_test_add("/migration/precopy/file", ++ test_precopy_file); ++ migration_test_add("/migration/precopy/file/offset", ++ test_precopy_file_offset); ++ migration_test_add("/migration/precopy/file/offset/bad", ++ test_precopy_file_offset_bad); + + /* + * Our CI system has problems with shared memory. + * Don't run this test until we find a workaround. 
+ */ + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/mode/reboot", test_mode_reboot); ++ migration_test_add("/migration/mode/reboot", test_mode_reboot); + } + + #ifdef CONFIG_GNUTLS +- qtest_add_func("/migration/precopy/unix/tls/psk", +- test_precopy_unix_tls_psk); ++ migration_test_add("/migration/precopy/unix/tls/psk", ++ test_precopy_unix_tls_psk); + + if (has_uffd) { + /* +@@ -3402,110 +3404,114 @@ int main(int argc, char **argv) + * channels are tested under precopy. Here what we want to test is the + * general postcopy path that has TLS channel enabled. + */ +- qtest_add_func("/migration/postcopy/tls/psk", test_postcopy_tls_psk); +- qtest_add_func("/migration/postcopy/recovery/tls/psk", +- test_postcopy_recovery_tls_psk); +- qtest_add_func("/migration/postcopy/preempt/tls/psk", +- test_postcopy_preempt_tls_psk); +- qtest_add_func("/migration/postcopy/preempt/recovery/tls/psk", +- test_postcopy_preempt_all); ++ migration_test_add("/migration/postcopy/tls/psk", ++ test_postcopy_tls_psk); ++ migration_test_add("/migration/postcopy/recovery/tls/psk", ++ test_postcopy_recovery_tls_psk); ++ migration_test_add("/migration/postcopy/preempt/tls/psk", ++ test_postcopy_preempt_tls_psk); ++ migration_test_add("/migration/postcopy/preempt/recovery/tls/psk", ++ test_postcopy_preempt_all); + } + #ifdef CONFIG_TASN1 +- qtest_add_func("/migration/precopy/unix/tls/x509/default-host", +- test_precopy_unix_tls_x509_default_host); +- qtest_add_func("/migration/precopy/unix/tls/x509/override-host", +- test_precopy_unix_tls_x509_override_host); ++ migration_test_add("/migration/precopy/unix/tls/x509/default-host", ++ test_precopy_unix_tls_x509_default_host); ++ migration_test_add("/migration/precopy/unix/tls/x509/override-host", ++ test_precopy_unix_tls_x509_override_host); + #endif /* CONFIG_TASN1 */ + #endif /* CONFIG_GNUTLS */ + +- qtest_add_func("/migration/precopy/tcp/plain", test_precopy_tcp_plain); ++ migration_test_add("/migration/precopy/tcp/plain", 
test_precopy_tcp_plain); + +- qtest_add_func("/migration/precopy/tcp/plain/switchover-ack", +- test_precopy_tcp_switchover_ack); ++ migration_test_add("/migration/precopy/tcp/plain/switchover-ack", ++ test_precopy_tcp_switchover_ack); + + #ifdef CONFIG_GNUTLS +- qtest_add_func("/migration/precopy/tcp/tls/psk/match", +- test_precopy_tcp_tls_psk_match); +- qtest_add_func("/migration/precopy/tcp/tls/psk/mismatch", +- test_precopy_tcp_tls_psk_mismatch); ++ migration_test_add("/migration/precopy/tcp/tls/psk/match", ++ test_precopy_tcp_tls_psk_match); ++ migration_test_add("/migration/precopy/tcp/tls/psk/mismatch", ++ test_precopy_tcp_tls_psk_mismatch); + #ifdef CONFIG_TASN1 +- qtest_add_func("/migration/precopy/tcp/tls/x509/default-host", +- test_precopy_tcp_tls_x509_default_host); +- qtest_add_func("/migration/precopy/tcp/tls/x509/override-host", +- test_precopy_tcp_tls_x509_override_host); +- qtest_add_func("/migration/precopy/tcp/tls/x509/mismatch-host", +- test_precopy_tcp_tls_x509_mismatch_host); +- qtest_add_func("/migration/precopy/tcp/tls/x509/friendly-client", +- test_precopy_tcp_tls_x509_friendly_client); +- qtest_add_func("/migration/precopy/tcp/tls/x509/hostile-client", +- test_precopy_tcp_tls_x509_hostile_client); +- qtest_add_func("/migration/precopy/tcp/tls/x509/allow-anon-client", +- test_precopy_tcp_tls_x509_allow_anon_client); +- qtest_add_func("/migration/precopy/tcp/tls/x509/reject-anon-client", +- test_precopy_tcp_tls_x509_reject_anon_client); ++ migration_test_add("/migration/precopy/tcp/tls/x509/default-host", ++ test_precopy_tcp_tls_x509_default_host); ++ migration_test_add("/migration/precopy/tcp/tls/x509/override-host", ++ test_precopy_tcp_tls_x509_override_host); ++ migration_test_add("/migration/precopy/tcp/tls/x509/mismatch-host", ++ test_precopy_tcp_tls_x509_mismatch_host); ++ migration_test_add("/migration/precopy/tcp/tls/x509/friendly-client", ++ test_precopy_tcp_tls_x509_friendly_client); ++ 
migration_test_add("/migration/precopy/tcp/tls/x509/hostile-client", ++ test_precopy_tcp_tls_x509_hostile_client); ++ migration_test_add("/migration/precopy/tcp/tls/x509/allow-anon-client", ++ test_precopy_tcp_tls_x509_allow_anon_client); ++ migration_test_add("/migration/precopy/tcp/tls/x509/reject-anon-client", ++ test_precopy_tcp_tls_x509_reject_anon_client); + #endif /* CONFIG_TASN1 */ + #endif /* CONFIG_GNUTLS */ + +- /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */ ++ /* migration_test_add("/migration/ignore_shared", test_ignore_shared); */ + #ifndef _WIN32 +- qtest_add_func("/migration/fd_proto", test_migrate_fd_proto); ++ migration_test_add("/migration/fd_proto", test_migrate_fd_proto); + #endif +- qtest_add_func("/migration/validate_uuid", test_validate_uuid); +- qtest_add_func("/migration/validate_uuid_error", test_validate_uuid_error); +- qtest_add_func("/migration/validate_uuid_src_not_set", +- test_validate_uuid_src_not_set); +- qtest_add_func("/migration/validate_uuid_dst_not_set", +- test_validate_uuid_dst_not_set); ++ migration_test_add("/migration/validate_uuid", test_validate_uuid); ++ migration_test_add("/migration/validate_uuid_error", ++ test_validate_uuid_error); ++ migration_test_add("/migration/validate_uuid_src_not_set", ++ test_validate_uuid_src_not_set); ++ migration_test_add("/migration/validate_uuid_dst_not_set", ++ test_validate_uuid_dst_not_set); + /* + * See explanation why this test is slow on function definition + */ + if (g_test_slow()) { +- qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); ++ migration_test_add("/migration/auto_converge", ++ test_migrate_auto_converge); + if (g_str_equal(arch, "x86_64") && + has_kvm && kvm_dirty_ring_supported()) { +- qtest_add_func("/migration/dirty_limit", test_migrate_dirty_limit); ++ migration_test_add("/migration/dirty_limit", ++ test_migrate_dirty_limit); + } + } +- qtest_add_func("/migration/multifd/tcp/plain/none", +- test_multifd_tcp_none); ++ 
migration_test_add("/migration/multifd/tcp/plain/none", ++ test_multifd_tcp_none); + /* + * This test is flaky and sometimes fails in CI and otherwise: + * don't run unless user opts in via environment variable. + */ + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/multifd/tcp/plain/cancel", +- test_multifd_tcp_cancel); ++ migration_test_add("/migration/multifd/tcp/plain/cancel", ++ test_multifd_tcp_cancel); + } +- qtest_add_func("/migration/multifd/tcp/plain/zlib", +- test_multifd_tcp_zlib); ++ migration_test_add("/migration/multifd/tcp/plain/zlib", ++ test_multifd_tcp_zlib); + #ifdef CONFIG_ZSTD +- qtest_add_func("/migration/multifd/tcp/plain/zstd", +- test_multifd_tcp_zstd); ++ migration_test_add("/migration/multifd/tcp/plain/zstd", ++ test_multifd_tcp_zstd); + #endif + #ifdef CONFIG_GNUTLS +- qtest_add_func("/migration/multifd/tcp/tls/psk/match", +- test_multifd_tcp_tls_psk_match); +- qtest_add_func("/migration/multifd/tcp/tls/psk/mismatch", +- test_multifd_tcp_tls_psk_mismatch); ++ migration_test_add("/migration/multifd/tcp/tls/psk/match", ++ test_multifd_tcp_tls_psk_match); ++ migration_test_add("/migration/multifd/tcp/tls/psk/mismatch", ++ test_multifd_tcp_tls_psk_mismatch); + #ifdef CONFIG_TASN1 +- qtest_add_func("/migration/multifd/tcp/tls/x509/default-host", +- test_multifd_tcp_tls_x509_default_host); +- qtest_add_func("/migration/multifd/tcp/tls/x509/override-host", +- test_multifd_tcp_tls_x509_override_host); +- qtest_add_func("/migration/multifd/tcp/tls/x509/mismatch-host", +- test_multifd_tcp_tls_x509_mismatch_host); +- qtest_add_func("/migration/multifd/tcp/tls/x509/allow-anon-client", +- test_multifd_tcp_tls_x509_allow_anon_client); +- qtest_add_func("/migration/multifd/tcp/tls/x509/reject-anon-client", +- test_multifd_tcp_tls_x509_reject_anon_client); ++ migration_test_add("/migration/multifd/tcp/tls/x509/default-host", ++ test_multifd_tcp_tls_x509_default_host); ++ 
migration_test_add("/migration/multifd/tcp/tls/x509/override-host", ++ test_multifd_tcp_tls_x509_override_host); ++ migration_test_add("/migration/multifd/tcp/tls/x509/mismatch-host", ++ test_multifd_tcp_tls_x509_mismatch_host); ++ migration_test_add("/migration/multifd/tcp/tls/x509/allow-anon-client", ++ test_multifd_tcp_tls_x509_allow_anon_client); ++ migration_test_add("/migration/multifd/tcp/tls/x509/reject-anon-client", ++ test_multifd_tcp_tls_x509_reject_anon_client); + #endif /* CONFIG_TASN1 */ + #endif /* CONFIG_GNUTLS */ + + if (g_str_equal(arch, "x86_64") && has_kvm && kvm_dirty_ring_supported()) { +- qtest_add_func("/migration/dirty_ring", +- test_precopy_unix_dirty_ring); +- qtest_add_func("/migration/vcpu_dirty_limit", +- test_vcpu_dirty_limit); ++ migration_test_add("/migration/dirty_ring", ++ test_precopy_unix_dirty_ring); ++ migration_test_add("/migration/vcpu_dirty_limit", ++ test_vcpu_dirty_limit); + } + + ret = g_test_run(); +-- +2.33.0 + diff --git a/tests-unit-test-char-Avoid-using-g_alloca.patch b/tests-unit-test-char-Avoid-using-g_alloca.patch new file mode 100644 index 0000000000000000000000000000000000000000..e28323ec53a6963bec8784437526bd9eceeb358a --- /dev/null +++ b/tests-unit-test-char-Avoid-using-g_alloca.patch @@ -0,0 +1,46 @@ +From 693b6555bb16c82ec8fefa50263b0e8fcdc54cdc Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 14 Jun 2025 15:59:16 +0800 +Subject: [PATCH] tests/unit/test-char: Avoid using g_alloca() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + tests/unit/test-char: Avoid using g_alloca() + + Do not use g_alloca(), simply allocate the CharBackend + structure on the stack. 
+ + Signed-off-by: Philippe Mathieu-Daudé + Reviewed-by: Pierrick Bouvier + Reviewed-by: Stefan Hajnoczi + + Signed-off-by: dinglimin +--- + tests/unit/test-char.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/tests/unit/test-char.c b/tests/unit/test-char.c +index 649fdf64e1..0cb2633190 100644 +--- a/tests/unit/test-char.c ++++ b/tests/unit/test-char.c +@@ -574,7 +574,7 @@ static void char_udp_test_internal(Chardev *reuse_chr, int sock) + struct sockaddr_in other; + SocketIdleData d = { 0, }; + Chardev *chr; +- CharBackend *be; ++ CharBackend stack_be, *be = &stack_be; + socklen_t alen = sizeof(other); + int ret; + char buf[10]; +@@ -590,7 +590,6 @@ static void char_udp_test_internal(Chardev *reuse_chr, int sock) + chr = qemu_chr_new("client", tmp, NULL); + g_assert_nonnull(chr); + +- be = g_alloca(sizeof(CharBackend)); + qemu_chr_fe_init(be, chr, &error_abort); + } + +-- +2.33.0 + diff --git a/tests-virt-Allow-changes-to-PPTT-test-table.patch b/tests-virt-Allow-changes-to-PPTT-test-table.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9cb5f6893a4ad9a7e80437b9a778abdb259bfae --- /dev/null +++ b/tests-virt-Allow-changes-to-PPTT-test-table.patch @@ -0,0 +1,25 @@ +From 3402740cb4f6d6b9baabfde0a7667b4990b010a5 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 30 Mar 2024 19:21:59 +0800 +Subject: [PATCH] tests: virt: Allow changes to PPTT test table + +Allow changes to test/data/acpi/virt/PPTT*, prepare to change the +building policy of the cluster topology. 
+ +Signed-off-by: Kunkun Jiang +--- + tests/qtest/bios-tables-test-allowed-diff.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..18d02a710d 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,4 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/PPTT", ++"tests/data/acpi/virt/PPTT.acpihmatvirt", ++"tests/data/acpi/virt/PPTT.topology", +-- +2.27.0 + diff --git a/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch b/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..6cb5a114d1d72fba550886fe5f58ea0ba1954ffd --- /dev/null +++ b/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch @@ -0,0 +1,25 @@ +From b062e2f182af4c44fbd3a03eda9c934686037032 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 30 Mar 2024 20:16:32 +0800 +Subject: [PATCH] tests: virt: Update expected ACPI tables for virt test + +Update the ACPI tables according to the acpi aml_build change, also +empty bios-tables-test-allowed-diff.h. 
+ +Signed-off-by: Kunkun Jiang +--- + tests/qtest/bios-tables-test-allowed-diff.h | 3 --- + 1 files changed, 3 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 18d02a710d..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,4 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/virt/PPTT", +-"tests/data/acpi/virt/PPTT.acpihmatvirt", +-"tests/data/acpi/virt/PPTT.topology", +-- +2.27.0 + diff --git a/tpm-Add-the-SysBus-TPM-TIS-device.patch b/tpm-Add-the-SysBus-TPM-TIS-device.patch deleted file mode 100644 index e0a6254025932eb942b3a15d16b66d4808a33f42..0000000000000000000000000000000000000000 --- a/tpm-Add-the-SysBus-TPM-TIS-device.patch +++ /dev/null @@ -1,231 +0,0 @@ -From 4fe655326eeae322b621dcc25c53af722d2e1afa Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Tue, 11 Aug 2020 11:23:34 +0800 -Subject: [PATCH 14/19] tpm: Add the SysBus TPM TIS device - -Introduce the tpm-tis-device which is a sysbus device -and is bound to be used on ARM. 
- -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Tested-by: Ard Biesheuvel -Acked-by: Ard Biesheuvel -Message-id: 20200305165149.618-6-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - hw/tpm/Kconfig | 5 ++ - hw/tpm/Makefile.objs | 1 + - hw/tpm/tpm_tis_sysbus.c | 159 ++++++++++++++++++++++++++++++++++++++++ - include/sysemu/tpm.h | 1 + - 4 files changed, 166 insertions(+) - create mode 100644 hw/tpm/tpm_tis_sysbus.c - -diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig -index 686f8206..4794e7fe 100644 ---- a/hw/tpm/Kconfig -+++ b/hw/tpm/Kconfig -@@ -7,6 +7,11 @@ config TPM_TIS_ISA - depends on TPM && ISA_BUS - select TPM_TIS - -+config TPM_TIS_SYSBUS -+ bool -+ depends on TPM -+ select TPM_TIS -+ - config TPM_TIS - bool - depends on TPM -diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs -index 3ef2036c..f1ec4beb 100644 ---- a/hw/tpm/Makefile.objs -+++ b/hw/tpm/Makefile.objs -@@ -1,6 +1,7 @@ - common-obj-$(CONFIG_TPM) += tpm_util.o - obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o - common-obj-$(CONFIG_TPM_TIS_ISA) += tpm_tis_isa.o -+common-obj-$(CONFIG_TPM_TIS_SYSBUS) += tpm_tis_sysbus.o - common-obj-$(CONFIG_TPM_TIS) += tpm_tis_common.o - common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o - common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o -diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c -new file mode 100644 -index 00000000..18c02aed ---- /dev/null -+++ b/hw/tpm/tpm_tis_sysbus.c -@@ -0,0 +1,159 @@ -+/* -+ * tpm_tis_sysbus.c - QEMU's TPM TIS SYSBUS Device -+ * -+ * Copyright (C) 2006,2010-2013 IBM Corporation -+ * -+ * Authors: -+ * Stefan Berger -+ * David Safford -+ * -+ * Xen 4 support: Andrease Niederl -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ * -+ * Implementation of the TIS interface according to specs found at -+ * http://www.trustedcomputinggroup.org. 
This implementation currently -+ * supports version 1.3, 21 March 2013 -+ * In the developers menu choose the PC Client section then find the TIS -+ * specification. -+ * -+ * TPM TIS for TPM 2 implementation following TCG PC Client Platform -+ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 -+ */ -+ -+#include "qemu/osdep.h" -+#include "hw/qdev-properties.h" -+#include "migration/vmstate.h" -+#include "tpm_util.h" -+#include "hw/sysbus.h" -+#include "tpm_tis.h" -+ -+typedef struct TPMStateSysBus { -+ /*< private >*/ -+ SysBusDevice parent_obj; -+ -+ /*< public >*/ -+ TPMState state; /* not a QOM object */ -+} TPMStateSysBus; -+ -+#define TPM_TIS_SYSBUS(obj) OBJECT_CHECK(TPMStateSysBus, (obj), TYPE_TPM_TIS_SYSBUS) -+ -+static int tpm_tis_pre_save_sysbus(void *opaque) -+{ -+ TPMStateSysBus *sbdev = opaque; -+ -+ return tpm_tis_pre_save(&sbdev->state); -+} -+ -+static const VMStateDescription vmstate_tpm_tis_sysbus = { -+ .name = "tpm-tis", -+ .version_id = 0, -+ .pre_save = tpm_tis_pre_save_sysbus, -+ .fields = (VMStateField[]) { -+ VMSTATE_BUFFER(state.buffer, TPMStateSysBus), -+ VMSTATE_UINT16(state.rw_offset, TPMStateSysBus), -+ VMSTATE_UINT8(state.active_locty, TPMStateSysBus), -+ VMSTATE_UINT8(state.aborting_locty, TPMStateSysBus), -+ VMSTATE_UINT8(state.next_locty, TPMStateSysBus), -+ -+ VMSTATE_STRUCT_ARRAY(state.loc, TPMStateSysBus, TPM_TIS_NUM_LOCALITIES, -+ 0, vmstate_locty, TPMLocality), -+ -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ -+static void tpm_tis_sysbus_request_completed(TPMIf *ti, int ret) -+{ -+ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(ti); -+ TPMState *s = &sbdev->state; -+ -+ tpm_tis_request_completed(s, ret); -+} -+ -+static enum TPMVersion tpm_tis_sysbus_get_tpm_version(TPMIf *ti) -+{ -+ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(ti); -+ TPMState *s = &sbdev->state; -+ -+ return tpm_tis_get_tpm_version(s); -+} -+ -+static void tpm_tis_sysbus_reset(DeviceState *dev) -+{ -+ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(dev); -+ TPMState *s = 
&sbdev->state; -+ -+ return tpm_tis_reset(s); -+} -+ -+static Property tpm_tis_sysbus_properties[] = { -+ DEFINE_PROP_UINT32("irq", TPMStateSysBus, state.irq_num, TPM_TIS_IRQ), -+ DEFINE_PROP_TPMBE("tpmdev", TPMStateSysBus, state.be_driver), -+ DEFINE_PROP_BOOL("ppi", TPMStateSysBus, state.ppi_enabled, true), -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ -+static void tpm_tis_sysbus_initfn(Object *obj) -+{ -+ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(obj); -+ TPMState *s = &sbdev->state; -+ -+ memory_region_init_io(&s->mmio, obj, &tpm_tis_memory_ops, -+ s, "tpm-tis-mmio", -+ TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); -+ -+ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); -+ sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); -+} -+ -+static void tpm_tis_sysbus_realizefn(DeviceState *dev, Error **errp) -+{ -+ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(dev); -+ TPMState *s = &sbdev->state; -+ -+ if (!tpm_find()) { -+ error_setg(errp, "at most one TPM device is permitted"); -+ return; -+ } -+ -+ if (!s->be_driver) { -+ error_setg(errp, "'tpmdev' property is required"); -+ return; -+ } -+} -+ -+static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(klass); -+ TPMIfClass *tc = TPM_IF_CLASS(klass); -+ -+ dc->props = tpm_tis_sysbus_properties; -+ dc->vmsd = &vmstate_tpm_tis_sysbus; -+ tc->model = TPM_MODEL_TPM_TIS; -+ dc->realize = tpm_tis_sysbus_realizefn; -+ dc->user_creatable = true; -+ dc->reset = tpm_tis_sysbus_reset; -+ tc->request_completed = tpm_tis_sysbus_request_completed; -+ tc->get_version = tpm_tis_sysbus_get_tpm_version; -+} -+ -+static const TypeInfo tpm_tis_sysbus_info = { -+ .name = TYPE_TPM_TIS_SYSBUS, -+ .parent = TYPE_SYS_BUS_DEVICE, -+ .instance_size = sizeof(TPMStateSysBus), -+ .instance_init = tpm_tis_sysbus_initfn, -+ .class_init = tpm_tis_sysbus_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_TPM_IF }, -+ { } -+ } -+}; -+ -+static void tpm_tis_sysbus_register(void) -+{ -+ 
type_register_static(&tpm_tis_sysbus_info); -+} -+ -+type_init(tpm_tis_sysbus_register) -diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h -index 1691b92c..f37851b1 100644 ---- a/include/sysemu/tpm.h -+++ b/include/sysemu/tpm.h -@@ -44,6 +44,7 @@ typedef struct TPMIfClass { - } TPMIfClass; - - #define TYPE_TPM_TIS_ISA "tpm-tis" -+#define TYPE_TPM_TIS_SYSBUS "tpm-tis-device" - #define TYPE_TPM_CRB "tpm-crb" - #define TYPE_TPM_SPAPR "tpm-spapr" - --- -2.23.0 - diff --git a/tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch b/tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch deleted file mode 100644 index 93139b5e7f8284cecf6faa9930eaa8e802db13d9..0000000000000000000000000000000000000000 --- a/tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch +++ /dev/null @@ -1,146 +0,0 @@ -From c6cf45f38cb6e28cf4db42296fedcd5f26ca610b Mon Sep 17 00:00:00 2001 -From: Stefan Berger -Date: Tue, 21 Jan 2020 10:29:30 -0500 -Subject: [PATCH 03/19] tpm: Move tpm_tis_show_buffer to tpm_util.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: Stefan Berger -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: David Gibson -Message-Id: <20200121152935.649898-2-stefanb@linux.ibm.com> -Signed-off-by: David Gibson -Signed-off-by: jiangfangjie ---- - hw/tpm/tpm_tis.c | 32 ++++---------------------------- - hw/tpm/tpm_util.c | 25 +++++++++++++++++++++++++ - hw/tpm/tpm_util.h | 3 +++ - hw/tpm/trace-events | 2 +- - 4 files changed, 33 insertions(+), 29 deletions(-) - -diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c -index d6b32128..96a9ac48 100644 ---- a/hw/tpm/tpm_tis.c -+++ b/hw/tpm/tpm_tis.c -@@ -104,30 +104,6 @@ static uint8_t tpm_tis_locality_from_addr(hwaddr addr) - return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7); - } - --static void tpm_tis_show_buffer(const unsigned char *buffer, -- size_t buffer_size, const char *string) --{ -- size_t len, i; -- char *line_buffer, *p; -- -- len = MIN(tpm_cmd_get_size(buffer), 
buffer_size); -- -- /* -- * allocate enough room for 3 chars per buffer entry plus a -- * newline after every 16 chars and a final null terminator. -- */ -- line_buffer = g_malloc(len * 3 + (len / 16) + 1); -- -- for (i = 0, p = line_buffer; i < len; i++) { -- if (i && !(i % 16)) { -- p += sprintf(p, "\n"); -- } -- p += sprintf(p, "%.2X ", buffer[i]); -- } -- trace_tpm_tis_show_buffer(string, len, line_buffer); -- -- g_free(line_buffer); --} - - /* - * Set the given flags in the STS register by clearing the register but -@@ -153,8 +129,8 @@ static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags) - */ - static void tpm_tis_tpm_send(TPMState *s, uint8_t locty) - { -- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { -- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); -+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { -+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); - } - - /* -@@ -322,8 +298,8 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret) - s->loc[locty].state = TPM_TIS_STATE_COMPLETION; - s->rw_offset = 0; - -- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { -- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); -+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { -+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); - } - - if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) { -diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c -index ee41757e..8643eb50 100644 ---- a/hw/tpm/tpm_util.c -+++ b/hw/tpm/tpm_util.c -@@ -350,3 +350,28 @@ void tpm_sized_buffer_reset(TPMSizedBuffer *tsb) - tsb->buffer = NULL; - tsb->size = 0; - } -+ -+void tpm_util_show_buffer(const unsigned char *buffer, -+ size_t buffer_size, const char *string) -+{ -+ size_t len, i; -+ char *line_buffer, *p; -+ -+ len = MIN(tpm_cmd_get_size(buffer), buffer_size); -+ -+ /* -+ * allocate enough room for 3 chars per buffer entry plus a -+ * newline after every 16 chars and a final 
null terminator. -+ */ -+ line_buffer = g_malloc(len * 3 + (len / 16) + 1); -+ -+ for (i = 0, p = line_buffer; i < len; i++) { -+ if (i && !(i % 16)) { -+ p += sprintf(p, "\n"); -+ } -+ p += sprintf(p, "%.2X ", buffer[i]); -+ } -+ trace_tpm_util_show_buffer(string, len, line_buffer); -+ -+ g_free(line_buffer); -+} -diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h -index f397ac21..7889081f 100644 ---- a/hw/tpm/tpm_util.h -+++ b/hw/tpm/tpm_util.h -@@ -79,4 +79,7 @@ typedef struct TPMSizedBuffer { - - void tpm_sized_buffer_reset(TPMSizedBuffer *tsb); - -+void tpm_util_show_buffer(const unsigned char *buffer, -+ size_t buffer_size, const char *string); -+ - #endif /* TPM_TPM_UTIL_H */ -diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events -index 0b94aa15..82c45ee5 100644 ---- a/hw/tpm/trace-events -+++ b/hw/tpm/trace-events -@@ -14,6 +14,7 @@ tpm_util_get_buffer_size_len(uint32_t len, size_t expected) "tpm_resp->len = %u, - tpm_util_get_buffer_size_hdr_len2(uint32_t len, size_t expected) "tpm2_resp->hdr.len = %u, expected = %zu" - tpm_util_get_buffer_size_len2(uint32_t len, size_t expected) "tpm2_resp->len = %u, expected = %zu" - tpm_util_get_buffer_size(size_t len) "buffersize of device: %zu" -+tpm_util_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" - - # tpm_emulator.c - tpm_emulator_set_locality(uint8_t locty) "setting locality to %d" -@@ -36,7 +37,6 @@ tpm_emulator_pre_save(void) "" - tpm_emulator_inst_init(void) "" - - # tpm_tis.c --tpm_tis_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\nbuf: %s" - tpm_tis_raise_irq(uint32_t irqmask) "Raising IRQ for flag 0x%08x" - tpm_tis_new_active_locality(uint8_t locty) "Active locality is now %d" - tpm_tis_abort(uint8_t locty) "New active locality is %d" --- -2.23.0 - diff --git a/tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch b/tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch deleted file mode 100644 index 
97dcaa000b251bd8a4390a5c68e75011aef9401f..0000000000000000000000000000000000000000 --- a/tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 1eca7dbacabbc8ccc737f320839e7800fef5dfa1 Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Tue, 11 Aug 2020 12:42:31 +0800 -Subject: [PATCH 13/19] tpm: Separate TPM_TIS and TPM_TIS_ISA configs - MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 - Content-Transfer-Encoding: 8bit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Let's separate the compilation of tpm_tis_common.c from -the compilation of tpm_tis_isa.c - -The common part will be also compiled along with the -tpm_tis_sysbus device. - -Signed-off-by: Eric Auger -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Ard Biesheuvel -Acked-by: Ard Biesheuvel -Message-id: 20200305165149.618-5-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - default-configs/i386-softmmu.mak | 2 +- - hw/i386/Kconfig | 2 +- - hw/tpm/Kconfig | 7 ++++++- - hw/tpm/Makefile.objs | 3 ++- - tests/Makefile.include | 4 ++-- - 5 files changed, 12 insertions(+), 6 deletions(-) - -diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak -index cd5ea391..bdeef670 100644 ---- a/default-configs/i386-softmmu.mak -+++ b/default-configs/i386-softmmu.mak -@@ -17,7 +17,7 @@ - #CONFIG_SGA=n - #CONFIG_TEST_DEVICES=n - #CONFIG_TPM_CRB=n --#CONFIG_TPM_TIS=n -+#CONFIG_TPM_TIS_ISA=n - #CONFIG_VTD=n - - # Boards: -diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig -index 63504380..60334504 100644 ---- a/hw/i386/Kconfig -+++ b/hw/i386/Kconfig -@@ -17,7 +17,7 @@ config PC - imply SGA - imply TEST_DEVICES - imply TPM_CRB -- imply TPM_TIS -+ imply TPM_TIS_ISA - imply VGA_PCI - imply VIRTIO_VGA - select FDC -diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig -index 9e67d990..686f8206 100644 ---- a/hw/tpm/Kconfig -+++ b/hw/tpm/Kconfig -@@ -2,9 +2,14 @@ config TPMDEV - bool - depends 
on TPM - --config TPM_TIS -+config TPM_TIS_ISA - bool - depends on TPM && ISA_BUS -+ select TPM_TIS -+ -+config TPM_TIS -+ bool -+ depends on TPM - select TPMDEV - - config TPM_CRB -diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs -index fcc4c2f2..3ef2036c 100644 ---- a/hw/tpm/Makefile.objs -+++ b/hw/tpm/Makefile.objs -@@ -1,6 +1,7 @@ - common-obj-$(CONFIG_TPM) += tpm_util.o - obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o --common-obj-$(CONFIG_TPM_TIS) += tpm_tis_isa.o tpm_tis_common.o -+common-obj-$(CONFIG_TPM_TIS_ISA) += tpm_tis_isa.o -+common-obj-$(CONFIG_TPM_TIS) += tpm_tis_common.o - common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o - common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o - common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o -diff --git a/tests/Makefile.include b/tests/Makefile.include -index f3273ad3..c151de64 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -190,8 +190,8 @@ check-qtest-i386-y += tests/q35-test$(EXESUF) - check-qtest-i386-y += tests/vmgenid-test$(EXESUF) - check-qtest-i386-$(CONFIG_TPM_CRB) += tests/tpm-crb-swtpm-test$(EXESUF) - check-qtest-i386-$(CONFIG_TPM_CRB) += tests/tpm-crb-test$(EXESUF) --check-qtest-i386-$(CONFIG_TPM_TIS) += tests/tpm-tis-swtpm-test$(EXESUF) --check-qtest-i386-$(CONFIG_TPM_TIS) += tests/tpm-tis-test$(EXESUF) -+check-qtest-i386-$(CONFIG_TPM_TIS_ISA) += tests/tpm-tis-swtpm-test$(EXESUF) -+check-qtest-i386-$(CONFIG_TPM_TIS_ISA) += tests/tpm-tis-test$(EXESUF) - check-qtest-i386-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) - check-qtest-i386-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) - check-qtest-i386-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) --- -2.23.0 - diff --git a/tpm-Separate-tpm_tis-common-functions-from-isa-code.patch b/tpm-Separate-tpm_tis-common-functions-from-isa-code.patch deleted file mode 100644 index 32f180c98d784b1478268a768b4caed6c8a3fa23..0000000000000000000000000000000000000000 --- 
a/tpm-Separate-tpm_tis-common-functions-from-isa-code.patch +++ /dev/null @@ -1,1194 +0,0 @@ -From 425f6bc8392c71d2f29b572d19232785d0ab0b73 Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Tue, 11 Aug 2020 02:55:35 +0000 -Subject: [PATCH 12/19] tpm: Separate tpm_tis common functions from isa code - -Move the device agnostic code into tpm_tis_common.c and -put the ISA device specific code into tpm_tis_isa.c - -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Tested-by: Ard Biesheuvel -Acked-by: Ard Biesheuvel -Message-id: 20200305165149.618-4-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - hw/tpm/Makefile.objs | 2 +- - hw/tpm/{tpm_tis.c => tpm_tis.c.orig} | 0 - hw/tpm/tpm_tis.h | 91 +++ - hw/tpm/tpm_tis_common.c | 869 +++++++++++++++++++++++++++ - hw/tpm/tpm_tis_isa.c | 170 ++++++ - 5 files changed, 1131 insertions(+), 1 deletion(-) - rename hw/tpm/{tpm_tis.c => tpm_tis.c.orig} (100%) - create mode 100644 hw/tpm/tpm_tis.h - create mode 100644 hw/tpm/tpm_tis_common.c - create mode 100644 hw/tpm/tpm_tis_isa.c - -diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs -index 85eb99ae..fcc4c2f2 100644 ---- a/hw/tpm/Makefile.objs -+++ b/hw/tpm/Makefile.objs -@@ -1,6 +1,6 @@ - common-obj-$(CONFIG_TPM) += tpm_util.o - obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o --common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o -+common-obj-$(CONFIG_TPM_TIS) += tpm_tis_isa.o tpm_tis_common.o - common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o - common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o - common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o -diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c.orig -similarity index 100% -rename from hw/tpm/tpm_tis.c -rename to hw/tpm/tpm_tis.c.orig -diff --git a/hw/tpm/tpm_tis.h b/hw/tpm/tpm_tis.h -new file mode 100644 -index 00000000..55549893 ---- /dev/null -+++ b/hw/tpm/tpm_tis.h -@@ -0,0 +1,91 @@ -+/* -+ * tpm_tis.h - QEMU's TPM TIS common header -+ * -+ * Copyright (C) 2006,2010-2013 
IBM Corporation -+ * -+ * Authors: -+ * Stefan Berger -+ * David Safford -+ * -+ * Xen 4 support: Andrease Niederl -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ * -+ * Implementation of the TIS interface according to specs found at -+ * http://www.trustedcomputinggroup.org. This implementation currently -+ * supports version 1.3, 21 March 2013 -+ * In the developers menu choose the PC Client section then find the TIS -+ * specification. -+ * -+ * TPM TIS for TPM 2 implementation following TCG PC Client Platform -+ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 -+ */ -+#ifndef TPM_TPM_TIS_H -+#define TPM_TPM_TIS_H -+ -+#include "qemu/osdep.h" -+#include "sysemu/tpm_backend.h" -+#include "tpm_ppi.h" -+ -+#define TPM_TIS_NUM_LOCALITIES 5 /* per spec */ -+#define TPM_TIS_LOCALITY_SHIFT 12 -+#define TPM_TIS_NO_LOCALITY 0xff -+ -+#define TPM_TIS_IS_VALID_LOCTY(x) ((x) < TPM_TIS_NUM_LOCALITIES) -+ -+#define TPM_TIS_BUFFER_MAX 4096 -+ -+typedef enum { -+ TPM_TIS_STATE_IDLE = 0, -+ TPM_TIS_STATE_READY, -+ TPM_TIS_STATE_COMPLETION, -+ TPM_TIS_STATE_EXECUTION, -+ TPM_TIS_STATE_RECEPTION, -+} TPMTISState; -+ -+/* locality data -- all fields are persisted */ -+typedef struct TPMLocality { -+ TPMTISState state; -+ uint8_t access; -+ uint32_t sts; -+ uint32_t iface_id; -+ uint32_t inte; -+ uint32_t ints; -+} TPMLocality; -+ -+typedef struct TPMState { -+ MemoryRegion mmio; -+ -+ unsigned char buffer[TPM_TIS_BUFFER_MAX]; -+ uint16_t rw_offset; -+ -+ uint8_t active_locty; -+ uint8_t aborting_locty; -+ uint8_t next_locty; -+ -+ TPMLocality loc[TPM_TIS_NUM_LOCALITIES]; -+ -+ qemu_irq irq; -+ uint32_t irq_num; -+ -+ TPMBackendCmd cmd; -+ -+ TPMBackend *be_driver; -+ TPMVersion be_tpm_version; -+ -+ size_t be_buffer_size; -+ -+ bool ppi_enabled; -+ TPMPPI ppi; -+} TPMState; -+ -+extern const VMStateDescription vmstate_locty; -+extern const MemoryRegionOps tpm_tis_memory_ops; 
-+ -+int tpm_tis_pre_save(TPMState *s); -+void tpm_tis_reset(TPMState *s); -+enum TPMVersion tpm_tis_get_tpm_version(TPMState *s); -+void tpm_tis_request_completed(TPMState *s, int ret); -+ -+#endif /* TPM_TPM_TIS_H */ -diff --git a/hw/tpm/tpm_tis_common.c b/hw/tpm/tpm_tis_common.c -new file mode 100644 -index 00000000..9a51c71e ---- /dev/null -+++ b/hw/tpm/tpm_tis_common.c -@@ -0,0 +1,869 @@ -+/* -+ * tpm_tis_common.c - QEMU's TPM TIS interface emulator -+ * device agnostic functions -+ * -+ * Copyright (C) 2006,2010-2013 IBM Corporation -+ * -+ * Authors: -+ * Stefan Berger -+ * David Safford -+ * -+ * Xen 4 support: Andrease Niederl -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ * -+ * Implementation of the TIS interface according to specs found at -+ * http://www.trustedcomputinggroup.org. This implementation currently -+ * supports version 1.3, 21 March 2013 -+ * In the developers menu choose the PC Client section then find the TIS -+ * specification. 
-+ * -+ * TPM TIS for TPM 2 implementation following TCG PC Client Platform -+ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 -+ */ -+#include "qemu/osdep.h" -+#include "hw/isa/isa.h" -+#include "qapi/error.h" -+#include "qemu/module.h" -+ -+#include "hw/acpi/tpm.h" -+#include "hw/pci/pci_ids.h" -+#include "sysemu/tpm_backend.h" -+#include "tpm_int.h" -+#include "tpm_util.h" -+#include "tpm_ppi.h" -+#include "trace.h" -+ -+#include "tpm_tis.h" -+ -+#define DEBUG_TIS 0 -+ -+/* local prototypes */ -+ -+static uint64_t tpm_tis_mmio_read(void *opaque, hwaddr addr, -+ unsigned size); -+ -+/* utility functions */ -+ -+static uint8_t tpm_tis_locality_from_addr(hwaddr addr) -+{ -+ return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7); -+} -+ -+ -+/* -+ * Set the given flags in the STS register by clearing the register but -+ * preserving the SELFTEST_DONE and TPM_FAMILY_MASK flags and then setting -+ * the new flags. -+ * -+ * The SELFTEST_DONE flag is acquired from the backend that determines it by -+ * peeking into TPM commands. -+ * -+ * A VM suspend/resume will preserve the flag by storing it into the VM -+ * device state, but the backend will not remember it when QEMU is started -+ * again. Therefore, we cache the flag here. Once set, it will not be unset -+ * except by a reset. -+ */ -+static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags) -+{ -+ l->sts &= TPM_TIS_STS_SELFTEST_DONE | TPM_TIS_STS_TPM_FAMILY_MASK; -+ l->sts |= flags; -+} -+ -+/* -+ * Send a request to the TPM. 
-+ */ -+static void tpm_tis_tpm_send(TPMState *s, uint8_t locty) -+{ -+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { -+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); -+ } -+ -+ /* -+ * rw_offset serves as length indicator for length of data; -+ * it's reset when the response comes back -+ */ -+ s->loc[locty].state = TPM_TIS_STATE_EXECUTION; -+ -+ s->cmd = (TPMBackendCmd) { -+ .locty = locty, -+ .in = s->buffer, -+ .in_len = s->rw_offset, -+ .out = s->buffer, -+ .out_len = s->be_buffer_size, -+ }; -+ -+ tpm_backend_deliver_request(s->be_driver, &s->cmd); -+} -+ -+/* raise an interrupt if allowed */ -+static void tpm_tis_raise_irq(TPMState *s, uint8_t locty, uint32_t irqmask) -+{ -+ if (!TPM_TIS_IS_VALID_LOCTY(locty)) { -+ return; -+ } -+ -+ if ((s->loc[locty].inte & TPM_TIS_INT_ENABLED) && -+ (s->loc[locty].inte & irqmask)) { -+ trace_tpm_tis_raise_irq(irqmask); -+ qemu_irq_raise(s->irq); -+ s->loc[locty].ints |= irqmask; -+ } -+} -+ -+static uint32_t tpm_tis_check_request_use_except(TPMState *s, uint8_t locty) -+{ -+ uint8_t l; -+ -+ for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) { -+ if (l == locty) { -+ continue; -+ } -+ if ((s->loc[l].access & TPM_TIS_ACCESS_REQUEST_USE)) { -+ return 1; -+ } -+ } -+ -+ return 0; -+} -+ -+static void tpm_tis_new_active_locality(TPMState *s, uint8_t new_active_locty) -+{ -+ bool change = (s->active_locty != new_active_locty); -+ bool is_seize; -+ uint8_t mask; -+ -+ if (change && TPM_TIS_IS_VALID_LOCTY(s->active_locty)) { -+ is_seize = TPM_TIS_IS_VALID_LOCTY(new_active_locty) && -+ s->loc[new_active_locty].access & TPM_TIS_ACCESS_SEIZE; -+ -+ if (is_seize) { -+ mask = ~(TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ } else { -+ mask = ~(TPM_TIS_ACCESS_ACTIVE_LOCALITY| -+ TPM_TIS_ACCESS_REQUEST_USE); -+ } -+ /* reset flags on the old active locality */ -+ s->loc[s->active_locty].access &= mask; -+ -+ if (is_seize) { -+ s->loc[s->active_locty].access |= TPM_TIS_ACCESS_BEEN_SEIZED; -+ } -+ } -+ -+ 
s->active_locty = new_active_locty; -+ -+ trace_tpm_tis_new_active_locality(s->active_locty); -+ -+ if (TPM_TIS_IS_VALID_LOCTY(new_active_locty)) { -+ /* set flags on the new active locality */ -+ s->loc[new_active_locty].access |= TPM_TIS_ACCESS_ACTIVE_LOCALITY; -+ s->loc[new_active_locty].access &= ~(TPM_TIS_ACCESS_REQUEST_USE | -+ TPM_TIS_ACCESS_SEIZE); -+ } -+ -+ if (change) { -+ tpm_tis_raise_irq(s, s->active_locty, TPM_TIS_INT_LOCALITY_CHANGED); -+ } -+} -+ -+/* abort -- this function switches the locality */ -+static void tpm_tis_abort(TPMState *s) -+{ -+ s->rw_offset = 0; -+ -+ trace_tpm_tis_abort(s->next_locty); -+ -+ /* -+ * Need to react differently depending on who's aborting now and -+ * which locality will become active afterwards. -+ */ -+ if (s->aborting_locty == s->next_locty) { -+ s->loc[s->aborting_locty].state = TPM_TIS_STATE_READY; -+ tpm_tis_sts_set(&s->loc[s->aborting_locty], -+ TPM_TIS_STS_COMMAND_READY); -+ tpm_tis_raise_irq(s, s->aborting_locty, TPM_TIS_INT_COMMAND_READY); -+ } -+ -+ /* locality after abort is another one than the current one */ -+ tpm_tis_new_active_locality(s, s->next_locty); -+ -+ s->next_locty = TPM_TIS_NO_LOCALITY; -+ /* nobody's aborting a command anymore */ -+ s->aborting_locty = TPM_TIS_NO_LOCALITY; -+} -+ -+/* prepare aborting current command */ -+static void tpm_tis_prep_abort(TPMState *s, uint8_t locty, uint8_t newlocty) -+{ -+ uint8_t busy_locty; -+ -+ assert(TPM_TIS_IS_VALID_LOCTY(newlocty)); -+ -+ s->aborting_locty = locty; /* may also be TPM_TIS_NO_LOCALITY */ -+ s->next_locty = newlocty; /* locality after successful abort */ -+ -+ /* -+ * only abort a command using an interrupt if currently executing -+ * a command AND if there's a valid connection to the vTPM. -+ */ -+ for (busy_locty = 0; busy_locty < TPM_TIS_NUM_LOCALITIES; busy_locty++) { -+ if (s->loc[busy_locty].state == TPM_TIS_STATE_EXECUTION) { -+ /* -+ * request the backend to cancel. 
Some backends may not -+ * support it -+ */ -+ tpm_backend_cancel_cmd(s->be_driver); -+ return; -+ } -+ } -+ -+ tpm_tis_abort(s); -+} -+ -+/* -+ * Callback from the TPM to indicate that the response was received. -+ */ -+void tpm_tis_request_completed(TPMState *s, int ret) -+{ -+ uint8_t locty = s->cmd.locty; -+ uint8_t l; -+ -+ assert(TPM_TIS_IS_VALID_LOCTY(locty)); -+ -+ if (s->cmd.selftest_done) { -+ for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) { -+ s->loc[l].sts |= TPM_TIS_STS_SELFTEST_DONE; -+ } -+ } -+ -+ /* FIXME: report error if ret != 0 */ -+ tpm_tis_sts_set(&s->loc[locty], -+ TPM_TIS_STS_VALID | TPM_TIS_STS_DATA_AVAILABLE); -+ s->loc[locty].state = TPM_TIS_STATE_COMPLETION; -+ s->rw_offset = 0; -+ -+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { -+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); -+ } -+ -+ if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) { -+ tpm_tis_abort(s); -+ } -+ -+ tpm_tis_raise_irq(s, locty, -+ TPM_TIS_INT_DATA_AVAILABLE | TPM_TIS_INT_STS_VALID); -+} -+ -+/* -+ * Read a byte of response data -+ */ -+static uint32_t tpm_tis_data_read(TPMState *s, uint8_t locty) -+{ -+ uint32_t ret = TPM_TIS_NO_DATA_BYTE; -+ uint16_t len; -+ -+ if ((s->loc[locty].sts & TPM_TIS_STS_DATA_AVAILABLE)) { -+ len = MIN(tpm_cmd_get_size(&s->buffer), -+ s->be_buffer_size); -+ -+ ret = s->buffer[s->rw_offset++]; -+ if (s->rw_offset >= len) { -+ /* got last byte */ -+ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_VALID); -+ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_STS_VALID); -+ } -+ trace_tpm_tis_data_read(ret, s->rw_offset - 1); -+ } -+ -+ return ret; -+} -+ -+#ifdef DEBUG_TIS -+static void tpm_tis_dump_state(TPMState *s, hwaddr addr) -+{ -+ static const unsigned regs[] = { -+ TPM_TIS_REG_ACCESS, -+ TPM_TIS_REG_INT_ENABLE, -+ TPM_TIS_REG_INT_VECTOR, -+ TPM_TIS_REG_INT_STATUS, -+ TPM_TIS_REG_INTF_CAPABILITY, -+ TPM_TIS_REG_STS, -+ TPM_TIS_REG_DID_VID, -+ TPM_TIS_REG_RID, -+ 0xfff}; -+ int idx; -+ uint8_t locty = 
tpm_tis_locality_from_addr(addr); -+ hwaddr base = addr & ~0xfff; -+ -+ printf("tpm_tis: active locality : %d\n" -+ "tpm_tis: state of locality %d : %d\n" -+ "tpm_tis: register dump:\n", -+ s->active_locty, -+ locty, s->loc[locty].state); -+ -+ for (idx = 0; regs[idx] != 0xfff; idx++) { -+ printf("tpm_tis: 0x%04x : 0x%08x\n", regs[idx], -+ (int)tpm_tis_mmio_read(s, base + regs[idx], 4)); -+ } -+ -+ printf("tpm_tis: r/w offset : %d\n" -+ "tpm_tis: result buffer : ", -+ s->rw_offset); -+ for (idx = 0; -+ idx < MIN(tpm_cmd_get_size(&s->buffer), s->be_buffer_size); -+ idx++) { -+ printf("%c%02x%s", -+ s->rw_offset == idx ? '>' : ' ', -+ s->buffer[idx], -+ ((idx & 0xf) == 0xf) ? "\ntpm_tis: " : ""); -+ } -+ printf("\n"); -+} -+#endif -+ -+/* -+ * Read a register of the TIS interface -+ * See specs pages 33-63 for description of the registers -+ */ -+static uint64_t tpm_tis_mmio_read(void *opaque, hwaddr addr, -+ unsigned size) -+{ -+ TPMState *s = opaque; -+ uint16_t offset = addr & 0xffc; -+ uint8_t shift = (addr & 0x3) * 8; -+ uint32_t val = 0xffffffff; -+ uint8_t locty = tpm_tis_locality_from_addr(addr); -+ uint32_t avail; -+ uint8_t v; -+ -+ if (tpm_backend_had_startup_error(s->be_driver)) { -+ return 0; -+ } -+ -+ switch (offset) { -+ case TPM_TIS_REG_ACCESS: -+ /* never show the SEIZE flag even though we use it internally */ -+ val = s->loc[locty].access & ~TPM_TIS_ACCESS_SEIZE; -+ /* the pending flag is always calculated */ -+ if (tpm_tis_check_request_use_except(s, locty)) { -+ val |= TPM_TIS_ACCESS_PENDING_REQUEST; -+ } -+ val |= !tpm_backend_get_tpm_established_flag(s->be_driver); -+ break; -+ case TPM_TIS_REG_INT_ENABLE: -+ val = s->loc[locty].inte; -+ break; -+ case TPM_TIS_REG_INT_VECTOR: -+ val = s->irq_num; -+ break; -+ case TPM_TIS_REG_INT_STATUS: -+ val = s->loc[locty].ints; -+ break; -+ case TPM_TIS_REG_INTF_CAPABILITY: -+ switch (s->be_tpm_version) { -+ case TPM_VERSION_UNSPEC: -+ val = 0; -+ break; -+ case TPM_VERSION_1_2: -+ val = 
TPM_TIS_CAPABILITIES_SUPPORTED1_3; -+ break; -+ case TPM_VERSION_2_0: -+ val = TPM_TIS_CAPABILITIES_SUPPORTED2_0; -+ break; -+ } -+ break; -+ case TPM_TIS_REG_STS: -+ if (s->active_locty == locty) { -+ if ((s->loc[locty].sts & TPM_TIS_STS_DATA_AVAILABLE)) { -+ val = TPM_TIS_BURST_COUNT( -+ MIN(tpm_cmd_get_size(&s->buffer), -+ s->be_buffer_size) -+ - s->rw_offset) | s->loc[locty].sts; -+ } else { -+ avail = s->be_buffer_size - s->rw_offset; -+ /* -+ * byte-sized reads should not return 0x00 for 0x100 -+ * available bytes. -+ */ -+ if (size == 1 && avail > 0xff) { -+ avail = 0xff; -+ } -+ val = TPM_TIS_BURST_COUNT(avail) | s->loc[locty].sts; -+ } -+ } -+ break; -+ case TPM_TIS_REG_DATA_FIFO: -+ case TPM_TIS_REG_DATA_XFIFO ... TPM_TIS_REG_DATA_XFIFO_END: -+ if (s->active_locty == locty) { -+ if (size > 4 - (addr & 0x3)) { -+ /* prevent access beyond FIFO */ -+ size = 4 - (addr & 0x3); -+ } -+ val = 0; -+ shift = 0; -+ while (size > 0) { -+ switch (s->loc[locty].state) { -+ case TPM_TIS_STATE_COMPLETION: -+ v = tpm_tis_data_read(s, locty); -+ break; -+ default: -+ v = TPM_TIS_NO_DATA_BYTE; -+ break; -+ } -+ val |= (v << shift); -+ shift += 8; -+ size--; -+ } -+ shift = 0; /* no more adjustments */ -+ } -+ break; -+ case TPM_TIS_REG_INTERFACE_ID: -+ val = s->loc[locty].iface_id; -+ break; -+ case TPM_TIS_REG_DID_VID: -+ val = (TPM_TIS_TPM_DID << 16) | TPM_TIS_TPM_VID; -+ break; -+ case TPM_TIS_REG_RID: -+ val = TPM_TIS_TPM_RID; -+ break; -+#ifdef DEBUG_TIS -+ case TPM_TIS_REG_DEBUG: -+ tpm_tis_dump_state(s, addr); -+ break; -+#endif -+ } -+ -+ if (shift) { -+ val >>= shift; -+ } -+ -+ trace_tpm_tis_mmio_read(size, addr, val); -+ -+ return val; -+} -+ -+/* -+ * Write a value to a register of the TIS interface -+ * See specs pages 33-63 for description of the registers -+ */ -+static void tpm_tis_mmio_write(void *opaque, hwaddr addr, -+ uint64_t val, unsigned size) -+{ -+ TPMState *s = opaque; -+ uint16_t off = addr & 0xffc; -+ uint8_t shift = (addr & 0x3) * 8; -+ uint8_t 
locty = tpm_tis_locality_from_addr(addr); -+ uint8_t active_locty, l; -+ int c, set_new_locty = 1; -+ uint16_t len; -+ uint32_t mask = (size == 1) ? 0xff : ((size == 2) ? 0xffff : ~0); -+ -+ trace_tpm_tis_mmio_write(size, addr, val); -+ -+ if (locty == 4) { -+ trace_tpm_tis_mmio_write_locty4(); -+ return; -+ } -+ -+ if (tpm_backend_had_startup_error(s->be_driver)) { -+ return; -+ } -+ -+ val &= mask; -+ -+ if (shift) { -+ val <<= shift; -+ mask <<= shift; -+ } -+ -+ mask ^= 0xffffffff; -+ -+ switch (off) { -+ case TPM_TIS_REG_ACCESS: -+ -+ if ((val & TPM_TIS_ACCESS_SEIZE)) { -+ val &= ~(TPM_TIS_ACCESS_REQUEST_USE | -+ TPM_TIS_ACCESS_ACTIVE_LOCALITY); -+ } -+ -+ active_locty = s->active_locty; -+ -+ if ((val & TPM_TIS_ACCESS_ACTIVE_LOCALITY)) { -+ /* give up locality if currently owned */ -+ if (s->active_locty == locty) { -+ trace_tpm_tis_mmio_write_release_locty(locty); -+ -+ uint8_t newlocty = TPM_TIS_NO_LOCALITY; -+ /* anybody wants the locality ? */ -+ for (c = TPM_TIS_NUM_LOCALITIES - 1; c >= 0; c--) { -+ if ((s->loc[c].access & TPM_TIS_ACCESS_REQUEST_USE)) { -+ trace_tpm_tis_mmio_write_locty_req_use(c); -+ newlocty = c; -+ break; -+ } -+ } -+ trace_tpm_tis_mmio_write_next_locty(newlocty); -+ -+ if (TPM_TIS_IS_VALID_LOCTY(newlocty)) { -+ set_new_locty = 0; -+ tpm_tis_prep_abort(s, locty, newlocty); -+ } else { -+ active_locty = TPM_TIS_NO_LOCALITY; -+ } -+ } else { -+ /* not currently the owner; clear a pending request */ -+ s->loc[locty].access &= ~TPM_TIS_ACCESS_REQUEST_USE; -+ } -+ } -+ -+ if ((val & TPM_TIS_ACCESS_BEEN_SEIZED)) { -+ s->loc[locty].access &= ~TPM_TIS_ACCESS_BEEN_SEIZED; -+ } -+ -+ if ((val & TPM_TIS_ACCESS_SEIZE)) { -+ /* -+ * allow seize if a locality is active and the requesting -+ * locality is higher than the one that's active -+ * OR -+ * allow seize for requesting locality if no locality is -+ * active -+ */ -+ while ((TPM_TIS_IS_VALID_LOCTY(s->active_locty) && -+ locty > s->active_locty) || -+ !TPM_TIS_IS_VALID_LOCTY(s->active_locty)) 
{ -+ bool higher_seize = FALSE; -+ -+ /* already a pending SEIZE ? */ -+ if ((s->loc[locty].access & TPM_TIS_ACCESS_SEIZE)) { -+ break; -+ } -+ -+ /* check for ongoing seize by a higher locality */ -+ for (l = locty + 1; l < TPM_TIS_NUM_LOCALITIES; l++) { -+ if ((s->loc[l].access & TPM_TIS_ACCESS_SEIZE)) { -+ higher_seize = TRUE; -+ break; -+ } -+ } -+ -+ if (higher_seize) { -+ break; -+ } -+ -+ /* cancel any seize by a lower locality */ -+ for (l = 0; l < locty; l++) { -+ s->loc[l].access &= ~TPM_TIS_ACCESS_SEIZE; -+ } -+ -+ s->loc[locty].access |= TPM_TIS_ACCESS_SEIZE; -+ -+ trace_tpm_tis_mmio_write_locty_seized(locty, s->active_locty); -+ trace_tpm_tis_mmio_write_init_abort(); -+ -+ set_new_locty = 0; -+ tpm_tis_prep_abort(s, s->active_locty, locty); -+ break; -+ } -+ } -+ -+ if ((val & TPM_TIS_ACCESS_REQUEST_USE)) { -+ if (s->active_locty != locty) { -+ if (TPM_TIS_IS_VALID_LOCTY(s->active_locty)) { -+ s->loc[locty].access |= TPM_TIS_ACCESS_REQUEST_USE; -+ } else { -+ /* no locality active -> make this one active now */ -+ active_locty = locty; -+ } -+ } -+ } -+ -+ if (set_new_locty) { -+ tpm_tis_new_active_locality(s, active_locty); -+ } -+ -+ break; -+ case TPM_TIS_REG_INT_ENABLE: -+ if (s->active_locty != locty) { -+ break; -+ } -+ -+ s->loc[locty].inte &= mask; -+ s->loc[locty].inte |= (val & (TPM_TIS_INT_ENABLED | -+ TPM_TIS_INT_POLARITY_MASK | -+ TPM_TIS_INTERRUPTS_SUPPORTED)); -+ break; -+ case TPM_TIS_REG_INT_VECTOR: -+ /* hard wired -- ignore */ -+ break; -+ case TPM_TIS_REG_INT_STATUS: -+ if (s->active_locty != locty) { -+ break; -+ } -+ -+ /* clearing of interrupt flags */ -+ if (((val & TPM_TIS_INTERRUPTS_SUPPORTED)) && -+ (s->loc[locty].ints & TPM_TIS_INTERRUPTS_SUPPORTED)) { -+ s->loc[locty].ints &= ~val; -+ if (s->loc[locty].ints == 0) { -+ qemu_irq_lower(s->irq); -+ trace_tpm_tis_mmio_write_lowering_irq(); -+ } -+ } -+ s->loc[locty].ints &= ~(val & TPM_TIS_INTERRUPTS_SUPPORTED); -+ break; -+ case TPM_TIS_REG_STS: -+ if (s->active_locty != locty) 
{ -+ break; -+ } -+ -+ if (s->be_tpm_version == TPM_VERSION_2_0) { -+ /* some flags that are only supported for TPM 2 */ -+ if (val & TPM_TIS_STS_COMMAND_CANCEL) { -+ if (s->loc[locty].state == TPM_TIS_STATE_EXECUTION) { -+ /* -+ * request the backend to cancel. Some backends may not -+ * support it -+ */ -+ tpm_backend_cancel_cmd(s->be_driver); -+ } -+ } -+ -+ if (val & TPM_TIS_STS_RESET_ESTABLISHMENT_BIT) { -+ if (locty == 3 || locty == 4) { -+ tpm_backend_reset_tpm_established_flag(s->be_driver, locty); -+ } -+ } -+ } -+ -+ val &= (TPM_TIS_STS_COMMAND_READY | TPM_TIS_STS_TPM_GO | -+ TPM_TIS_STS_RESPONSE_RETRY); -+ -+ if (val == TPM_TIS_STS_COMMAND_READY) { -+ switch (s->loc[locty].state) { -+ -+ case TPM_TIS_STATE_READY: -+ s->rw_offset = 0; -+ break; -+ -+ case TPM_TIS_STATE_IDLE: -+ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_COMMAND_READY); -+ s->loc[locty].state = TPM_TIS_STATE_READY; -+ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_COMMAND_READY); -+ break; -+ -+ case TPM_TIS_STATE_EXECUTION: -+ case TPM_TIS_STATE_RECEPTION: -+ /* abort currently running command */ -+ trace_tpm_tis_mmio_write_init_abort(); -+ tpm_tis_prep_abort(s, locty, locty); -+ break; -+ -+ case TPM_TIS_STATE_COMPLETION: -+ s->rw_offset = 0; -+ /* shortcut to ready state with C/R set */ -+ s->loc[locty].state = TPM_TIS_STATE_READY; -+ if (!(s->loc[locty].sts & TPM_TIS_STS_COMMAND_READY)) { -+ tpm_tis_sts_set(&s->loc[locty], -+ TPM_TIS_STS_COMMAND_READY); -+ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_COMMAND_READY); -+ } -+ s->loc[locty].sts &= ~(TPM_TIS_STS_DATA_AVAILABLE); -+ break; -+ -+ } -+ } else if (val == TPM_TIS_STS_TPM_GO) { -+ switch (s->loc[locty].state) { -+ case TPM_TIS_STATE_RECEPTION: -+ if ((s->loc[locty].sts & TPM_TIS_STS_EXPECT) == 0) { -+ tpm_tis_tpm_send(s, locty); -+ } -+ break; -+ default: -+ /* ignore */ -+ break; -+ } -+ } else if (val == TPM_TIS_STS_RESPONSE_RETRY) { -+ switch (s->loc[locty].state) { -+ case TPM_TIS_STATE_COMPLETION: -+ s->rw_offset = 0; -+ 
tpm_tis_sts_set(&s->loc[locty], -+ TPM_TIS_STS_VALID| -+ TPM_TIS_STS_DATA_AVAILABLE); -+ break; -+ default: -+ /* ignore */ -+ break; -+ } -+ } -+ break; -+ case TPM_TIS_REG_DATA_FIFO: -+ case TPM_TIS_REG_DATA_XFIFO ... TPM_TIS_REG_DATA_XFIFO_END: -+ /* data fifo */ -+ if (s->active_locty != locty) { -+ break; -+ } -+ -+ if (s->loc[locty].state == TPM_TIS_STATE_IDLE || -+ s->loc[locty].state == TPM_TIS_STATE_EXECUTION || -+ s->loc[locty].state == TPM_TIS_STATE_COMPLETION) { -+ /* drop the byte */ -+ } else { -+ trace_tpm_tis_mmio_write_data2send(val, size); -+ if (s->loc[locty].state == TPM_TIS_STATE_READY) { -+ s->loc[locty].state = TPM_TIS_STATE_RECEPTION; -+ tpm_tis_sts_set(&s->loc[locty], -+ TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); -+ } -+ -+ val >>= shift; -+ if (size > 4 - (addr & 0x3)) { -+ /* prevent access beyond FIFO */ -+ size = 4 - (addr & 0x3); -+ } -+ -+ while ((s->loc[locty].sts & TPM_TIS_STS_EXPECT) && size > 0) { -+ if (s->rw_offset < s->be_buffer_size) { -+ s->buffer[s->rw_offset++] = -+ (uint8_t)val; -+ val >>= 8; -+ size--; -+ } else { -+ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_VALID); -+ } -+ } -+ -+ /* check for complete packet */ -+ if (s->rw_offset > 5 && -+ (s->loc[locty].sts & TPM_TIS_STS_EXPECT)) { -+ /* we have a packet length - see if we have all of it */ -+ bool need_irq = !(s->loc[locty].sts & TPM_TIS_STS_VALID); -+ -+ len = tpm_cmd_get_size(&s->buffer); -+ if (len > s->rw_offset) { -+ tpm_tis_sts_set(&s->loc[locty], -+ TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); -+ } else { -+ /* packet complete */ -+ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_VALID); -+ } -+ if (need_irq) { -+ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_STS_VALID); -+ } -+ } -+ } -+ break; -+ case TPM_TIS_REG_INTERFACE_ID: -+ if (val & TPM_TIS_IFACE_ID_INT_SEL_LOCK) { -+ for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) { -+ s->loc[l].iface_id |= TPM_TIS_IFACE_ID_INT_SEL_LOCK; -+ } -+ } -+ break; -+ } -+} -+ -+const MemoryRegionOps tpm_tis_memory_ops = { -+ .read = 
tpm_tis_mmio_read, -+ .write = tpm_tis_mmio_write, -+ .endianness = DEVICE_LITTLE_ENDIAN, -+ .valid = { -+ .min_access_size = 1, -+ .max_access_size = 4, -+ }, -+}; -+ -+/* -+ * Get the TPMVersion of the backend device being used -+ */ -+enum TPMVersion tpm_tis_get_tpm_version(TPMState *s) -+{ -+ if (tpm_backend_had_startup_error(s->be_driver)) { -+ return TPM_VERSION_UNSPEC; -+ } -+ -+ return tpm_backend_get_tpm_version(s->be_driver); -+} -+ -+/* -+ * This function is called when the machine starts, resets or due to -+ * S3 resume. -+ */ -+void tpm_tis_reset(TPMState *s) -+{ -+ int c; -+ -+ s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); -+ s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver), -+ TPM_TIS_BUFFER_MAX); -+ -+ if (s->ppi_enabled) { -+ tpm_ppi_reset(&s->ppi); -+ } -+ tpm_backend_reset(s->be_driver); -+ -+ s->active_locty = TPM_TIS_NO_LOCALITY; -+ s->next_locty = TPM_TIS_NO_LOCALITY; -+ s->aborting_locty = TPM_TIS_NO_LOCALITY; -+ -+ for (c = 0; c < TPM_TIS_NUM_LOCALITIES; c++) { -+ s->loc[c].access = TPM_TIS_ACCESS_TPM_REG_VALID_STS; -+ switch (s->be_tpm_version) { -+ case TPM_VERSION_UNSPEC: -+ break; -+ case TPM_VERSION_1_2: -+ s->loc[c].sts = TPM_TIS_STS_TPM_FAMILY1_2; -+ s->loc[c].iface_id = TPM_TIS_IFACE_ID_SUPPORTED_FLAGS1_3; -+ break; -+ case TPM_VERSION_2_0: -+ s->loc[c].sts = TPM_TIS_STS_TPM_FAMILY2_0; -+ s->loc[c].iface_id = TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0; -+ break; -+ } -+ s->loc[c].inte = TPM_TIS_INT_POLARITY_LOW_LEVEL; -+ s->loc[c].ints = 0; -+ s->loc[c].state = TPM_TIS_STATE_IDLE; -+ -+ s->rw_offset = 0; -+ } -+ -+ if (tpm_backend_startup_tpm(s->be_driver, s->be_buffer_size) < 0) { -+ exit(1); -+ } -+} -+ -+/* persistent state handling */ -+ -+int tpm_tis_pre_save(TPMState *s) -+{ -+ uint8_t locty = s->active_locty; -+ -+ trace_tpm_tis_pre_save(locty, s->rw_offset); -+ -+ if (DEBUG_TIS) { -+ tpm_tis_dump_state(s, 0); -+ } -+ -+ /* -+ * Synchronize with backend completion. 
-+ */ -+ tpm_backend_finish_sync(s->be_driver); -+ -+ return 0; -+} -+ -+const VMStateDescription vmstate_locty = { -+ .name = "tpm-tis/locty", -+ .version_id = 0, -+ .fields = (VMStateField[]) { -+ VMSTATE_UINT32(state, TPMLocality), -+ VMSTATE_UINT32(inte, TPMLocality), -+ VMSTATE_UINT32(ints, TPMLocality), -+ VMSTATE_UINT8(access, TPMLocality), -+ VMSTATE_UINT32(sts, TPMLocality), -+ VMSTATE_UINT32(iface_id, TPMLocality), -+ VMSTATE_END_OF_LIST(), -+ } -+}; -+ -diff --git a/hw/tpm/tpm_tis_isa.c b/hw/tpm/tpm_tis_isa.c -new file mode 100644 -index 00000000..45e25c02 ---- /dev/null -+++ b/hw/tpm/tpm_tis_isa.c -@@ -0,0 +1,170 @@ -+/* -+ * tpm_tis_isa.c - QEMU's TPM TIS ISA Device -+ * -+ * Copyright (C) 2006,2010-2013 IBM Corporation -+ * -+ * Authors: -+ * Stefan Berger -+ * David Safford -+ * -+ * Xen 4 support: Andrease Niederl -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ * -+ * Implementation of the TIS interface according to specs found at -+ * http://www.trustedcomputinggroup.org. This implementation currently -+ * supports version 1.3, 21 March 2013 -+ * In the developers menu choose the PC Client section then find the TIS -+ * specification. 
-+ * -+ * TPM TIS for TPM 2 implementation following TCG PC Client Platform -+ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 -+ */ -+ -+#include "qemu/osdep.h" -+#include "hw/isa/isa.h" -+#include "hw/qdev-properties.h" -+#include "migration/vmstate.h" -+#include "tpm_util.h" -+#include "tpm_tis.h" -+ -+typedef struct TPMStateISA { -+ /*< private >*/ -+ ISADevice parent_obj; -+ -+ /*< public >*/ -+ TPMState state; /* not a QOM object */ -+} TPMStateISA; -+ -+#define TPM_TIS_ISA(obj) OBJECT_CHECK(TPMStateISA, (obj), TYPE_TPM_TIS_ISA) -+ -+static int tpm_tis_pre_save_isa(void *opaque) -+{ -+ TPMStateISA *isadev = opaque; -+ -+ return tpm_tis_pre_save(&isadev->state); -+} -+ -+static const VMStateDescription vmstate_tpm_tis_isa = { -+ .name = "tpm-tis", -+ .version_id = 0, -+ .pre_save = tpm_tis_pre_save_isa, -+ .fields = (VMStateField[]) { -+ VMSTATE_BUFFER(state.buffer, TPMStateISA), -+ VMSTATE_UINT16(state.rw_offset, TPMStateISA), -+ VMSTATE_UINT8(state.active_locty, TPMStateISA), -+ VMSTATE_UINT8(state.aborting_locty, TPMStateISA), -+ VMSTATE_UINT8(state.next_locty, TPMStateISA), -+ -+ VMSTATE_STRUCT_ARRAY(state.loc, TPMStateISA, TPM_TIS_NUM_LOCALITIES, 0, -+ vmstate_locty, TPMLocality), -+ -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ -+static void tpm_tis_isa_request_completed(TPMIf *ti, int ret) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(ti); -+ TPMState *s = &isadev->state; -+ -+ tpm_tis_request_completed(s, ret); -+} -+ -+static enum TPMVersion tpm_tis_isa_get_tpm_version(TPMIf *ti) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(ti); -+ TPMState *s = &isadev->state; -+ -+ return tpm_tis_get_tpm_version(s); -+} -+ -+static void tpm_tis_isa_reset(DeviceState *dev) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(dev); -+ TPMState *s = &isadev->state; -+ -+ return tpm_tis_reset(s); -+} -+ -+static Property tpm_tis_isa_properties[] = { -+ DEFINE_PROP_UINT32("irq", TPMStateISA, state.irq_num, TPM_TIS_IRQ), -+ DEFINE_PROP_TPMBE("tpmdev", TPMStateISA, state.be_driver), -+ 
DEFINE_PROP_BOOL("ppi", TPMStateISA, state.ppi_enabled, true), -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ -+static void tpm_tis_isa_initfn(Object *obj) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(obj); -+ TPMState *s = &isadev->state; -+ -+ memory_region_init_io(&s->mmio, obj, &tpm_tis_memory_ops, -+ s, "tpm-tis-mmio", -+ TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); -+} -+ -+static void tpm_tis_isa_realizefn(DeviceState *dev, Error **errp) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(dev); -+ TPMState *s = &isadev->state; -+ -+ if (!tpm_find()) { -+ error_setg(errp, "at most one TPM device is permitted"); -+ return; -+ } -+ -+ if (!s->be_driver) { -+ error_setg(errp, "'tpmdev' property is required"); -+ return; -+ } -+ if (s->irq_num > 15) { -+ error_setg(errp, "IRQ %d is outside valid range of 0 to 15", -+ s->irq_num); -+ return; -+ } -+ -+ isa_init_irq(ISA_DEVICE(dev), &s->irq, s->irq_num); -+ -+ memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)), -+ TPM_TIS_ADDR_BASE, &s->mmio); -+ -+ if (s->ppi_enabled) { -+ tpm_ppi_init(&s->ppi, isa_address_space(ISA_DEVICE(dev)), -+ TPM_PPI_ADDR_BASE, OBJECT(dev)); -+ } -+} -+ -+static void tpm_tis_isa_class_init(ObjectClass *klass, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(klass); -+ TPMIfClass *tc = TPM_IF_CLASS(klass); -+ -+ dc->props = tpm_tis_isa_properties; -+ dc->vmsd = &vmstate_tpm_tis_isa; -+ tc->model = TPM_MODEL_TPM_TIS; -+ dc->realize = tpm_tis_isa_realizefn; -+ dc->reset = tpm_tis_isa_reset; -+ tc->request_completed = tpm_tis_isa_request_completed; -+ tc->get_version = tpm_tis_isa_get_tpm_version; -+} -+ -+static const TypeInfo tpm_tis_isa_info = { -+ .name = TYPE_TPM_TIS_ISA, -+ .parent = TYPE_ISA_DEVICE, -+ .instance_size = sizeof(TPMStateISA), -+ .instance_init = tpm_tis_isa_initfn, -+ .class_init = tpm_tis_isa_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_TPM_IF }, -+ { } -+ } -+}; -+ -+static void tpm_tis_isa_register(void) -+{ -+ type_register_static(&tpm_tis_isa_info); -+} -+ 
-+type_init(tpm_tis_isa_register) --- -2.23.0 - diff --git a/tpm-Use-TPMState-as-a-common-struct.patch b/tpm-Use-TPMState-as-a-common-struct.patch deleted file mode 100644 index 61a1dd037bb2356cb7307d53f82732af404ed4e2..0000000000000000000000000000000000000000 --- a/tpm-Use-TPMState-as-a-common-struct.patch +++ /dev/null @@ -1,314 +0,0 @@ -From c57e57c86f9d3c13b33746436bc1f09db88d4d42 Mon Sep 17 00:00:00 2001 -From: jiangfangjie -Date: Tue, 11 Aug 2020 02:52:12 +0000 -Subject: [PATCH 11/19] tpm: Use TPMState as a common struct - -As we plan to introduce a SysBus TPM TIS device, let's -make the TPMState a common struct usable by both the -ISADevice and the SysBusDevice. TPMStateISA embeds the -struct and inherits from the ISADevice. - -The prototype of functions bound to be used by both -the ISA and SysBus devices is changed to take TPMState -handle. - -A bunch of structs also are renamed to be specialized -for the ISA device. Besides those transformations, no -functional change is expected. 
- -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Tested-by: Ard Biesheuvel -Acked-by: Ard Biesheuvel -Message-id: 20200305165149.618-3-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - hw/tpm/tpm_tis.c | 147 +++++++++++++++++++++++++++++------------------ - 1 file changed, 92 insertions(+), 55 deletions(-) - -diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c -index 49d44652..735a528f 100644 ---- a/hw/tpm/tpm_tis.c -+++ b/hw/tpm/tpm_tis.c -@@ -62,7 +62,6 @@ typedef struct TPMLocality { - } TPMLocality; - - typedef struct TPMState { -- ISADevice busdev; - MemoryRegion mmio; - - unsigned char buffer[TPM_TIS_BUFFER_MAX]; -@@ -88,7 +87,15 @@ typedef struct TPMState { - TPMPPI ppi; - } TPMState; - --#define TPM(obj) OBJECT_CHECK(TPMState, (obj), TYPE_TPM_TIS_ISA) -+typedef struct TPMStateISA { -+ /*< private >*/ -+ ISADevice parent_obj; -+ -+ /*< public >*/ -+ TPMState state; /* not a QOM object */ -+} TPMStateISA; -+ -+#define TPM_TIS_ISA(obj) OBJECT_CHECK(TPMStateISA, (obj), TYPE_TPM_TIS_ISA) - - #define DEBUG_TIS 0 - -@@ -278,9 +285,8 @@ static void tpm_tis_prep_abort(TPMState *s, uint8_t locty, uint8_t newlocty) - /* - * Callback from the TPM to indicate that the response was received. 
- */ --static void tpm_tis_request_completed(TPMIf *ti, int ret) -+static void tpm_tis_request_completed(TPMState *s, int ret) - { -- TPMState *s = TPM(ti); - uint8_t locty = s->cmd.locty; - uint8_t l; - -@@ -335,7 +341,7 @@ static uint32_t tpm_tis_data_read(TPMState *s, uint8_t locty) - } - - #ifdef DEBUG_TIS --static void tpm_tis_dump_state(void *opaque, hwaddr addr) -+static void tpm_tis_dump_state(TPMState *s, hwaddr addr) - { - static const unsigned regs[] = { - TPM_TIS_REG_ACCESS, -@@ -350,7 +356,6 @@ static void tpm_tis_dump_state(void *opaque, hwaddr addr) - int idx; - uint8_t locty = tpm_tis_locality_from_addr(addr); - hwaddr base = addr & ~0xfff; -- TPMState *s = opaque; - - printf("tpm_tis: active locality : %d\n" - "tpm_tis: state of locality %d : %d\n" -@@ -360,7 +365,7 @@ static void tpm_tis_dump_state(void *opaque, hwaddr addr) - - for (idx = 0; regs[idx] != 0xfff; idx++) { - printf("tpm_tis: 0x%04x : 0x%08x\n", regs[idx], -- (int)tpm_tis_mmio_read(opaque, base + regs[idx], 4)); -+ (int)tpm_tis_mmio_read(s, base + regs[idx], 4)); - } - - printf("tpm_tis: r/w offset : %d\n" -@@ -485,7 +490,7 @@ static uint64_t tpm_tis_mmio_read(void *opaque, hwaddr addr, - break; - #ifdef DEBUG_TIS - case TPM_TIS_REG_DEBUG: -- tpm_tis_dump_state(opaque, addr); -+ tpm_tis_dump_state(s, addr); - break; - #endif - } -@@ -832,10 +837,8 @@ static const MemoryRegionOps tpm_tis_memory_ops = { - /* - * Get the TPMVersion of the backend device being used - */ --static enum TPMVersion tpm_tis_get_tpm_version(TPMIf *ti) -+static enum TPMVersion tpm_tis_get_tpm_version(TPMState *s) - { -- TPMState *s = TPM(ti); -- - if (tpm_backend_had_startup_error(s->be_driver)) { - return TPM_VERSION_UNSPEC; - } -@@ -847,9 +850,8 @@ static enum TPMVersion tpm_tis_get_tpm_version(TPMIf *ti) - * This function is called when the machine starts, resets or due to - * S3 resume. 
- */ --static void tpm_tis_reset(DeviceState *dev) -+static void tpm_tis_reset(TPMState *s) - { -- TPMState *s = TPM(dev); - int c; - - s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); -@@ -893,15 +895,14 @@ static void tpm_tis_reset(DeviceState *dev) - - /* persistent state handling */ - --static int tpm_tis_pre_save(void *opaque) -+static int tpm_tis_pre_save(TPMState *s) - { -- TPMState *s = opaque; - uint8_t locty = s->active_locty; - - trace_tpm_tis_pre_save(locty, s->rw_offset); - - if (DEBUG_TIS) { -- tpm_tis_dump_state(opaque, 0); -+ tpm_tis_dump_state(s, 0); - } - - /* -@@ -926,34 +927,78 @@ static const VMStateDescription vmstate_locty = { - } - }; - --static const VMStateDescription vmstate_tpm_tis = { -+/* ISA */ -+ -+static int tpm_tis_pre_save_isa(void *opaque) -+{ -+ TPMStateISA *isadev = opaque; -+ -+ return tpm_tis_pre_save(&isadev->state); -+} -+ -+static const VMStateDescription vmstate_tpm_tis_isa = { - .name = "tpm-tis", - .version_id = 0, -- .pre_save = tpm_tis_pre_save, -+ .pre_save = tpm_tis_pre_save_isa, - .fields = (VMStateField[]) { -- VMSTATE_BUFFER(buffer, TPMState), -- VMSTATE_UINT16(rw_offset, TPMState), -- VMSTATE_UINT8(active_locty, TPMState), -- VMSTATE_UINT8(aborting_locty, TPMState), -- VMSTATE_UINT8(next_locty, TPMState), -+ VMSTATE_BUFFER(state.buffer, TPMStateISA), -+ VMSTATE_UINT16(state.rw_offset, TPMStateISA), -+ VMSTATE_UINT8(state.active_locty, TPMStateISA), -+ VMSTATE_UINT8(state.aborting_locty, TPMStateISA), -+ VMSTATE_UINT8(state.next_locty, TPMStateISA), - -- VMSTATE_STRUCT_ARRAY(loc, TPMState, TPM_TIS_NUM_LOCALITIES, 0, -+ VMSTATE_STRUCT_ARRAY(state.loc, TPMStateISA, TPM_TIS_NUM_LOCALITIES, 0, - vmstate_locty, TPMLocality), - - VMSTATE_END_OF_LIST() - } - }; - --static Property tpm_tis_properties[] = { -- DEFINE_PROP_UINT32("irq", TPMState, irq_num, TPM_TIS_IRQ), -- DEFINE_PROP_TPMBE("tpmdev", TPMState, be_driver), -- DEFINE_PROP_BOOL("ppi", TPMState, ppi_enabled, true), -+static void 
tpm_tis_isa_request_completed(TPMIf *ti, int ret) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(ti); -+ TPMState *s = &isadev->state; -+ -+ tpm_tis_request_completed(s, ret); -+} -+ -+static enum TPMVersion tpm_tis_isa_get_tpm_version(TPMIf *ti) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(ti); -+ TPMState *s = &isadev->state; -+ -+ return tpm_tis_get_tpm_version(s); -+} -+ -+static void tpm_tis_isa_reset(DeviceState *dev) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(dev); -+ TPMState *s = &isadev->state; -+ -+ return tpm_tis_reset(s); -+} -+ -+static Property tpm_tis_isa_properties[] = { -+ DEFINE_PROP_UINT32("irq", TPMStateISA, state.irq_num, TPM_TIS_IRQ), -+ DEFINE_PROP_TPMBE("tpmdev", TPMStateISA, state.be_driver), -+ DEFINE_PROP_BOOL("ppi", TPMStateISA, state.ppi_enabled, true), - DEFINE_PROP_END_OF_LIST(), - }; - --static void tpm_tis_realizefn(DeviceState *dev, Error **errp) -+static void tpm_tis_isa_initfn(Object *obj) - { -- TPMState *s = TPM(dev); -+ TPMStateISA *isadev = TPM_TIS_ISA(obj); -+ TPMState *s = &isadev->state; -+ -+ memory_region_init_io(&s->mmio, obj, &tpm_tis_memory_ops, -+ s, "tpm-tis-mmio", -+ TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); -+} -+ -+static void tpm_tis_isa_realizefn(DeviceState *dev, Error **errp) -+{ -+ TPMStateISA *isadev = TPM_TIS_ISA(dev); -+ TPMState *s = &isadev->state; - - if (!tpm_find()) { - error_setg(errp, "at most one TPM device is permitted"); -@@ -970,55 +1015,47 @@ static void tpm_tis_realizefn(DeviceState *dev, Error **errp) - return; - } - -- isa_init_irq(&s->busdev, &s->irq, s->irq_num); -+ isa_init_irq(ISA_DEVICE(dev), &s->irq, s->irq_num); - - memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)), - TPM_TIS_ADDR_BASE, &s->mmio); - - if (s->ppi_enabled) { - tpm_ppi_init(&s->ppi, isa_address_space(ISA_DEVICE(dev)), -- TPM_PPI_ADDR_BASE, OBJECT(s)); -+ TPM_PPI_ADDR_BASE, OBJECT(dev)); - } - } - --static void tpm_tis_initfn(Object *obj) --{ -- TPMState *s = TPM(obj); -- -- memory_region_init_io(&s->mmio, 
OBJECT(s), &tpm_tis_memory_ops, -- s, "tpm-tis-mmio", -- TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); --} -- --static void tpm_tis_class_init(ObjectClass *klass, void *data) -+static void tpm_tis_isa_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); - TPMIfClass *tc = TPM_IF_CLASS(klass); - -- dc->realize = tpm_tis_realizefn; -- dc->props = tpm_tis_properties; -- dc->reset = tpm_tis_reset; -- dc->vmsd = &vmstate_tpm_tis; -+ dc->props = tpm_tis_isa_properties; -+ dc->vmsd = &vmstate_tpm_tis_isa; - tc->model = TPM_MODEL_TPM_TIS; -- tc->get_version = tpm_tis_get_tpm_version; -- tc->request_completed = tpm_tis_request_completed; -+ dc->realize = tpm_tis_isa_realizefn; -+ dc->reset = tpm_tis_isa_reset; -+ tc->request_completed = tpm_tis_isa_request_completed; -+ tc->get_version = tpm_tis_isa_get_tpm_version; -+ - } - --static const TypeInfo tpm_tis_info = { -+static const TypeInfo tpm_tis_isa_info = { - .name = TYPE_TPM_TIS_ISA, - .parent = TYPE_ISA_DEVICE, -- .instance_size = sizeof(TPMState), -- .instance_init = tpm_tis_initfn, -- .class_init = tpm_tis_class_init, -+ .instance_size = sizeof(TPMStateISA), -+ .instance_init = tpm_tis_isa_initfn, -+ .class_init = tpm_tis_isa_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_TPM_IF }, - { } - } - }; - --static void tpm_tis_register(void) -+static void tpm_tis_isa_register(void) - { -- type_register_static(&tpm_tis_info); -+ type_register_static(&tpm_tis_isa_info); - } - --type_init(tpm_tis_register) -+type_init(tpm_tis_isa_register) --- -2.23.0 - diff --git a/tpm-ppi-page-align-PPI-RAM.patch b/tpm-ppi-page-align-PPI-RAM.patch deleted file mode 100644 index d7ba2c876605392e79d887b89a4a274cb51660d4..0000000000000000000000000000000000000000 --- a/tpm-ppi-page-align-PPI-RAM.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 26b54c545f253049faa633ff886132602ff47241 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 3 Jan 2020 11:39:59 +0400 -Subject: 
[PATCH 02/19] tpm-ppi: page-align PPI RAM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -post-copy migration fails on destination with error such as: -2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: -Unaligned start address: 0x559d2afae9a0 - -Use qemu_memalign() to constrain the PPI RAM memory alignment. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Marc-André Lureau -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Stefan Berger -Signed-off-by: Stefan Berger -Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com -Signed-off-by: jiangfangjie ---- - hw/tpm/tpm_ppi.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c -index cd8205f2..6509ffd4 100644 ---- a/hw/tpm/tpm_ppi.c -+++ b/hw/tpm/tpm_ppi.c -@@ -44,7 +44,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) - void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, - hwaddr addr, Object *obj) - { -- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); -+ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, -+ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); - memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", - TPM_PPI_ADDR_SIZE, tpmppi->buf); - vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); --- -2.23.0 - diff --git a/tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch b/tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch deleted file mode 100644 index ea6e1d28a10cb6d29ba1c1c76245ef6749825ba2..0000000000000000000000000000000000000000 --- a/tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 7974f8ffd75171be106a1ce2705878abbb6c4477 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 5 Mar 2020 17:51:40 +0100 -Subject: [PATCH 10/19] tpm: rename TPM_TIS into TPM_TIS_ISA -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -As we plan to introduce a sysbus TPM_TIS, let's rename -TPM_TIS into 
TPM_TIS_ISA. - -Signed-off-by: Eric Auger -Reviewed-by: Stefan Berger -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Ard Biesheuvel -Acked-by: Ard Biesheuvel -Message-id: 20200305165149.618-2-eric.auger@redhat.com -Signed-off-by: Stefan Berger -Signed-off-by: jiangfangjie ---- - hw/i386/acpi-build.c | 6 +++--- - hw/tpm/tpm_tis.c | 4 ++-- - include/sysemu/tpm.h | 6 +++--- - 3 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index c97731ec..093f7d93 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -2007,7 +2007,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, - } - } - -- if (TPM_IS_TIS(tpm_find())) { -+ if (TPM_IS_TIS_ISA(tpm_find())) { - aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE, - TPM_TIS_ADDR_SIZE, AML_READ_WRITE)); - } -@@ -2178,7 +2178,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, - /* Scan all PCI buses. Generate tables to support hotplug. */ - build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en); - -- if (TPM_IS_TIS(tpm)) { -+ if (TPM_IS_TIS_ISA(tpm)) { - if (misc->tpm_version == TPM_VERSION_2_0) { - dev = aml_device("TPM"); - aml_append(dev, aml_name_decl("_HID", -@@ -2285,7 +2285,7 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) - (char *)&tpm2_ptr->log_area_start_address - table_data->data; - - tpm2_ptr->platform_class = cpu_to_le16(TPM2_ACPI_CLASS_CLIENT); -- if (TPM_IS_TIS(tpm_find())) { -+ if (TPM_IS_TIS_ISA(tpm_find())) { - tpm2_ptr->control_area_address = cpu_to_le64(0); - tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_MMIO); - } else if (TPM_IS_CRB(tpm_find())) { -diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c -index 96a9ac48..49d44652 100644 ---- a/hw/tpm/tpm_tis.c -+++ b/hw/tpm/tpm_tis.c -@@ -88,7 +88,7 @@ typedef struct TPMState { - TPMPPI ppi; - } TPMState; - --#define TPM(obj) OBJECT_CHECK(TPMState, (obj), TYPE_TPM_TIS) -+#define TPM(obj) OBJECT_CHECK(TPMState, (obj), TYPE_TPM_TIS_ISA) - - #define 
DEBUG_TIS 0 - -@@ -1005,7 +1005,7 @@ static void tpm_tis_class_init(ObjectClass *klass, void *data) - } - - static const TypeInfo tpm_tis_info = { -- .name = TYPE_TPM_TIS, -+ .name = TYPE_TPM_TIS_ISA, - .parent = TYPE_ISA_DEVICE, - .instance_size = sizeof(TPMState), - .instance_init = tpm_tis_initfn, -diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h -index 15979a36..1691b92c 100644 ---- a/include/sysemu/tpm.h -+++ b/include/sysemu/tpm.h -@@ -43,12 +43,12 @@ typedef struct TPMIfClass { - enum TPMVersion (*get_version)(TPMIf *obj); - } TPMIfClass; - --#define TYPE_TPM_TIS "tpm-tis" -+#define TYPE_TPM_TIS_ISA "tpm-tis" - #define TYPE_TPM_CRB "tpm-crb" - #define TYPE_TPM_SPAPR "tpm-spapr" - --#define TPM_IS_TIS(chr) \ -- object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS) -+#define TPM_IS_TIS_ISA(chr) \ -+ object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_ISA) - #define TPM_IS_CRB(chr) \ - object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) - #define TPM_IS_SPAPR(chr) \ --- -2.23.0 - diff --git a/tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch b/tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch deleted file mode 100644 index ffc0b62ed7ef655056cfd1280282b768f22ad501..0000000000000000000000000000000000000000 --- a/tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch +++ /dev/null @@ -1,552 +0,0 @@ -From 14402a8ca57fb722eb324d141fafb41ef06f4c2b Mon Sep 17 00:00:00 2001 -From: Stefan Berger -Date: Tue, 21 Jan 2020 10:29:32 -0500 -Subject: [PATCH 06/19] tpm_spapr: Support TPM for ppc64 using CRQ based - interface - -Implement support for TPM on ppc64 by implementing the vTPM CRQ interface -as a frontend. It can use the tpm_emulator driver backend with the external -swtpm. - -The Linux vTPM driver for ppc64 works with this emulation. - -This TPM emulator also handles the TPM 2 case. 
- -Signed-off-by: Stefan Berger -Reviewed-by: David Gibson -Message-Id: <20200121152935.649898-4-stefanb@linux.ibm.com> -Signed-off-by: David Gibson -Signed-off-by: jiangfangjie ---- - docs/specs/tpm.txt | 20 ++- - hw/tpm/Kconfig | 6 + - hw/tpm/Makefile.objs | 1 + - hw/tpm/tpm_spapr.c | 379 +++++++++++++++++++++++++++++++++++++++++++ - hw/tpm/trace-events | 12 ++ - include/sysemu/tpm.h | 3 + - qapi/tpm.json | 6 +- - 7 files changed, 423 insertions(+), 4 deletions(-) - create mode 100644 hw/tpm/tpm_spapr.c - -diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt -index 9c8cca04..9c3e67d8 100644 ---- a/docs/specs/tpm.txt -+++ b/docs/specs/tpm.txt -@@ -34,6 +34,12 @@ The CRB interface makes a memory mapped IO region in the area 0xfed40000 - - QEMU files related to TPM CRB interface: - - hw/tpm/tpm_crb.c - -+ -+pSeries (ppc64) machines offer a tpm-spapr device model. -+ -+QEMU files related to the SPAPR interface: -+ - hw/tpm/tpm_spapr.c -+ - = fw_cfg interface = - - The bios/firmware may read the "etc/tpm/config" fw_cfg entry for -@@ -281,7 +287,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ - --log level=20 - - Command line to start QEMU with the TPM emulator device communicating with --the swtpm: -+the swtpm (x86): - - qemu-system-x86_64 -display sdl -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -@@ -289,6 +295,18 @@ qemu-system-x86_64 -display sdl -accel kvm \ - -tpmdev emulator,id=tpm0,chardev=chrtpm \ - -device tpm-tis,tpmdev=tpm0 test.img - -+In case a pSeries machine is emulated, use the following command line: -+ -+qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \ -+ -m 1024 -bios slof.bin -boot menu=on \ -+ -nodefaults -device VGA -device pci-ohci -device usb-kbd \ -+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -+ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -+ -device tpm-spapr,tpmdev=tpm0 \ -+ -device spapr-vscsi,id=scsi0,reg=0x00002000 \ -+ -device 
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ -+ -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 -+ - - In case SeaBIOS is used as firmware, it should show the TPM menu item - after entering the menu with 'ESC'. -diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig -index 4c8ee87d..4d4ab085 100644 ---- a/hw/tpm/Kconfig -+++ b/hw/tpm/Kconfig -@@ -22,3 +22,9 @@ config TPM_EMULATOR - bool - default y - depends on TPMDEV -+ -+config TPM_SPAPR -+ bool -+ default n -+ depends on TPM && PSERIES -+ select TPMDEV -diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs -index de0b85d0..85eb99ae 100644 ---- a/hw/tpm/Makefile.objs -+++ b/hw/tpm/Makefile.objs -@@ -4,3 +4,4 @@ common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o - common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o - common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o - common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o -+obj-$(CONFIG_TPM_SPAPR) += tpm_spapr.o -diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c -new file mode 100644 -index 00000000..1db9696a ---- /dev/null -+++ b/hw/tpm/tpm_spapr.c -@@ -0,0 +1,379 @@ -+/* -+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator -+ * -+ * PAPR Virtual TPM -+ * -+ * Copyright (c) 2015, 2017, 2019 IBM Corporation. -+ * -+ * Authors: -+ * Stefan Berger -+ * -+ * This code is licensed under the GPL version 2 or later. See the -+ * COPYING file in the top-level directory. 
-+ * -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/error-report.h" -+#include "qapi/error.h" -+#include "hw/qdev-properties.h" -+#include "migration/vmstate.h" -+ -+#include "sysemu/tpm_backend.h" -+#include "tpm_int.h" -+#include "tpm_util.h" -+ -+#include "hw/ppc/spapr.h" -+#include "hw/ppc/spapr_vio.h" -+#include "trace.h" -+ -+#define DEBUG_SPAPR 0 -+ -+#define VIO_SPAPR_VTPM(obj) \ -+ OBJECT_CHECK(SpaprTpmState, (obj), TYPE_TPM_SPAPR) -+ -+typedef struct TpmCrq { -+ uint8_t valid; /* 0x80: cmd; 0xc0: init crq */ -+ /* 0x81-0x83: CRQ message response */ -+ uint8_t msg; /* see below */ -+ uint16_t len; /* len of TPM request; len of TPM response */ -+ uint32_t data; /* rtce_dma_handle when sending TPM request */ -+ uint64_t reserved; -+} TpmCrq; -+ -+#define SPAPR_VTPM_VALID_INIT_CRQ_COMMAND 0xC0 -+#define SPAPR_VTPM_VALID_COMMAND 0x80 -+#define SPAPR_VTPM_MSG_RESULT 0x80 -+ -+/* msg types for valid = SPAPR_VTPM_VALID_INIT_CRQ */ -+#define SPAPR_VTPM_INIT_CRQ_RESULT 0x1 -+#define SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT 0x2 -+ -+/* msg types for valid = SPAPR_VTPM_VALID_CMD */ -+#define SPAPR_VTPM_GET_VERSION 0x1 -+#define SPAPR_VTPM_TPM_COMMAND 0x2 -+#define SPAPR_VTPM_GET_RTCE_BUFFER_SIZE 0x3 -+#define SPAPR_VTPM_PREPARE_TO_SUSPEND 0x4 -+ -+/* response error messages */ -+#define SPAPR_VTPM_VTPM_ERROR 0xff -+ -+/* error codes */ -+#define SPAPR_VTPM_ERR_COPY_IN_FAILED 0x3 -+#define SPAPR_VTPM_ERR_COPY_OUT_FAILED 0x4 -+ -+#define TPM_SPAPR_BUFFER_MAX 4096 -+ -+typedef struct { -+ SpaprVioDevice vdev; -+ -+ TpmCrq crq; /* track single TPM command */ -+ -+ uint8_t state; -+#define SPAPR_VTPM_STATE_NONE 0 -+#define SPAPR_VTPM_STATE_EXECUTION 1 -+#define SPAPR_VTPM_STATE_COMPLETION 2 -+ -+ unsigned char *buffer; -+ -+ TPMBackendCmd cmd; -+ -+ TPMBackend *be_driver; -+ TPMVersion be_tpm_version; -+ -+ size_t be_buffer_size; -+} SpaprTpmState; -+ -+/* -+ * Send a request to the TPM. 
-+ */ -+static void tpm_spapr_tpm_send(SpaprTpmState *s) -+{ -+ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { -+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); -+ } -+ -+ s->state = SPAPR_VTPM_STATE_EXECUTION; -+ s->cmd = (TPMBackendCmd) { -+ .locty = 0, -+ .in = s->buffer, -+ .in_len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size), -+ .out = s->buffer, -+ .out_len = s->be_buffer_size, -+ }; -+ -+ tpm_backend_deliver_request(s->be_driver, &s->cmd); -+} -+ -+static int tpm_spapr_process_cmd(SpaprTpmState *s, uint64_t dataptr) -+{ -+ long rc; -+ -+ /* a max. of be_buffer_size bytes can be transported */ -+ rc = spapr_vio_dma_read(&s->vdev, dataptr, -+ s->buffer, s->be_buffer_size); -+ if (rc) { -+ error_report("tpm_spapr_got_payload: DMA read failure"); -+ } -+ /* let vTPM handle any malformed request */ -+ tpm_spapr_tpm_send(s); -+ -+ return rc; -+} -+ -+static inline int spapr_tpm_send_crq(struct SpaprVioDevice *dev, TpmCrq *crq) -+{ -+ return spapr_vio_send_crq(dev, (uint8_t *)crq); -+} -+ -+static int tpm_spapr_do_crq(struct SpaprVioDevice *dev, uint8_t *crq_data) -+{ -+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); -+ TpmCrq local_crq; -+ TpmCrq *crq = &s->crq; /* requests only */ -+ int rc; -+ uint8_t valid = crq_data[0]; -+ uint8_t msg = crq_data[1]; -+ -+ trace_tpm_spapr_do_crq(valid, msg); -+ -+ switch (valid) { -+ case SPAPR_VTPM_VALID_INIT_CRQ_COMMAND: /* Init command/response */ -+ -+ /* Respond to initialization request */ -+ switch (msg) { -+ case SPAPR_VTPM_INIT_CRQ_RESULT: -+ trace_tpm_spapr_do_crq_crq_result(); -+ memset(&local_crq, 0, sizeof(local_crq)); -+ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; -+ local_crq.msg = SPAPR_VTPM_INIT_CRQ_RESULT; -+ spapr_tpm_send_crq(dev, &local_crq); -+ break; -+ -+ case SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT: -+ trace_tpm_spapr_do_crq_crq_complete_result(); -+ memset(&local_crq, 0, sizeof(local_crq)); -+ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; -+ 
local_crq.msg = SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT; -+ spapr_tpm_send_crq(dev, &local_crq); -+ break; -+ } -+ -+ break; -+ case SPAPR_VTPM_VALID_COMMAND: /* Payloads */ -+ switch (msg) { -+ case SPAPR_VTPM_TPM_COMMAND: -+ trace_tpm_spapr_do_crq_tpm_command(); -+ if (s->state == SPAPR_VTPM_STATE_EXECUTION) { -+ return H_BUSY; -+ } -+ memcpy(crq, crq_data, sizeof(*crq)); -+ -+ rc = tpm_spapr_process_cmd(s, be32_to_cpu(crq->data)); -+ -+ if (rc == H_SUCCESS) { -+ crq->valid = be16_to_cpu(0); -+ } else { -+ local_crq.valid = SPAPR_VTPM_MSG_RESULT; -+ local_crq.msg = SPAPR_VTPM_VTPM_ERROR; -+ local_crq.len = cpu_to_be16(0); -+ local_crq.data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_IN_FAILED); -+ spapr_tpm_send_crq(dev, &local_crq); -+ } -+ break; -+ -+ case SPAPR_VTPM_GET_RTCE_BUFFER_SIZE: -+ trace_tpm_spapr_do_crq_tpm_get_rtce_buffer_size(s->be_buffer_size); -+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND; -+ local_crq.msg = SPAPR_VTPM_GET_RTCE_BUFFER_SIZE | -+ SPAPR_VTPM_MSG_RESULT; -+ local_crq.len = cpu_to_be16(s->be_buffer_size); -+ spapr_tpm_send_crq(dev, &local_crq); -+ break; -+ -+ case SPAPR_VTPM_GET_VERSION: -+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND; -+ local_crq.msg = SPAPR_VTPM_GET_VERSION | SPAPR_VTPM_MSG_RESULT; -+ local_crq.len = cpu_to_be16(0); -+ switch (s->be_tpm_version) { -+ case TPM_VERSION_1_2: -+ local_crq.data = cpu_to_be32(1); -+ break; -+ case TPM_VERSION_2_0: -+ local_crq.data = cpu_to_be32(2); -+ break; -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ trace_tpm_spapr_do_crq_get_version(be32_to_cpu(local_crq.data)); -+ spapr_tpm_send_crq(dev, &local_crq); -+ break; -+ -+ case SPAPR_VTPM_PREPARE_TO_SUSPEND: -+ trace_tpm_spapr_do_crq_prepare_to_suspend(); -+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND; -+ local_crq.msg = SPAPR_VTPM_PREPARE_TO_SUSPEND | -+ SPAPR_VTPM_MSG_RESULT; -+ spapr_tpm_send_crq(dev, &local_crq); -+ break; -+ -+ default: -+ trace_tpm_spapr_do_crq_unknown_msg_type(crq->msg); -+ } -+ break; -+ default: -+ 
trace_tpm_spapr_do_crq_unknown_crq(valid, msg); -+ }; -+ -+ return H_SUCCESS; -+} -+ -+static void tpm_spapr_request_completed(TPMIf *ti, int ret) -+{ -+ SpaprTpmState *s = VIO_SPAPR_VTPM(ti); -+ TpmCrq *crq = &s->crq; -+ uint32_t len; -+ int rc; -+ -+ s->state = SPAPR_VTPM_STATE_COMPLETION; -+ -+ /* a max. of be_buffer_size bytes can be transported */ -+ len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size); -+ rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data), -+ s->buffer, len); -+ -+ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { -+ tpm_util_show_buffer(s->buffer, len, "From TPM"); -+ } -+ -+ crq->valid = SPAPR_VTPM_MSG_RESULT; -+ if (rc == H_SUCCESS) { -+ crq->msg = SPAPR_VTPM_TPM_COMMAND | SPAPR_VTPM_MSG_RESULT; -+ crq->len = cpu_to_be16(len); -+ } else { -+ error_report("%s: DMA write failure", __func__); -+ crq->msg = SPAPR_VTPM_VTPM_ERROR; -+ crq->len = cpu_to_be16(0); -+ crq->data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_OUT_FAILED); -+ } -+ -+ rc = spapr_tpm_send_crq(&s->vdev, crq); -+ if (rc) { -+ error_report("%s: Error sending response", __func__); -+ } -+} -+ -+static int tpm_spapr_do_startup_tpm(SpaprTpmState *s, size_t buffersize) -+{ -+ return tpm_backend_startup_tpm(s->be_driver, buffersize); -+} -+ -+static const char *tpm_spapr_get_dt_compatible(SpaprVioDevice *dev) -+{ -+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); -+ -+ switch (s->be_tpm_version) { -+ case TPM_VERSION_1_2: -+ return "IBM,vtpm"; -+ case TPM_VERSION_2_0: -+ return "IBM,vtpm20"; -+ default: -+ g_assert_not_reached(); -+ } -+} -+ -+static void tpm_spapr_reset(SpaprVioDevice *dev) -+{ -+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); -+ -+ s->state = SPAPR_VTPM_STATE_NONE; -+ -+ s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); -+ -+ s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver), -+ TPM_SPAPR_BUFFER_MAX); -+ -+ tpm_backend_reset(s->be_driver); -+ tpm_spapr_do_startup_tpm(s, s->be_buffer_size); -+} -+ -+static enum TPMVersion 
tpm_spapr_get_version(TPMIf *ti) -+{ -+ SpaprTpmState *s = VIO_SPAPR_VTPM(ti); -+ -+ if (tpm_backend_had_startup_error(s->be_driver)) { -+ return TPM_VERSION_UNSPEC; -+ } -+ -+ return tpm_backend_get_tpm_version(s->be_driver); -+} -+ -+static const VMStateDescription vmstate_spapr_vtpm = { -+ .name = "tpm-spapr", -+ .unmigratable = 1, -+}; -+ -+static Property tpm_spapr_properties[] = { -+ DEFINE_SPAPR_PROPERTIES(SpaprTpmState, vdev), -+ DEFINE_PROP_TPMBE("tpmdev", SpaprTpmState, be_driver), -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ -+static void tpm_spapr_realizefn(SpaprVioDevice *dev, Error **errp) -+{ -+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); -+ -+ if (!tpm_find()) { -+ error_setg(errp, "at most one TPM device is permitted"); -+ return; -+ } -+ -+ dev->crq.SendFunc = tpm_spapr_do_crq; -+ -+ if (!s->be_driver) { -+ error_setg(errp, "'tpmdev' property is required"); -+ return; -+ } -+ s->buffer = g_malloc(TPM_SPAPR_BUFFER_MAX); -+} -+ -+static void tpm_spapr_class_init(ObjectClass *klass, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(klass); -+ SpaprVioDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass); -+ TPMIfClass *tc = TPM_IF_CLASS(klass); -+ -+ k->realize = tpm_spapr_realizefn; -+ k->reset = tpm_spapr_reset; -+ k->dt_name = "vtpm"; -+ k->dt_type = "IBM,vtpm"; -+ k->get_dt_compatible = tpm_spapr_get_dt_compatible; -+ k->signal_mask = 0x00000001; -+ set_bit(DEVICE_CATEGORY_MISC, dc->categories); -+ dc->props = tpm_spapr_properties; -+ k->rtce_window_size = 0x10000000; -+ dc->vmsd = &vmstate_spapr_vtpm; -+ -+ tc->model = TPM_MODEL_TPM_SPAPR; -+ tc->get_version = tpm_spapr_get_version; -+ tc->request_completed = tpm_spapr_request_completed; -+} -+ -+static const TypeInfo tpm_spapr_info = { -+ .name = TYPE_TPM_SPAPR, -+ .parent = TYPE_VIO_SPAPR_DEVICE, -+ .instance_size = sizeof(SpaprTpmState), -+ .class_init = tpm_spapr_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_TPM_IF }, -+ { } -+ } -+}; -+ -+static void tpm_spapr_register_types(void) -+{ -+ 
type_register_static(&tpm_spapr_info); -+} -+ -+type_init(tpm_spapr_register_types) -diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events -index 82c45ee5..edbe1bd7 100644 ---- a/hw/tpm/trace-events -+++ b/hw/tpm/trace-events -@@ -55,3 +55,15 @@ tpm_tis_pre_save(uint8_t locty, uint32_t rw_offset) "locty: %d, rw_offset = %u" - - # tpm_ppi.c - tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu" -+ -+# hw/tpm/tpm_spapr.c -+tpm_spapr_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" -+tpm_spapr_do_crq(uint8_t raw1, uint8_t raw2) "1st 2 bytes in CRQ: 0x%02x 0x%02x" -+tpm_spapr_do_crq_crq_result(void) "SPAPR_VTPM_INIT_CRQ_RESULT" -+tpm_spapr_do_crq_crq_complete_result(void) "SPAPR_VTPM_INIT_CRQ_COMP_RESULT" -+tpm_spapr_do_crq_tpm_command(void) "got TPM command payload" -+tpm_spapr_do_crq_tpm_get_rtce_buffer_size(size_t buffersize) "response: buffer size is %zu" -+tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u" -+tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend" -+tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x" -+tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..." 
-diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h -index 5b541a71..15979a36 100644 ---- a/include/sysemu/tpm.h -+++ b/include/sysemu/tpm.h -@@ -45,11 +45,14 @@ typedef struct TPMIfClass { - - #define TYPE_TPM_TIS "tpm-tis" - #define TYPE_TPM_CRB "tpm-crb" -+#define TYPE_TPM_SPAPR "tpm-spapr" - - #define TPM_IS_TIS(chr) \ - object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS) - #define TPM_IS_CRB(chr) \ - object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) -+#define TPM_IS_SPAPR(chr) \ -+ object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR) - - /* returns NULL unless there is exactly one TPM device */ - static inline TPMIf *tpm_find(void) -diff --git a/qapi/tpm.json b/qapi/tpm.json -index b30323bb..63878aa0 100644 ---- a/qapi/tpm.json -+++ b/qapi/tpm.json -@@ -12,11 +12,11 @@ - # - # @tpm-tis: TPM TIS model - # @tpm-crb: TPM CRB model (since 2.12) -+# @tpm-spapr: TPM SPAPR model (since 5.0) - # - # Since: 1.5 - ## --{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb' ] } -- -+{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb', 'tpm-spapr' ] } - ## - # @query-tpm-models: - # -@@ -29,7 +29,7 @@ - # Example: - # - # -> { "execute": "query-tpm-models" } --# <- { "return": [ "tpm-tis", "tpm-crb" ] } -+# <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] } - # - ## - { 'command': 'query-tpm-models', 'returns': ['TpmModel'] } --- -2.23.0 - diff --git a/tpm_spapr-Support-suspend-and-resume.patch b/tpm_spapr-Support-suspend-and-resume.patch deleted file mode 100644 index 55ed521a261fe5c058d9f6b95334c0884cdfd7ea..0000000000000000000000000000000000000000 --- a/tpm_spapr-Support-suspend-and-resume.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 2948d9712a7058bcdca6732101874beb1a6e00a9 Mon Sep 17 00:00:00 2001 -From: Stefan Berger -Date: Tue, 21 Jan 2020 10:29:33 -0500 -Subject: [PATCH 07/19] tpm_spapr: Support suspend and resume -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Extend the tpm_spapr frontend with VM suspend and resume 
support. - -Signed-off-by: Stefan Berger -Message-Id: <20200121152935.649898-5-stefanb@linux.ibm.com> -Reviewed-by: Marc-André Lureau -Signed-off-by: David Gibson -Signed-off-by: jiangfangjie ---- - hw/tpm/tpm_spapr.c | 52 ++++++++++++++++++++++++++++++++++++++++++++- - hw/tpm/trace-events | 2 ++ - 2 files changed, 53 insertions(+), 1 deletion(-) - -diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c -index 1db9696a..8ba561f4 100644 ---- a/hw/tpm/tpm_spapr.c -+++ b/hw/tpm/tpm_spapr.c -@@ -76,6 +76,8 @@ typedef struct { - - unsigned char *buffer; - -+ uint32_t numbytes; /* number of bytes to deliver on resume */ -+ - TPMBackendCmd cmd; - - TPMBackend *be_driver; -@@ -240,6 +242,14 @@ static void tpm_spapr_request_completed(TPMIf *ti, int ret) - - /* a max. of be_buffer_size bytes can be transported */ - len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size); -+ -+ if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { -+ trace_tpm_spapr_caught_response(len); -+ /* defer delivery of response until .post_load */ -+ s->numbytes = len; -+ return; -+ } -+ - rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data), - s->buffer, len); - -@@ -288,6 +298,7 @@ static void tpm_spapr_reset(SpaprVioDevice *dev) - SpaprTpmState *s = VIO_SPAPR_VTPM(dev); - - s->state = SPAPR_VTPM_STATE_NONE; -+ s->numbytes = 0; - - s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); - -@@ -309,9 +320,48 @@ static enum TPMVersion tpm_spapr_get_version(TPMIf *ti) - return tpm_backend_get_tpm_version(s->be_driver); - } - -+/* persistent state handling */ -+ -+static int tpm_spapr_pre_save(void *opaque) -+{ -+ SpaprTpmState *s = opaque; -+ -+ tpm_backend_finish_sync(s->be_driver); -+ /* -+ * we cannot deliver the results to the VM since DMA would touch VM memory -+ */ -+ -+ return 0; -+} -+ -+static int tpm_spapr_post_load(void *opaque, int version_id) -+{ -+ SpaprTpmState *s = opaque; -+ -+ if (s->numbytes) { -+ trace_tpm_spapr_post_load(); -+ /* deliver the results to the VM via DMA */ -+ 
tpm_spapr_request_completed(TPM_IF(s), 0); -+ s->numbytes = 0; -+ } -+ -+ return 0; -+} -+ - static const VMStateDescription vmstate_spapr_vtpm = { - .name = "tpm-spapr", -- .unmigratable = 1, -+ .pre_save = tpm_spapr_pre_save, -+ .post_load = tpm_spapr_post_load, -+ .fields = (VMStateField[]) { -+ VMSTATE_SPAPR_VIO(vdev, SpaprTpmState), -+ -+ VMSTATE_UINT8(state, SpaprTpmState), -+ VMSTATE_UINT32(numbytes, SpaprTpmState), -+ VMSTATE_VBUFFER_UINT32(buffer, SpaprTpmState, 0, NULL, numbytes), -+ /* remember DMA address */ -+ VMSTATE_UINT32(crq.data, SpaprTpmState), -+ VMSTATE_END_OF_LIST(), -+ } - }; - - static Property tpm_spapr_properties[] = { -diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events -index edbe1bd7..b97eea24 100644 ---- a/hw/tpm/trace-events -+++ b/hw/tpm/trace-events -@@ -67,3 +67,5 @@ tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u" - tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend" - tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x" - tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..." 
-+tpm_spapr_post_load(void) "Delivering TPM response after resume" -+tpm_spapr_caught_response(uint32_t v) "Caught response to deliver after resume: %u bytes" --- -2.23.0 - diff --git a/travis-ci-Rename-SOFTMMU-SYSTEM.patch b/travis-ci-Rename-SOFTMMU-SYSTEM.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa788135e89c9c9c09946848f47064de448e0545 --- /dev/null +++ b/travis-ci-Rename-SOFTMMU-SYSTEM.patch @@ -0,0 +1,67 @@ +From c03415f3b75e6a37c7eb392ef62bf92b94267b4d Mon Sep 17 00:00:00 2001 +From: gaojiazhen +Date: Mon, 25 Mar 2024 17:26:52 +0800 +Subject: [PATCH] travis-ci: Rename SOFTMMU -> SYSTEM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 47833f817cc597db124c690bd14600bb5d00e824 + +Since we *might* have user emulation with softmmu, +rename MAIN_SOFTMMU_TARGETS as MAIN_SYSTEM_TARGETS +to express 'system emulation targets'. + +Signed-off-by: Philippe Mathieu-Daudé +Message-ID: <20240313213339.82071-3-philmd@linaro.org> +Reviewed-by: Thomas Huth +Reviewed-by: Richard Henderson +Signed-off-by: Thomas Huth +Signed-off-by: Gao Jiazhen +--- + .travis.yml | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/.travis.yml b/.travis.yml +index 76859d48da..597d151b80 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -35,7 +35,7 @@ env: + - TEST_BUILD_CMD="" + - TEST_CMD="make check V=1" + # This is broadly a list of "mainline" system targets which have support across the major distros +- - MAIN_SOFTMMU_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu" ++ - MAIN_SYSTEM_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu" + - CCACHE_SLOPPINESS="include_file_ctime,include_file_mtime" + - CCACHE_MAXSIZE=1G + - G_MESSAGES_DEBUG=error +@@ -114,7 +114,7 @@ jobs: + env: + - TEST_CMD="make check check-tcg V=1" + - CONFIG="--disable-containers --enable-fdt=system +- 
--target-list=${MAIN_SOFTMMU_TARGETS} --cxx=/bin/false" ++ --target-list=${MAIN_SYSTEM_TARGETS} --cxx=/bin/false" + - UNRELIABLE=true + + - name: "[ppc64] GCC check-tcg" +@@ -185,7 +185,7 @@ jobs: + env: + - TEST_CMD="make check check-tcg V=1" + - CONFIG="--disable-containers --enable-fdt=system +- --target-list=${MAIN_SOFTMMU_TARGETS},s390x-linux-user" ++ --target-list=${MAIN_SYSTEM_TARGETS},s390x-linux-user" + - UNRELIABLE=true + script: + - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$? +@@ -226,7 +226,7 @@ jobs: + - genisoimage + env: + - CONFIG="--disable-containers --enable-fdt=system --audio-drv-list=sdl +- --disable-user --target-list-exclude=${MAIN_SOFTMMU_TARGETS}" ++ --disable-user --target-list-exclude=${MAIN_SYSTEM_TARGETS}" + + - name: "[s390x] GCC (user)" + arch: s390x +-- +2.27.0 + diff --git a/tz-ppc-add-dummy-read-write-methods.patch b/tz-ppc-add-dummy-read-write-methods.patch deleted file mode 100644 index ee8fa6b096bf8e359cb326c581d0a72733c8c1c4..0000000000000000000000000000000000000000 --- a/tz-ppc-add-dummy-read-write-methods.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 52d1c1a258aef2b8ace50bb202ee7338ed0060f0 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:27:07 +0800 -Subject: [PATCH] tz-ppc: add dummy read/write methods - -fix CVE-2020-15469 - -Add tz-ppc-dummy mmio read/write methods to avoid assert failure -during initialisation. 
- -Signed-off-by: Prasad J Pandit - -Signed-off-by: Jiajie Li ---- - hw/misc/tz-ppc.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/hw/misc/tz-ppc.c b/hw/misc/tz-ppc.c -index 2a14a26f29..5b7b883866 100644 ---- a/hw/misc/tz-ppc.c -+++ b/hw/misc/tz-ppc.c -@@ -193,7 +193,20 @@ static bool tz_ppc_dummy_accepts(void *opaque, hwaddr addr, - g_assert_not_reached(); - } - -+static uint64_t tz_ppc_dummy_read(void *opaque, hwaddr addr, unsigned size) -+{ -+ g_assert_not_reached(); -+} -+ -+static void tz_ppc_dummy_write(void *opaque, hwaddr addr, -+ uint64_t data, unsigned size) -+{ -+ g_assert_not_reached(); -+} -+ - static const MemoryRegionOps tz_ppc_dummy_ops = { -+ .read = tz_ppc_dummy_read, -+ .write = tz_ppc_dummy_write, - .valid.accepts = tz_ppc_dummy_accepts, - }; - --- -2.27.0 - diff --git a/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch b/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch deleted file mode 100644 index 30724cedb050b52be09a0b081ffe36cd7599d268..0000000000000000000000000000000000000000 --- a/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 6705b9344f8d6f134f612c2e35e87cdda5aa6284 Mon Sep 17 00:00:00 2001 -From: Hikaru Nishida -Date: Tue, 15 Oct 2019 10:07:34 +0900 -Subject: [PATCH] ui: Fix hanging up Cocoa display on macOS 10.15 (Catalina) - -macOS API documentation says that before applicationDidFinishLaunching -is called, any events will not be processed. However, some events are -fired before it is called in macOS Catalina. This causes deadlock of -iothread_lock in handleEvent while it will be released after the -app_started_sem is posted. -This patch avoids processing events before the app_started_sem is -posted to prevent this deadlock. 
- -Buglink: https://bugs.launchpad.net/qemu/+bug/1847906 -Signed-off-by: Hikaru Nishida -Message-id: 20191015010734.85229-1-hikarupsp@gmail.com -Signed-off-by: Gerd Hoffmann -(cherry picked from commit dff742ad27efa474ec04accdbf422c9acfd3e30e) -Signed-off-by: Michael Roth ---- - ui/cocoa.m | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/ui/cocoa.m b/ui/cocoa.m -index c2984028c5..3026ead621 100644 ---- a/ui/cocoa.m -+++ b/ui/cocoa.m -@@ -132,6 +132,7 @@ NSArray * supportedImageFileTypes; - - static QemuSemaphore display_init_sem; - static QemuSemaphore app_started_sem; -+static bool allow_events; - - // Utility functions to run specified code block with iothread lock held - typedef void (^CodeBlock)(void); -@@ -727,6 +728,16 @@ QemuCocoaView *cocoaView; - - - (bool) handleEvent:(NSEvent *)event - { -+ if(!allow_events) { -+ /* -+ * Just let OSX have all events that arrive before -+ * applicationDidFinishLaunching. -+ * This avoids a deadlock on the iothread lock, which cocoa_display_init() -+ * will not drop until after the app_started_sem is posted. (In theory -+ * there should not be any such events, but OSX Catalina now emits some.) 
-+ */ -+ return false; -+ } - return bool_with_iothread_lock(^{ - return [self handleEventLocked:event]; - }); -@@ -1154,6 +1165,7 @@ QemuCocoaView *cocoaView; - - (void)applicationDidFinishLaunching: (NSNotification *) note - { - COCOA_DEBUG("QemuCocoaAppController: applicationDidFinishLaunching\n"); -+ allow_events = true; - /* Tell cocoa_display_init to proceed */ - qemu_sem_post(&app_started_sem); - } --- -2.23.0 diff --git a/ui-clipboard-mark-type-as-not-available-when-there-i.patch b/ui-clipboard-mark-type-as-not-available-when-there-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..843479a9c61b25ad1857352b4dafae1c5c2f4feb --- /dev/null +++ b/ui-clipboard-mark-type-as-not-available-when-there-i.patch @@ -0,0 +1,89 @@ +From 855f7f30de962f79393f0b9f8b0355b996d72de7 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Wed, 24 Jan 2024 11:57:48 +0100 +Subject: [PATCH] ui/clipboard: mark type as not available when there is no + data (CVE-2023-6683) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT +message with len=0. In qemu_clipboard_set_data(), the clipboard info +will be updated setting data to NULL (because g_memdup(data, size) +returns NULL when size is 0). If the client does not set the +VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then +the 'request' callback for the clipboard peer is not initialized. +Later, because data is NULL, qemu_clipboard_request() can be reached +via vdagent_chr_write() and vdagent_clipboard_recv_request() and +there, the clipboard owner's 'request' callback will be attempted to +be called, but that is a NULL pointer. + +In particular, this can happen when using the KRDC (22.12.3) VNC +client. + +Another scenario leading to the same issue is with two clients (say +noVNC and KRDC): + +The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and +initializes its cbpeer. 
+ +The KRDC client does not, but triggers a vnc_client_cut_text() (note +it's not the _ext variant)). There, a new clipboard info with it as +the 'owner' is created and via qemu_clipboard_set_data() is called, +which in turn calls qemu_clipboard_update() with that info. + +In qemu_clipboard_update(), the notifier for the noVNC client will be +called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the +noVNC client. The 'owner' in that clipboard info is the clipboard peer +for the KRDC client, which did not initialize the 'request' function. +That sounds correct to me, it is the owner of that clipboard info. + +Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set +the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it +passes), that clipboard info is passed to qemu_clipboard_request() and +the original segfault still happens. + +Fix the issue by handling updates with size 0 differently. In +particular, mark in the clipboard info that the type is not available. + +While at it, switch to g_memdup2(), because g_memdup() is deprecated. 
+ +Cc: qemu-stable@nongnu.org +Fixes: CVE-2023-6683 +Reported-by: Markus Frank +Suggested-by: Marc-André Lureau +Signed-off-by: Fiona Ebner +Reviewed-by: Marc-André Lureau +Tested-by: Markus Frank +Message-ID: <20240124105749.204610-1-f.ebner@proxmox.com> +Signed-off-by: liuxiangdong +--- + ui/clipboard.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index 3d14bffaf8..b3f6fa3c9e 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer, + } + + g_free(info->types[type].data); +- info->types[type].data = g_memdup(data, size); +- info->types[type].size = size; +- info->types[type].available = true; ++ if (size) { ++ info->types[type].data = g_memdup2(data, size); ++ info->types[type].size = size; ++ info->types[type].available = true; ++ } else { ++ info->types[type].data = NULL; ++ info->types[type].size = 0; ++ info->types[type].available = false; ++ } + + if (update) { + qemu_clipboard_update(info); +-- +2.27.0 + diff --git a/ui-console-vc-Silence-warning-about-sprintf-on-OpenB.patch b/ui-console-vc-Silence-warning-about-sprintf-on-OpenB.patch new file mode 100644 index 0000000000000000000000000000000000000000..b82b1c57ab9dcb3e1d34f3c35ab72fbd81ecadcf --- /dev/null +++ b/ui-console-vc-Silence-warning-about-sprintf-on-OpenB.patch @@ -0,0 +1,52 @@ +From e41395594aab30a22ffaf1556d19ee623a33e6ec Mon Sep 17 00:00:00 2001 +From: Susanooo +Date: Fri, 25 Oct 2024 09:33:41 +0800 +Subject: [PATCH] ui/console-vc: Silence warning about sprintf() on OpenBSD +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The linker on OpenBSD complains: + + ld: warning: console-vc.c:824 (../src/ui/console-vc.c:824)([...]): + warning: sprintf() is often misused, please use snprintf() + +Using g_strdup_printf() is certainly better here, so let's switch +to that function instead. 
+ +Signed-off-by: Thomas Huth +Reviewed-by: Marc-André Lureau +Reviewed-by: Alex Bennée +Reviewed-by: Richard Henderson +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +Signed-off-by: zhangchujun +--- + ui/console-vc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ui/console-vc.c b/ui/console-vc.c +index 9c13cc2981..b1903c3e48 100644 +--- a/ui/console-vc.c ++++ b/ui/console-vc.c +@@ -648,7 +648,7 @@ static void vc_putchar(VCChardev *vc, int ch) + QemuTextConsole *s = vc->console; + int i; + int x, y; +- char response[40]; ++ g_autofree char *response = NULL; + + switch(vc->state) { + case TTY_STATE_NORM: +@@ -821,7 +821,7 @@ static void vc_putchar(VCChardev *vc, int ch) + break; + case 6: + /* report cursor position */ +- sprintf(response, "\033[%d;%dR", ++ response = g_strdup_printf("\033[%d;%dR", + (s->y_base + s->y) % s->total_height + 1, + s->x + 1); + vc_respond_str(vc, response); +-- +2.41.0.windows.1 + diff --git a/ui-gtk-Draw-guest-frame-at-refresh-cycle.patch b/ui-gtk-Draw-guest-frame-at-refresh-cycle.patch new file mode 100644 index 0000000000000000000000000000000000000000..3aeb1db60948c24443bed5a0de4b8a5756d3e31e --- /dev/null +++ b/ui-gtk-Draw-guest-frame-at-refresh-cycle.patch @@ -0,0 +1,59 @@ +From 228e14db9a85e7e978c38b97ae622302a0d4f784 Mon Sep 17 00:00:00 2001 +From: Dongwon Kim +Date: Fri, 26 Apr 2024 15:50:59 -0700 +Subject: [PATCH] ui/gtk: Draw guest frame at refresh cycle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Draw routine needs to be manually invoked in the next refresh +if there is a scanout blob from the guest. This is to prevent +a situation where there is a scheduled draw event but it won't +happen because the window is currently in inactive state +(minimized or tabified). If draw is not done for a long time, +gl_block timeout and/or fence timeout (on the guest) will happen +eventually. 
+ +v2: Use gd_gl_area_draw(vc) in gtk-gl-area.c + +Suggested-by: Vivek Kasireddy +Cc: Gerd Hoffmann +Cc: Marc-André Lureau +Cc: Daniel P. Berrangé +Signed-off-by: Dongwon Kim +Acked-by: Marc-André Lureau +Message-Id: <20240426225059.3871283-1-dongwon.kim@intel.com> +(cherry picked from commit 77bf310084dad38b3a2badf01766c659056f1cf2) +Signed-off-by: zhujun2 +--- + ui/gtk-egl.c | 1 + + ui/gtk-gl-area.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c +index 3af5ac5bcf..75f6b9011a 100644 +--- a/ui/gtk-egl.c ++++ b/ui/gtk-egl.c +@@ -150,6 +150,7 @@ void gd_egl_refresh(DisplayChangeListener *dcl) + vc, vc->window ? vc->window : vc->gfx.drawing_area); + + if (vc->gfx.guest_fb.dmabuf && vc->gfx.guest_fb.dmabuf->draw_submitted) { ++ gd_egl_draw(vc); + return; + } + +diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c +index 52dcac161e..4fff957c3f 100644 +--- a/ui/gtk-gl-area.c ++++ b/ui/gtk-gl-area.c +@@ -126,6 +126,7 @@ void gd_gl_area_refresh(DisplayChangeListener *dcl) + gd_update_monitor_refresh_rate(vc, vc->window ? 
vc->window : vc->gfx.drawing_area); + + if (vc->gfx.guest_fb.dmabuf && vc->gfx.guest_fb.dmabuf->draw_submitted) { ++ gd_gl_area_draw(vc); + return; + } + +-- +2.41.0.windows.1 + diff --git a/ui-gtk-Fix-mouse-motion-event-scaling-issue-with-GTK.patch b/ui-gtk-Fix-mouse-motion-event-scaling-issue-with-GTK.patch new file mode 100644 index 0000000000000000000000000000000000000000..fda092eaab9b1641a4bf7fa8121a5589c5d31251 --- /dev/null +++ b/ui-gtk-Fix-mouse-motion-event-scaling-issue-with-GTK.patch @@ -0,0 +1,82 @@ +From 2e5fd7f2e6027899e84984bc31f52d4dda3b89ed Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 21 May 2024 14:35:19 +0800 +Subject: [PATCH] ui/gtk: Fix mouse/motion event scaling issue with GTK display + backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 37e91415018db3656b46cdea8f9e4d47b3ff130d + +Remove gtk_widget_get_scale_factor() usage from the calculation of +the motion events in the GTK backend to make it work correctly on +environments that have `gtk_widget_get_scale_factor() != 1`. + +This scale factor usage had been introduced in the commit f14aab420c and +at that time the window size was used for calculating the things and it +was working correctly. However, in the commit 2f31663ed4 the logic +switched to use the widget size instead of window size and because of +the change the usage of scale factor becomes invalid (since widgets use +`vc->gfx.scale_{x, y}` for scaling). + +Tested on Crostini on ChromeOS (15823.51.0) with an external display. 
+ +Fixes: 2f31663ed4 ("ui/gtk: use widget size for cursor motion event") +Fixes: f14aab420c ("ui: fix incorrect pointer position on highdpi with +gtk") + +Signed-off-by: hikalium +Acked-by: Marc-André Lureau +Message-Id: <20240512111435.30121-3-hikalium@hikalium.com> +Signed-off-by: qihao_yewu +--- + ui/gtk.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/ui/gtk.c b/ui/gtk.c +index 810d7fc796..1a69f6fc37 100644 +--- a/ui/gtk.c ++++ b/ui/gtk.c +@@ -887,7 +887,7 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, + int x, y; + int mx, my; + int fbh, fbw; +- int ww, wh, ws; ++ int ww, wh; + + if (!vc->gfx.ds) { + return TRUE; +@@ -898,8 +898,13 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, + + ww = gtk_widget_get_allocated_width(widget); + wh = gtk_widget_get_allocated_height(widget); +- ws = gtk_widget_get_scale_factor(widget); + ++ /* ++ * `widget` may not have the same size with the frame buffer. ++ * In such cases, some paddings are needed around the `vc`. ++ * To achieve that, `vc` will be displayed at (mx, my) ++ * so that it is displayed at the center of the widget. ++ */ + mx = my = 0; + if (ww > fbw) { + mx = (ww - fbw) / 2; +@@ -908,8 +913,12 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, + my = (wh - fbh) / 2; + } + +- x = (motion->x - mx) / vc->gfx.scale_x * ws; +- y = (motion->y - my) / vc->gfx.scale_y * ws; ++ /* ++ * `motion` is reported in `widget` coordinates ++ * so translating it to the coordinates in `vc`. 
++ */ ++ x = (motion->x - mx) / vc->gfx.scale_x; ++ y = (motion->y - my) / vc->gfx.scale_y; + + if (qemu_input_is_absolute(vc->gfx.dcl.con)) { + if (x < 0 || y < 0 || +-- +2.41.0.windows.1 + diff --git a/ui-remove-break-after-g_assert_not_reached.patch b/ui-remove-break-after-g_assert_not_reached.patch new file mode 100644 index 0000000000000000000000000000000000000000..0f026ced08900d247cc3b641f7e489ce167dda59 --- /dev/null +++ b/ui-remove-break-after-g_assert_not_reached.patch @@ -0,0 +1,48 @@ +From c761dac5d72f0d7c4643125e0611c75334b4ec4e Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Mon, 21 Oct 2024 15:58:54 +0800 +Subject: [PATCH] ui: remove break after g_assert_not_reached() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from b3372e0ec818d7747963a2ec7ae04fd1a8152afd + +Use of assert(false) can trip spurious control flow warnings from +some versions of GCC (i.e. using -fsanitize=thread with gcc-12): + + error: control reaches end of non-void function [-Werror=return-type] + default: + g_assert_not_reached(); + break; + | ^^^^^ + +Solve that by removing the unreachable 'break' statement, unifying +the code base on g_assert_not_reached() instead. 
+ +Signed-off-by: Pierrick Bouvier +Reviewed-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240910221606.1817478-37-pierrick.bouvier@linaro.org> +[PMD: Add description suggested by Eric Blake] +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Jiao +--- + ui/qemu-pixman.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/ui/qemu-pixman.c b/ui/qemu-pixman.c +index 5ca55dd199..6cada8b45e 100644 +--- a/ui/qemu-pixman.c ++++ b/ui/qemu-pixman.c +@@ -49,7 +49,6 @@ PixelFormat qemu_pixelformat_from_pixman(pixman_format_code_t format) + break; + default: + g_assert_not_reached(); +- break; + } + + pf.amax = (1 << pf.abits) - 1; +-- +2.41.0.windows.1 + diff --git a/ui-sdl2-set-swap-interval-explicitly-when-OpenGL-is-.patch b/ui-sdl2-set-swap-interval-explicitly-when-OpenGL-is-.patch new file mode 100644 index 0000000000000000000000000000000000000000..c2a5954c5714f7f8fd262ba4c80d7c0a2db3ef5f --- /dev/null +++ b/ui-sdl2-set-swap-interval-explicitly-when-OpenGL-is-.patch @@ -0,0 +1,48 @@ +From 855f389c98787baaa8afd1139fb82e0710167d9a Mon Sep 17 00:00:00 2001 +From: Gert Wollny +Date: Wed, 11 Sep 2024 09:14:30 +0000 +Subject: [PATCH] ui/sdl2: set swap interval explicitly when OpenGL is enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Before 176e3783f2ab (ui/sdl2: OpenGL window context) +SDL_CreateRenderer was called unconditionally setting +the swap interval to 0. Since SDL_CreateRenderer is now no +longer called when OpenGL is enabled, the swap interval is +no longer set explicitly and vsync handling depends on +the environment settings which may lead to a performance +regression with virgl as reported in + https://gitlab.com/qemu-project/qemu/-/issues/2565 + +Restore the old vsync handling by explicitly calling +SDL_GL_SetSwapInterval if OpenGL is enabled. 
+ +Fixes: 176e3783f2ab (ui/sdl2: OpenGL window context) +Closes: https://gitlab.com/qemu-project/qemu/-/issues/2565 + +Signed-off-by: Gert Wollny +Acked-by: Marc-André Lureau +Message-ID: <01020191e05ce6df-84da6386-62c2-4ce8-840e-ad216ac253dd-000000@eu-west-1.amazonses.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit ae23cd00170baaa2777eb1ee87b70f472dbb3c44) +Signed-off-by: zhujun2 +--- + ui/sdl2.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/ui/sdl2.c b/ui/sdl2.c +index 4971963f00..cc44d2708b 100644 +--- a/ui/sdl2.c ++++ b/ui/sdl2.c +@@ -115,6 +115,7 @@ void sdl2_window_create(struct sdl2_console *scon) + SDL_SetHint(SDL_HINT_RENDER_BATCHING, "1"); + + scon->winctx = SDL_GL_CreateContext(scon->real_window); ++ SDL_GL_SetSwapInterval(0); + } else { + /* The SDL renderer is only used by sdl2-2D, when OpenGL is disabled */ + scon->real_renderer = SDL_CreateRenderer(scon->real_window, -1, 0); +-- +2.41.0.windows.1 + diff --git a/ui-vnc-don-t-return-an-empty-SASL-mechlist-to-the-cl.patch b/ui-vnc-don-t-return-an-empty-SASL-mechlist-to-the-cl.patch new file mode 100644 index 0000000000000000000000000000000000000000..10049b8fbec458c6b85787d15b1d77a0f48980b3 --- /dev/null +++ b/ui-vnc-don-t-return-an-empty-SASL-mechlist-to-the-cl.patch @@ -0,0 +1,47 @@ +From 838c585cf6c899a0b48683a0b46ed01cc24d835c Mon Sep 17 00:00:00 2001 +From: Susanooo +Date: Fri, 25 Oct 2024 10:08:39 +0800 +Subject: [PATCH] ui/vnc: don't return an empty SASL mechlist to the client +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The SASL initialization phase may determine that there are no valid +mechanisms available to use. This may be because the host OS admin +forgot to install some packages, or it might be because the requested +SSF level is incompatible with available mechanisms, or other unknown +reasons. 
+ +If we return an empty mechlist to the client, they're going to get a +failure from the SASL library on their end and drop the connection. +Thus there is no point even sending this back to the client, we can +just drop the connection immediately. + +Reviewed-by: Marc-André Lureau +Signed-off-by: Daniel P. Berrangé +Signed-off-by: zhangchujun +--- + ui/vnc-auth-sasl.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..e321c9decc 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -674,6 +674,13 @@ void start_auth_sasl(VncState *vs) + } + trace_vnc_auth_sasl_mech_list(vs, mechlist); + ++ if (g_str_equal(mechlist, "")) { ++ trace_vnc_auth_fail(vs, vs->auth, "no available SASL mechanisms", ""); ++ sasl_dispose(&vs->sasl.conn); ++ vs->sasl.conn = NULL; ++ goto authabort; ++ } ++ + vs->sasl.mechlist = g_strdup(mechlist); + mechlistlen = strlen(mechlist); + vnc_write_u32(vs, mechlistlen); +-- +2.41.0.windows.1 + diff --git a/update-docs-tools-virtfs-proxy-helper.rst.patch b/update-docs-tools-virtfs-proxy-helper.rst.patch new file mode 100644 index 0000000000000000000000000000000000000000..aadfac66231a0d99f3acc042e0da54fceb5e4596 --- /dev/null +++ b/update-docs-tools-virtfs-proxy-helper.rst.patch @@ -0,0 +1,27 @@ +From c31f85b015326ad6619c707ada5cea2713970741 Mon Sep 17 00:00:00 2001 +From: lixiang_yewu +Date: Mon, 2 Sep 2024 07:35:57 +0000 +Subject: [PATCH] update docs/tools/virtfs-proxy-helper.rst. This place is + spelled wrong. + +Signed-off-by: lixiang_yewu +--- + docs/tools/virtfs-proxy-helper.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/tools/virtfs-proxy-helper.rst b/docs/tools/virtfs-proxy-helper.rst +index bd310ebb07..175b480926 100644 +--- a/docs/tools/virtfs-proxy-helper.rst ++++ b/docs/tools/virtfs-proxy-helper.rst +@@ -55,7 +55,7 @@ The following options are supported: + .. 
option:: -f, --fd SOCKET_ID + + Use given file descriptor as socket descriptor for communicating with +- qemu proxy fs drier. Usually a helper like libvirt will create ++ qemu proxy fs driver. Usually a helper like libvirt will create + socketpair and pass one of the fds as parameter to this option. + + .. option:: -s, --socket SOCKET_FILE +-- +2.41.0.windows.1 + diff --git a/update-io-trace-events.patch b/update-io-trace-events.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9b66a88d77c66dc5c34b8cb79115b39dd7603b8 --- /dev/null +++ b/update-io-trace-events.patch @@ -0,0 +1,26 @@ +From c6b96a0e10db061c9ab790b443f0bfd8220d7d3c Mon Sep 17 00:00:00 2001 +From: lixiang_yewu +Date: Mon, 2 Sep 2024 07:39:00 +0000 +Subject: [PATCH] update io/trace-events. Parameters should remain consistent. + +Signed-off-by: lixiang_yewu +--- + io/trace-events | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/io/trace-events b/io/trace-events +index 3cc5cf1efd..79e1a19af7 100644 +--- a/io/trace-events ++++ b/io/trace-events +@@ -38,7 +38,7 @@ qio_channel_file_new_path(void *ioc, const char *path, int flags, int mode, int + + # channel-tls.c + qio_channel_tls_new_client(void *ioc, void *master, void *creds, const char *hostname) "TLS new client ioc=%p master=%p creds=%p hostname=%s" +-qio_channel_tls_new_server(void *ioc, void *master, void *creds, const char *aclname) "TLS new client ioc=%p master=%p creds=%p acltname=%s" ++qio_channel_tls_new_server(void *ioc, void *master, void *creds, const char *aclname) "TLS new client ioc=%p master=%p creds=%p aclname=%s" + qio_channel_tls_handshake_start(void *ioc) "TLS handshake start ioc=%p" + qio_channel_tls_handshake_pending(void *ioc, int status) "TLS handshake pending ioc=%p status=%d" + qio_channel_tls_handshake_fail(void *ioc) "TLS handshake fail ioc=%p" +-- +2.41.0.windows.1 + diff --git a/update-linux-headers-Import-iommu.h.patch b/update-linux-headers-Import-iommu.h.patch deleted file 
mode 100644 index eea744e5063aeba90d9e16967f88b9d902de93f1..0000000000000000000000000000000000000000 --- a/update-linux-headers-Import-iommu.h.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 78c269f4ed09a3272d99a65d9c86977a01ef99c8 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 9 May 2019 10:23:42 -0400 -Subject: [PATCH] update-linux-headers: Import iommu.h - -Update the script to import the new iommu.h uapi header. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - scripts/update-linux-headers.sh | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh -index f76d77363b..dfdfdfddcf 100755 ---- a/scripts/update-linux-headers.sh -+++ b/scripts/update-linux-headers.sh -@@ -141,7 +141,7 @@ done - - rm -rf "$output/linux-headers/linux" - mkdir -p "$output/linux-headers/linux" --for header in kvm.h vfio.h vfio_ccw.h vhost.h \ -+for header in kvm.h vfio.h vfio_ccw.h vhost.h iommu.h \ - psci.h psp-sev.h userfaultfd.h mman.h; do - cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" - done --- -2.27.0 - diff --git a/usb-hub-Fix-handling-port-power-control-messages.patch b/usb-hub-Fix-handling-port-power-control-messages.patch new file mode 100644 index 0000000000000000000000000000000000000000..0abf5986a44fd394b767bfffabce3d9e3ef512f2 --- /dev/null +++ b/usb-hub-Fix-handling-port-power-control-messages.patch @@ -0,0 +1,39 @@ +From bdd1d8b5aea219c7ec1fb590430e3c8e99f43700 Mon Sep 17 00:00:00 2001 +From: qihao_yewu +Date: Mon, 18 Nov 2024 21:37:32 -0500 +Subject: [PATCH] usb-hub: Fix handling port power control messages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from b2cc69997924b651c0c6f4037782e25f2e438715 + +The ClearPortFeature control message fails for PORT_POWER because there +is no break; at the end of the case statement, causing it to fall through +to the failure handler. 
Add the missing break; to solve the problem. + +Fixes: 1cc403eb21 ("usb-hub: emulate per port power switching") +Signed-off-by: Guenter Roeck +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20241112170152.217664-11-linux@roeck-us.net> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/usb/dev-hub.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c +index 5703e0e826..7b3cfa2c1b 100644 +--- a/hw/usb/dev-hub.c ++++ b/hw/usb/dev-hub.c +@@ -479,6 +479,7 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p, + usb_hub_port_clear(port, PORT_STAT_SUSPEND); + port->wPortChange = 0; + } ++ break; + default: + goto fail; + } +-- +2.41.0.windows.1 + diff --git a/usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch b/usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch deleted file mode 100644 index 836e01ce8c2eda354b60c5b32b0e8303ab652f7c..0000000000000000000000000000000000000000 --- a/usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 93be5f3334394aa9a1794007aed79e75cf4d348b Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Mon, 21 Jun 2021 10:19:58 +0800 -Subject: [PATCH] usb: limit combined packets to 1 MiB (CVE-2021-3527) - -Fix CVE-2021-3527 - -usb-host and usb-redirect try to batch bulk transfers by combining many -small usb packets into a single, large transfer request, to reduce the -overhead and improve performance. - -This patch adds a size limit of 1 MiB for those combined packets to -restrict the host resources the guest can bind that way. 
-Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann -Message-Id: <20210503132915.2335822-6-kraxel@redhat.com> - -Signed-off-by: Jiajie Li ---- - hw/usb/combined-packet.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/usb/combined-packet.c b/hw/usb/combined-packet.c -index 5d57e883dc..e56802f89a 100644 ---- a/hw/usb/combined-packet.c -+++ b/hw/usb/combined-packet.c -@@ -171,7 +171,9 @@ void usb_ep_combine_input_packets(USBEndpoint *ep) - if ((p->iov.size % ep->max_packet_size) != 0 || !p->short_not_ok || - next == NULL || - /* Work around for Linux usbfs bulk splitting + migration */ -- (totalsize == (16 * KiB - 36) && p->int_req)) { -+ (totalsize == (16 * KiB - 36) && p->int_req) || -+ /* Next package may grow combined package over 1MiB */ -+ totalsize > 1 * MiB - ep->max_packet_size) { - usb_device_handle_data(ep->dev, first); - assert(first->status == USB_RET_ASYNC); - if (first->combined) { --- -2.27.0 - diff --git a/usbredir-Prevent-recursion-in-usbredir_write.patch b/usbredir-Prevent-recursion-in-usbredir_write.patch deleted file mode 100644 index 29eb50f2d08b41c570fa90940ac01894c046e90e..0000000000000000000000000000000000000000 --- a/usbredir-Prevent-recursion-in-usbredir_write.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 30203c01fa1bb2a7b92575683f85695a2d420b38 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 18 Dec 2019 11:30:12 +0000 -Subject: [PATCH] usbredir: Prevent recursion in usbredir_write - -I've got a case where usbredir_write manages to call back into itself -via spice; this patch causes the recursion to fail (0 bytes) the write; -this seems to avoid the deadlock I was previously seeing. 
- -I can't say I fully understand the interaction of usbredir and spice; -but there are a few similar guards in spice and usbredir -to catch other cases especially onces also related to spice_server_char_device_wakeup - -This case seems to be triggered by repeated migration+repeated -reconnection of the viewer; but my debugging suggests the migration -finished before this hits. - -The backtrace of the hang looks like: - reds_handle_ticket - reds_handle_other_links - reds_channel_do_link - red_channel_connect - spicevmc_connect - usbredir_create_parser - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - spice_chr_write - spice_server_char_device_wakeup - red_char_device_wakeup - red_char_device_write_to_device - vmc_write - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - qemu_mutex_lock_impl - -and we fail as we lang through qemu_chr_write_buffer's lock -twice. - -Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 - -Signed-off-by: Dr. 
David Alan Gilbert -Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> -Signed-off-by: Gerd Hoffmann ---- - hw/usb/redirect.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index 9764a57987..3cf82589ed 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -109,6 +109,7 @@ struct USBRedirDevice { - /* Properties */ - CharBackend cs; - bool enable_streams; -+ bool in_write; - uint8_t debug; - int32_t bootindex; - char *filter_str; -@@ -286,6 +287,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - return 0; - } - -+ /* Recursion check */ -+ if (dev->in_write) { -+ DPRINTF("usbredir_write recursion\n"); -+ return 0; -+ } -+ dev->in_write = true; -+ - r = qemu_chr_fe_write(&dev->cs, data, count); - if (r < count) { - if (!dev->watch) { -@@ -296,6 +304,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - r = 0; - } - } -+ dev->in_write = false; - return r; - } - --- -2.27.0 - diff --git a/usbredir-fix-buffer-overflow-on-vmload.patch b/usbredir-fix-buffer-overflow-on-vmload.patch deleted file mode 100644 index 4a43c35cad37bcece9822ddf61033c18dd7edfc4..0000000000000000000000000000000000000000 --- a/usbredir-fix-buffer-overflow-on-vmload.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 66fce891aecec3969d1ba979cf0a9a6df70afecd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 7 Aug 2019 12:40:48 +0400 -Subject: [PATCH] usbredir: fix buffer-overflow on vmload -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -If interface_count is NO_INTERFACE_INFO, let's not access the arrays -out-of-bounds. 
- -==994==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x625000243930 at pc 0x5642068086a8 bp 0x7f0b6f9ffa50 sp 0x7f0b6f9ffa40 -READ of size 1 at 0x625000243930 thread T0 - #0 0x5642068086a7 in usbredir_check_bulk_receiving /home/elmarco/src/qemu/hw/usb/redirect.c:1503 - #1 0x56420681301c in usbredir_post_load /home/elmarco/src/qemu/hw/usb/redirect.c:2154 - #2 0x5642068a56c2 in vmstate_load_state /home/elmarco/src/qemu/migration/vmstate.c:168 - #3 0x56420688e2ac in vmstate_load /home/elmarco/src/qemu/migration/savevm.c:829 - #4 0x5642068980cb in qemu_loadvm_section_start_full /home/elmarco/src/qemu/migration/savevm.c:2211 - #5 0x564206899645 in qemu_loadvm_state_main /home/elmarco/src/qemu/migration/savevm.c:2395 - #6 0x5642068998cf in qemu_loadvm_state /home/elmarco/src/qemu/migration/savevm.c:2467 - #7 0x56420685f3e9 in process_incoming_migration_co /home/elmarco/src/qemu/migration/migration.c:449 - #8 0x564207106c47 in coroutine_trampoline /home/elmarco/src/qemu/util/coroutine-ucontext.c:115 - #9 0x7f0c0604e37f (/lib64/libc.so.6+0x4d37f) - -Signed-off-by: Marc-André Lureau -Reviewed-by: Liam Merwick -Reviewed-by: Li Qiang -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20190807084048.4258-1-marcandre.lureau@redhat.com -Signed-off-by: Gerd Hoffmann -Signed-off-by: Zhenyu Ye ---- - hw/usb/redirect.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index 998fc6e4..9764a579 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -1495,6 +1495,11 @@ static void usbredir_check_bulk_receiving(USBRedirDevice *dev) - for (i = EP2I(USB_DIR_IN); i < MAX_ENDPOINTS; i++) { - dev->endpoint[i].bulk_receiving_enabled = 0; - } -+ -+ if (dev->interface_info.interface_count == NO_INTERFACE_INFO) { -+ return; -+ } -+ - for (i = 0; i < dev->interface_info.interface_count; i++) { - quirks = usb_get_quirks(dev->device_info.vendor_id, - dev->device_info.product_id, --- -2.22.0.windows.1 - diff --git 
a/util-add-slirp_fmt-helpers.patch b/util-add-slirp_fmt-helpers.patch deleted file mode 100644 index b752f1293a0b88ef0ae01410a2fc63f9ff875df8..0000000000000000000000000000000000000000 --- a/util-add-slirp_fmt-helpers.patch +++ /dev/null @@ -1,124 +0,0 @@ -From f3475a4a22dd84be0d2d7daa11676ac861da64bc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureauls?= -Date: Tue, 14 Apr 2020 18:51:39 +0800 -Subject: [PATCH] util: add slirp_fmt() helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Various calls to snprintf() in libslirp assume that snprintf() returns -"only" the number of bytes written (excluding terminating NUL). - -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Introduce slirp_fmt() that handles several pathological cases the -way libslirp usually expect: - -- treat error as fatal (instead of silently returning -1) - -- fmt0() will always \0 end - -- return the number of bytes actually written (instead of what would -have been written, which would usually result in OOB later), including -the ending \0 for fmt0() - -- warn if truncation happened (instead of ignoring) - -Other less common cases can still be handled with strcpy/snprintf() etc. 
-Signed-off-by: default avatarMarc-André Lureau -Reviewed-by: Samuel Thibault's avatarSamuel Thibault -Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> ---- - slirp/src/util.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ - slirp/src/util.h | 3 +++ - 2 files changed, 66 insertions(+) - -diff --git a/slirp/src/util.c b/slirp/src/util.c -index e5960871..dcae899e 100644 ---- a/slirp/src/util.c -+++ b/slirp/src/util.c -@@ -364,3 +364,66 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) - } - *q = '\0'; - } -+ -+static int slirp_vsnprintf(char *str, size_t size, -+ const char *format, va_list args) -+{ -+ int rv = vsnprintf(str, size, format, args); -+ -+ if (rv < 0) { -+ g_error("vsnprintf() failed: %s", g_strerror(errno)); -+ } -+ -+ return rv; -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - returns the number of bytes written (excluding optional \0-ending) -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt(char *str, size_t size, const char *format, ...) -+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv > size) { -+ g_critical("vsnprintf() truncation"); -+ } -+ -+ return MIN(rv, size); -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - always \0-end (unless size == 0) -+ * - returns the number of bytes actually written, including \0 ending -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt0(char *str, size_t size, const char *format, ...) 
-+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv >= size) { -+ g_critical("vsnprintf() truncation"); -+ if (size > 0) -+ str[size - 1] = '\0'; -+ rv = size; -+ } else { -+ rv += 1; /* include \0 */ -+ } -+ -+ return rv; -+} -+ -diff --git a/slirp/src/util.h b/slirp/src/util.h -index 3c6223ce..0558dfc2 100644 ---- a/slirp/src/util.h -+++ b/slirp/src/util.h -@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) - - void slirp_pstrcpy(char *buf, int buf_size, const char *str); - -+int slirp_fmt(char *str, size_t size, const char *format, ...); -+int slirp_fmt0(char *str, size_t size, const char *format, ...); -+ - #endif --- -2.23.0 diff --git a/util-async-hold-AioContext-ref-to-prevent-use-after-free.patch b/util-async-hold-AioContext-ref-to-prevent-use-after-free.patch deleted file mode 100644 index da4403c4ec7757c4860023b2a8e0018c3bf23b9d..0000000000000000000000000000000000000000 --- a/util-async-hold-AioContext-ref-to-prevent-use-after-free.patch +++ /dev/null @@ -1,63 +0,0 @@ -From e965bc6c633921ab238b1f5ea64055975b24e2bb Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 23 Jul 2019 20:06:23 +0100 -Subject: [PATCH 4/5] util/async: hold AioContext ref to prevent use-after-free - -The tests/test-bdrv-drain /bdrv-drain/iothread/drain test case does the -following: - -1. The preadv coroutine calls aio_bh_schedule_oneshot() and then yields. -2. The one-shot BH executes in another AioContext. All it does is call - aio_co_wakeup(preadv_co). -3. The preadv coroutine is re-entered and returns. - -There is a race condition in aio_co_wake() where the preadv coroutine -returns and the test case destroys the preadv IOThread. aio_co_wake() -can still be running in the other AioContext and it performs an access -to the freed IOThread AioContext. 
- -Here is the race in aio_co_schedule(): - - QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, - co, co_scheduled_next); - <-- race: co may execute before we invoke qemu_bh_schedule()! - qemu_bh_schedule(ctx->co_schedule_bh); - -So if co causes ctx to be freed then we're in trouble. Fix this problem -by holding a reference to ctx. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20190723190623.21537-1-stefanha@redhat.com -Message-Id: <20190723190623.21537-1-stefanha@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry-picked from commit f0f81002873c06fdef9bb2a272ddfd26af65b851) ---- - util/async.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/util/async.c b/util/async.c -index c10642a..afc17fb 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -460,9 +460,17 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co) - abort(); - } - -+ /* The coroutine might run and release the last ctx reference before we -+ * invoke qemu_bh_schedule(). Take a reference to keep ctx alive until -+ * we're done. 
-+ */ -+ aio_context_ref(ctx); -+ - QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, - co, co_scheduled_next); - qemu_bh_schedule(ctx->co_schedule_bh); -+ -+ aio_context_unref(ctx); - } - - void aio_co_wake(struct Coroutine *co) --- -1.8.3.1 - diff --git a/util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch b/util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch deleted file mode 100644 index 797d71e73d5115ead1f4bdf6ae3ed4aabebeb572..0000000000000000000000000000000000000000 --- a/util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 00b5032eaddb7193f03f0a28b10286244d2e2a7b Mon Sep 17 00:00:00 2001 -From: Carlos Santos -Date: Thu, 17 Oct 2019 09:37:13 -0300 -Subject: [PATCH] util/cacheinfo: fix crash when compiling with uClibc - -uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE -but the corresponding sysconf calls returns -1, which is a valid result, -meaning that the limit is indeterminate. - -Handle this situation using the fallback values instead of crashing due -to an assertion failure. 
- -Signed-off-by: Carlos Santos -Message-Id: <20191017123713.30192-1-casantos@redhat.com> -Signed-off-by: Richard Henderson ---- - util/cacheinfo.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/util/cacheinfo.c b/util/cacheinfo.c -index ea6f3e99bf..d94dc6adc8 100644 ---- a/util/cacheinfo.c -+++ b/util/cacheinfo.c -@@ -93,10 +93,16 @@ static void sys_cache_info(int *isize, int *dsize) - static void sys_cache_info(int *isize, int *dsize) - { - # ifdef _SC_LEVEL1_ICACHE_LINESIZE -- *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE); -+ int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE); -+ if (tmp_isize > 0) { -+ *isize = tmp_isize; -+ } - # endif - # ifdef _SC_LEVEL1_DCACHE_LINESIZE -- *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); -+ int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE); -+ if (tmp_dsize > 0) { -+ *dsize = tmp_dsize; -+ } - # endif - } - #endif /* sys_cache_info */ --- -2.27.0 - diff --git a/util-char_dev-Add-open_cdev.patch b/util-char_dev-Add-open_cdev.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6af1918d6b5181c0290f5dada9a4ab3c1962f50 --- /dev/null +++ b/util-char_dev-Add-open_cdev.patch @@ -0,0 +1,167 @@ +From 90688ff9c5802965f24460ac79fe52b93d2adb1f Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Sat, 11 Jan 2025 10:52:38 +0800 +Subject: [PATCH] util/char_dev: Add open_cdev() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +/dev/vfio/devices/vfioX may not exist. In that case it is still possible +to open /dev/char/$major:$minor instead. Add helper function to abstract +the cdev open. 
+ +Suggested-by: Jason Gunthorpe +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + MAINTAINERS | 2 + + include/qemu/chardev_open.h | 16 ++++++++ + util/chardev_open.c | 81 +++++++++++++++++++++++++++++++++++++ + util/meson.build | 1 + + 4 files changed, 100 insertions(+) + create mode 100644 include/qemu/chardev_open.h + create mode 100644 util/chardev_open.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index a5a446914a..ca70bb4e64 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan + S: Supported + F: backends/iommufd.c + F: include/sysemu/iommufd.h ++F: include/qemu/chardev_open.h ++F: util/chardev_open.c + + vhost + M: Michael S. Tsirkin +diff --git a/include/qemu/chardev_open.h b/include/qemu/chardev_open.h +new file mode 100644 +index 0000000000..64e8fcfdcb +--- /dev/null ++++ b/include/qemu/chardev_open.h +@@ -0,0 +1,16 @@ ++/* ++ * QEMU Chardev Helper ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * ++ * Authors: Yi Liu ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_CHARDEV_OPEN_H ++#define QEMU_CHARDEV_OPEN_H ++ ++int open_cdev(const char *devpath, dev_t cdev); ++#endif +diff --git a/util/chardev_open.c b/util/chardev_open.c +new file mode 100644 +index 0000000000..f776429788 +--- /dev/null ++++ b/util/chardev_open.c +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (c) 2019, Mellanox Technologies. All rights reserved. ++ * Copyright (C) 2023 Intel Corporation. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ * Authors: Yi Liu ++ * ++ * Copied from ++ * https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c ++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/chardev_open.h" ++ ++static int open_cdev_internal(const char *path, dev_t cdev) ++{ ++ struct stat st; ++ int fd; ++ ++ fd = qemu_open_old(path, O_RDWR); ++ if (fd == -1) { ++ return -1; ++ } ++ if (fstat(fd, &st) || !S_ISCHR(st.st_mode) || ++ (cdev != 0 && st.st_rdev != cdev)) { ++ close(fd); ++ return -1; ++ } ++ return fd; ++} ++ ++static int open_cdev_robust(dev_t cdev) ++{ ++ g_autofree char *devpath = NULL; ++ ++ /* ++ * This assumes that udev is being used and is creating the /dev/char/ ++ * symlinks. 
++ */ ++ devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev)); ++ return open_cdev_internal(devpath, cdev); ++} ++ ++int open_cdev(const char *devpath, dev_t cdev) ++{ ++ int fd; ++ ++ fd = open_cdev_internal(devpath, cdev); ++ if (fd == -1 && cdev != 0) { ++ return open_cdev_robust(cdev); ++ } ++ return fd; ++} +diff --git a/util/meson.build b/util/meson.build +index c2322ef6e7..174c133368 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -108,6 +108,7 @@ if have_block + util_ss.add(files('filemonitor-stub.c')) + endif + util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c')) ++ util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c')) + endif + + if cpu == 'aarch64' +-- +2.41.0.windows.1 + diff --git a/util-hbitmap-strict-hbitmap_reset.patch b/util-hbitmap-strict-hbitmap_reset.patch deleted file mode 100644 index b7f568f1bc6d21ae6923a552c2536414d5d33fdd..0000000000000000000000000000000000000000 --- a/util-hbitmap-strict-hbitmap_reset.patch +++ /dev/null @@ -1,77 +0,0 @@ -From fcd7cba6acb7344aca70f5f8ec16626e817b35a5 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 6 Aug 2019 18:26:11 +0300 -Subject: [PATCH] util/hbitmap: strict hbitmap_reset - -hbitmap_reset has an unobvious property: it rounds requested region up. -It may provoke bugs, like in recently fixed write-blocking mode of -mirror: user calls reset on unaligned region, not keeping in mind that -there are possible unrelated dirty bytes, covered by rounded-up region -and information of this unrelated "dirtiness" will be lost. - -Make hbitmap_reset strict: assert that arguments are aligned, allowing -only one exception when @start + @count == hb->orig_size. It's needed -to comfort users of hbitmap_next_dirty_area, which cares about -hb->orig_size. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Message-Id: <20190806152611.280389-1-vsementsov@virtuozzo.com> -[Maintainer edit: Max's suggestions from on-list. 
--js] -[Maintainer edit: Eric's suggestion for aligned macro. --js] -Signed-off-by: John Snow -(cherry picked from commit 48557b138383aaf69c2617ca9a88bfb394fc50ec) -*prereq for fed33bd175f663cc8c13f8a490a4f35a19756cfe -Signed-off-by: Michael Roth ---- - include/qemu/hbitmap.h | 5 +++++ - tests/test-hbitmap.c | 2 +- - util/hbitmap.c | 4 ++++ - 3 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h -index 4afbe6292e..1bf944ca3d 100644 ---- a/include/qemu/hbitmap.h -+++ b/include/qemu/hbitmap.h -@@ -132,6 +132,11 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count); - * @count: Number of bits to reset. - * - * Reset a consecutive range of bits in an HBitmap. -+ * @start and @count must be aligned to bitmap granularity. The only exception -+ * is resetting the tail of the bitmap: @count may be equal to hb->orig_size - -+ * @start, in this case @count may be not aligned. The sum of @start + @count is -+ * allowed to be greater than hb->orig_size, but only if @start < hb->orig_size -+ * and @start + @count = ALIGN_UP(hb->orig_size, granularity). - */ - void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count); - -diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c -index 592d8219db..2be56d1597 100644 ---- a/tests/test-hbitmap.c -+++ b/tests/test-hbitmap.c -@@ -423,7 +423,7 @@ static void test_hbitmap_granularity(TestHBitmapData *data, - hbitmap_test_check(data, 0); - hbitmap_test_set(data, 0, 3); - g_assert_cmpint(hbitmap_count(data->hb), ==, 4); -- hbitmap_test_reset(data, 0, 1); -+ hbitmap_test_reset(data, 0, 2); - g_assert_cmpint(hbitmap_count(data->hb), ==, 2); - } - -diff --git a/util/hbitmap.c b/util/hbitmap.c -index bcc0acdc6a..71c6ba2c52 100644 ---- a/util/hbitmap.c -+++ b/util/hbitmap.c -@@ -476,6 +476,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) - /* Compute range in the last layer. 
*/ - uint64_t first; - uint64_t last = start + count - 1; -+ uint64_t gran = 1ULL << hb->granularity; -+ -+ assert(QEMU_IS_ALIGNED(start, gran)); -+ assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size)); - - trace_hbitmap_reset(hb, start, count, - start >> hb->granularity, last >> hb->granularity); --- -2.23.0 diff --git a/util-iov-improve-qemu_iovec_is_zero.patch b/util-iov-improve-qemu_iovec_is_zero.patch deleted file mode 100644 index 0cca67b8b1ed17eb873514c177eb6e371ae21f17..0000000000000000000000000000000000000000 --- a/util-iov-improve-qemu_iovec_is_zero.patch +++ /dev/null @@ -1,102 +0,0 @@ -From b3b76fc643912d2c86b13caff30a1151f2958702 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 4 Jun 2019 19:15:04 +0300 -Subject: [PATCH] util/iov: improve qemu_iovec_is_zero - -We'll need to check a part of qiov soon, so implement it now. - -Optimization with align down to 4 * sizeof(long) is dropped due to: -1. It is strange: it aligns length of the buffer, but where is a - guarantee that buffer pointer is aligned itself? -2. buffer_is_zero() is a better place for optimizations and it has - them. 
- -Signed-off-by: Vladimir Sementsov-Ogievskiy -Acked-by: Stefan Hajnoczi -Message-id: 20190604161514.262241-3-vsementsov@virtuozzo.com -Message-Id: <20190604161514.262241-3-vsementsov@virtuozzo.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit f76889e7b947d896db51be8a4d9c941c2f70365a) -*prereq for 292d06b9 -Signed-off-by: Michael Roth ---- - block/io.c | 2 +- - include/qemu/iov.h | 2 +- - util/iov.c | 31 +++++++++++++++++++------------ - 3 files changed, 21 insertions(+), 14 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 06305c6ea6..dccf687acc 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1715,7 +1715,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, - - if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && - !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && -- qemu_iovec_is_zero(qiov)) { -+ qemu_iovec_is_zero(qiov, 0, qiov->size)) { - flags |= BDRV_REQ_ZERO_WRITE; - if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { - flags |= BDRV_REQ_MAY_UNMAP; -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index f3787a0cf7..29957c8a72 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -212,7 +212,7 @@ void qemu_iovec_concat(QEMUIOVector *dst, - size_t qemu_iovec_concat_iov(QEMUIOVector *dst, - struct iovec *src_iov, unsigned int src_cnt, - size_t soffset, size_t sbytes); --bool qemu_iovec_is_zero(QEMUIOVector *qiov); -+bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes); - void qemu_iovec_destroy(QEMUIOVector *qiov); - void qemu_iovec_reset(QEMUIOVector *qiov); - size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, -diff --git a/util/iov.c b/util/iov.c -index 366ff9cdd1..9ac0261853 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -451,23 +451,30 @@ void qemu_iovec_init_extended( - } - - /* -- * Check if the contents of the iovecs are all zero -+ * Check if the contents of subrange of qiov data is all zeroes. 
- */ --bool qemu_iovec_is_zero(QEMUIOVector *qiov) -+bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) - { -- int i; -- for (i = 0; i < qiov->niov; i++) { -- size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); -- uint8_t *ptr = qiov->iov[i].iov_base; -- if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { -+ struct iovec *iov; -+ size_t current_offset; -+ -+ assert(offset + bytes <= qiov->size); -+ -+ iov = iov_skip_offset(qiov->iov, offset, ¤t_offset); -+ -+ while (bytes) { -+ uint8_t *base = (uint8_t *)iov->iov_base + current_offset; -+ size_t len = MIN(iov->iov_len - current_offset, bytes); -+ -+ if (!buffer_is_zero(base, len)) { - return false; - } -- for (; offs < qiov->iov[i].iov_len; offs++) { -- if (ptr[offs]) { -- return false; -- } -- } -+ -+ current_offset = 0; -+ bytes -= len; -+ iov++; - } -+ - return true; - } - --- -2.23.0 diff --git a/util-iov-introduce-qemu_iovec_init_extended.patch b/util-iov-introduce-qemu_iovec_init_extended.patch deleted file mode 100644 index 0a488a63413b69816576ff8394f1e282c292e7d7..0000000000000000000000000000000000000000 --- a/util-iov-introduce-qemu_iovec_init_extended.patch +++ /dev/null @@ -1,177 +0,0 @@ -From cff024fe856ab36db3056ba4cb1d7cfa4c39795d Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Tue, 4 Jun 2019 19:15:03 +0300 -Subject: [PATCH] util/iov: introduce qemu_iovec_init_extended - -Introduce new initialization API, to create requests with padding. Will -be used in the following patch. New API uses qemu_iovec_init_buf if -resulting io vector has only one element, to avoid extra allocations. -So, we need to update qemu_iovec_destroy to support destroying such -QIOVs. 
- -Signed-off-by: Vladimir Sementsov-Ogievskiy -Acked-by: Stefan Hajnoczi -Message-id: 20190604161514.262241-2-vsementsov@virtuozzo.com -Message-Id: <20190604161514.262241-2-vsementsov@virtuozzo.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit d953169d4840f312d3b9a54952f4a7ccfcb3b311) -*prereq for 292d06b9 -Signed-off-by: Michael Roth ---- - include/qemu/iov.h | 7 +++ - util/iov.c | 112 +++++++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 114 insertions(+), 5 deletions(-) - -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index 48b45987b7..f3787a0cf7 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -199,6 +199,13 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) - - void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); - void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); -+void qemu_iovec_init_extended( -+ QEMUIOVector *qiov, -+ void *head_buf, size_t head_len, -+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -+ void *tail_buf, size_t tail_len); -+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, -+ size_t offset, size_t len); - void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); - void qemu_iovec_concat(QEMUIOVector *dst, - QEMUIOVector *src, size_t soffset, size_t sbytes); -diff --git a/util/iov.c b/util/iov.c -index 74e6ca8ed7..366ff9cdd1 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -353,6 +353,103 @@ void qemu_iovec_concat(QEMUIOVector *dst, - qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes); - } - -+/* -+ * qiov_find_iov -+ * -+ * Return pointer to iovec structure, where byte at @offset in original vector -+ * @iov exactly is. -+ * Set @remaining_offset to be offset inside that iovec to the same byte. 
-+ */ -+static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, -+ size_t *remaining_offset) -+{ -+ while (offset > 0 && offset >= iov->iov_len) { -+ offset -= iov->iov_len; -+ iov++; -+ } -+ *remaining_offset = offset; -+ -+ return iov; -+} -+ -+/* -+ * qiov_slice -+ * -+ * Find subarray of iovec's, containing requested range. @head would -+ * be offset in first iov (returned by the function), @tail would be -+ * count of extra bytes in last iovec (returned iov + @niov - 1). -+ */ -+static struct iovec *qiov_slice(QEMUIOVector *qiov, -+ size_t offset, size_t len, -+ size_t *head, size_t *tail, int *niov) -+{ -+ struct iovec *iov, *end_iov; -+ -+ assert(offset + len <= qiov->size); -+ -+ iov = iov_skip_offset(qiov->iov, offset, head); -+ end_iov = iov_skip_offset(iov, *head + len, tail); -+ -+ if (*tail > 0) { -+ assert(*tail < end_iov->iov_len); -+ *tail = end_iov->iov_len - *tail; -+ end_iov++; -+ } -+ -+ *niov = end_iov - iov; -+ -+ return iov; -+} -+ -+/* -+ * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, -+ * and @tail_buf buffer into new qiov. 
-+ */ -+void qemu_iovec_init_extended( -+ QEMUIOVector *qiov, -+ void *head_buf, size_t head_len, -+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -+ void *tail_buf, size_t tail_len) -+{ -+ size_t mid_head, mid_tail; -+ int total_niov, mid_niov = 0; -+ struct iovec *p, *mid_iov; -+ -+ if (mid_len) { -+ mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, -+ &mid_head, &mid_tail, &mid_niov); -+ } -+ -+ total_niov = !!head_len + mid_niov + !!tail_len; -+ if (total_niov == 1) { -+ qemu_iovec_init_buf(qiov, NULL, 0); -+ p = &qiov->local_iov; -+ } else { -+ qiov->niov = qiov->nalloc = total_niov; -+ qiov->size = head_len + mid_len + tail_len; -+ p = qiov->iov = g_new(struct iovec, qiov->niov); -+ } -+ -+ if (head_len) { -+ p->iov_base = head_buf; -+ p->iov_len = head_len; -+ p++; -+ } -+ -+ if (mid_len) { -+ memcpy(p, mid_iov, mid_niov * sizeof(*p)); -+ p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; -+ p[0].iov_len -= mid_head; -+ p[mid_niov - 1].iov_len -= mid_tail; -+ p += mid_niov; -+ } -+ -+ if (tail_len) { -+ p->iov_base = tail_buf; -+ p->iov_len = tail_len; -+ } -+} -+ - /* - * Check if the contents of the iovecs are all zero - */ -@@ -374,14 +471,19 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov) - return true; - } - -+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, -+ size_t offset, size_t len) -+{ -+ qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); -+} -+ - void qemu_iovec_destroy(QEMUIOVector *qiov) - { -- assert(qiov->nalloc != -1); -+ if (qiov->nalloc != -1) { -+ g_free(qiov->iov); -+ } - -- qemu_iovec_reset(qiov); -- g_free(qiov->iov); -- qiov->nalloc = 0; -- qiov->iov = NULL; -+ memset(qiov, 0, sizeof(*qiov)); - } - - void qemu_iovec_reset(QEMUIOVector *qiov) --- -2.23.0 diff --git a/util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch b/util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch new file mode 100644 index 0000000000000000000000000000000000000000..82e2884cba6131390119bf688ee0452887236b86 --- 
/dev/null +++ b/util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch @@ -0,0 +1,42 @@ +From 28763d8df34c20cab60baec8f4f5615cbea8c0df Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Fri, 11 Feb 2022 18:20:59 +0800 +Subject: [PATCH] util/log: add CONFIG_DISABLE_QEMU_LOG macro + +Using CONFIG_DISABLE_QEMU_LOG macro to control +qemu_log function. + +Signed-off-by: Yan Wang +Signed-off-by: Adttil +--- + util/log.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/util/log.c b/util/log.c +index d36c98da0b..78b6cf225f 100644 +--- a/util/log.c ++++ b/util/log.c +@@ -143,6 +143,12 @@ void qemu_log_unlock(FILE *logfile) + } + } + ++#ifdef CONFIG_DISABLE_QEMU_LOG ++void qemu_log(const char *fmt, ...) ++{ ++ return; ++} ++#else + void qemu_log(const char *fmt, ...) + { + FILE *f = qemu_log_trylock(); +@@ -155,6 +161,7 @@ void qemu_log(const char *fmt, ...) + qemu_log_unlock(f); + } + } ++#endif + + static void __attribute__((__constructor__)) startup(void) + { +-- +2.27.0 + diff --git a/util-userfaultfd-Remove-unused-uffd_poll_events.patch b/util-userfaultfd-Remove-unused-uffd_poll_events.patch new file mode 100644 index 0000000000000000000000000000000000000000..31b028408576c33dd4177325e21770b550db9919 --- /dev/null +++ b/util-userfaultfd-Remove-unused-uffd_poll_events.patch @@ -0,0 +1,71 @@ +From ef3d2918827d6c5204af06e1597dc4dbde22414a Mon Sep 17 00:00:00 2001 +From: Zhang Jiao +Date: Thu, 17 Oct 2024 09:43:01 +0800 +Subject: [PATCH] util/userfaultfd: Remove unused uffd_poll_events + +chery-pick from ccf6b78275816c9dec84d3a40e9aa3b6ba6ebc06 + +uffd_poll_events has been unused since it was added; it's also +just a wrapper around a plain old poll call, so doesn't add anything. + +Signed-off-by: Dr. 
David Alan Gilbert +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240919134626.166183-8-dave@treblig.org +Signed-off-by: Peter Xu +Signed-off-by: Zhang Jiao +--- + include/qemu/userfaultfd.h | 1 - + util/userfaultfd.c | 28 ---------------------------- + 2 files changed, 29 deletions(-) + +diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h +index 18a4314212..a1979308d7 100644 +--- a/include/qemu/userfaultfd.h ++++ b/include/qemu/userfaultfd.h +@@ -39,7 +39,6 @@ int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, + int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake); + int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); + int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); +-bool uffd_poll_events(int uffd_fd, int tmo); + + #endif /* CONFIG_LINUX */ + +diff --git a/util/userfaultfd.c b/util/userfaultfd.c +index fdff4867e8..b7d320d0b1 100644 +--- a/util/userfaultfd.c ++++ b/util/userfaultfd.c +@@ -356,31 +356,3 @@ int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count) + + return (int) (res / sizeof(struct uffd_msg)); + } +- +-/** +- * uffd_poll_events: poll UFFD file descriptor for read +- * +- * Returns true if events are available for read, false otherwise +- * +- * @uffd_fd: UFFD file descriptor +- * @tmo: timeout value +- */ +-bool uffd_poll_events(int uffd_fd, int tmo) +-{ +- int res; +- struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 }; +- +- do { +- res = poll(&poll_fd, 1, tmo); +- } while (res < 0 && errno == EINTR); +- +- if (res == 0) { +- return false; +- } +- if (res < 0) { +- error_report("uffd_poll_events() failed: errno=%i", errno); +- return false; +- } +- +- return (poll_fd.revents & POLLIN) != 0; +-} +-- +2.41.0.windows.1 + diff --git a/vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch b/vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f2b1f66101c80676f464727a00710ae9c6d3a11c --- /dev/null +++ b/vdpa-Fix-dirty-page-bitmap-synchronization-not-done-.patch @@ -0,0 +1,39 @@ +From 257ffabb9c06b476a3a42bf679db6fbc61c19459 Mon Sep 17 00:00:00 2001 +From: Adttil <2429917001@qq.com> +Date: Fri, 25 Apr 2025 09:41:59 +0800 +Subject: [PATCH] vdpa:Fix dirty page bitmap synchronization not done after + suspend for vdpa devices + +Change the flag for vdpa device to determine whether to perform log_sync +from dev->start to dev->log, and do not release dev->log after vdpa device +suspend, and release it uniformly by vhost_dev_stop. + +Signed-off-by: Adttil <2429917001@qq.com> +--- + hw/virtio/vhost.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index d29075aa04..bec6e63fc7 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -252,7 +252,7 @@ static void vhost_log_sync(MemoryListener *listener, + memory_listener); + MigrationState *ms = migrate_get_current(); + +- if (!dev->log_enabled || !dev->started) { ++ if (!dev->log_enabled || !dev->log) { + return; + } + +@@ -2624,7 +2624,6 @@ int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + memory_listener_unregister(&hdev->iommu_listener); + } + vhost_stop_config_intr(hdev); +- vhost_log_put(hdev, true); + hdev->started = false; + vdev->vhost_started = false; + hdev->vdev = NULL; +-- +2.41.0.windows.1 + diff --git a/vdpa-correct-param-passed-in-when-unregister-save.patch b/vdpa-correct-param-passed-in-when-unregister-save.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f3aa089a764098f3f976cb99fc5e47a56c775a8 --- /dev/null +++ b/vdpa-correct-param-passed-in-when-unregister-save.patch @@ -0,0 +1,30 @@ +From 5714aaddcbc313e63da435a253d9d472984d7b49 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 14 Dec 2023 11:22:54 +0800 +Subject: [PATCH] vdpa: correct param passed in when unregister save + +The 
idstr passed in the unregister_savevm function is inconsisten +with the idstr passed in when register_savevm_live registration. +Needs to be modified, otherwise migration will fail after hotunplug +all vdpa devices. + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index b889dd4715..1d299019da 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -404,6 +404,6 @@ void vdpa_migration_register(VhostVdpaDevice *vdev) + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { + migration_remove_notifier(&vdev->migration_state); +- unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); ++ unregister_savevm(NULL, "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +-- +2.27.0 + diff --git a/vdpa-dev-Fix-initialisation-order-to-restore-VDUSE-c.patch b/vdpa-dev-Fix-initialisation-order-to-restore-VDUSE-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..109974b7c783df1ff2ff627c988be4215db17b7c --- /dev/null +++ b/vdpa-dev-Fix-initialisation-order-to-restore-VDUSE-c.patch @@ -0,0 +1,180 @@ +From 9ab31c6abf095d8f7c986676cf6a70132a3441b7 Mon Sep 17 00:00:00 2001 +From: Adttil <2429917001@qq.com> +Date: Tue, 10 Dec 2024 00:33:28 +0800 +Subject: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE + compatibility +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +VDUSE requires that virtqueues are first enabled before the DRIVER_OK +status flag is set; with the current API of the kernel module, it is +impossible to enable the opposite order in our block export code because +userspace is not notified when a virtqueue is enabled. + +This requirement also mathces the normal initialisation order as done by +the generic vhost code in QEMU. 
However, commit 6c48254 accidentally +changed the order for vdpa-dev and broke access to VDUSE devices with +this. + +This changes vdpa-dev to use the normal order again and use the standard +vhost callback .vhost_set_vring_enable for this. VDUSE devices can be +used with vdpa-dev again after this fix. + +vhost_net intentionally avoided enabling the vrings for vdpa and does +this manually later while it does enable them for other vhost backends. +Reflect this in the vhost_net code and return early for vdpa, so that +the behaviour doesn't change for this device. + +Cc: qemu-stable@nongnu.org +Fixes: 6c48254 ('vdpa: move vhost_vdpa_set_vring_ready to the caller') +Signed-off-by: Kevin Wolf +Message-ID: <20240315155949.86066-1-kwolf@redhat.com> +Reviewed-by: Eugenio Pérez +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +--- + hw/net/vhost_net.c | 10 ++++++++++ + hw/virtio/trace-events | 2 +- + hw/virtio/vdpa-dev.c | 5 +---- + hw/virtio/vhost-vdpa.c | 29 ++++++++++++++++++++++++++--- + hw/virtio/vhost.c | 8 +++++++- + 5 files changed, 45 insertions(+), 9 deletions(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index e48c373b14..a02d65d208 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -599,6 +599,16 @@ int vhost_set_vring_enable(NetClientState *nc, int enable) + VHostNetState *net = get_vhost_net(nc); + const VhostOps *vhost_ops = net->dev.vhost_ops; + ++ /* ++ * vhost-vdpa network devices need to enable dataplane virtqueues after ++ * DRIVER_OK, so they can recover device state before starting dataplane. ++ * Because of that, we don't enable virtqueues here and leave it to ++ * net/vhost-vdpa.c. 
++ */ ++ if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { ++ return 0; ++ } ++ + nc->vring_enable = enable; + + if (vhost_ops && vhost_ops->vhost_set_vring_enable) { +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 637cac4edf..f136815072 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI + vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 + vhost_vdpa_reset_device(void *dev) "dev: %p" + vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" +-vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" ++vhost_vdpa_set_vring_enable_one(void *dev, unsigned i, int enable, int r) "dev: %p, idx: %u, enable: %u, r: %d" + vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" + vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 + vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 91e71847b0..7b2b19dfb8 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -259,14 +259,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) + + s->dev.acked_features = vdev->guest_features; + +- ret = vhost_dev_start(&s->dev, vdev, false); ++ ret = vhost_dev_start(&s->dev, vdev, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Error starting vhost"); + goto err_guest_notifiers; + } +- for (i = 0; i < s->dev.nvqs; ++i) { +- vhost_vdpa_set_vring_ready(&s->vdpa, i); +- } + s->started = true; + + /* +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index d49826845f..7e172eee49 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -883,12 +883,13 @@ static int 
vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) + return idx; + } + +-int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) ++static int vhost_vdpa_set_vring_enable_one(struct vhost_vdpa *v, unsigned idx, ++ int enable) + { + struct vhost_dev *dev = v->dev; + struct vhost_vring_state state = { + .index = idx, +- .num = 1, ++ .num = enable, + }; + hwaddr addr = virtio_queue_get_desc_addr(dev->vdev, idx); + if (addr == 0) { +@@ -897,10 +898,31 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) + + int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); + +- trace_vhost_vdpa_set_vring_ready(dev, idx, r); ++ trace_vhost_vdpa_set_vring_enable_one(dev, idx, enable, r); + return r; + } + ++static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ unsigned int i; ++ int ret; ++ ++ for (i = 0; i < dev->nvqs; ++i) { ++ ret = vhost_vdpa_set_vring_enable_one(v, i, enable); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) ++{ ++ return vhost_vdpa_set_vring_enable_one(v, idx, 1); ++} ++ + static int vhost_vdpa_set_config_call(struct vhost_dev *dev, + int fd) + { +@@ -1584,6 +1606,7 @@ const VhostOps vdpa_ops = { + .vhost_set_features = vhost_vdpa_set_features, + .vhost_reset_device = vhost_vdpa_reset_device, + .vhost_get_vq_index = vhost_vdpa_get_vq_index, ++ .vhost_set_vring_enable = vhost_vdpa_set_vring_enable, + .vhost_get_config = vhost_vdpa_get_config, + .vhost_set_config = vhost_vdpa_set_config, + .vhost_requires_shm_log = NULL, +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index d073a6d5a5..d29075aa04 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -2063,7 +2063,13 @@ static int vhost_dev_set_vring_enable(struct vhost_dev *hdev, int enable) + return hdev->vhost_ops->vhost_set_vring_enable(hdev, enable); + } + +-/* Host notifiers must be enabled at this 
point. */ ++/* ++ * Host notifiers must be enabled at this point. ++ * ++ * If @vrings is true, this function will enable all vrings before starting the ++ * device. If it is false, the vring initialization is left to be done by the ++ * caller. ++ */ + int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + { + int i, r; +-- +2.41.0.windows.1 + diff --git a/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch b/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a9ed108a970b2c447c9aaa0a8de42f7009300f4 --- /dev/null +++ b/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch @@ -0,0 +1,67 @@ +From b82f02e93d5efa2ea62dd135c508cb707fdd35a7 Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 19 Dec 2023 20:32:00 +0800 +Subject: [PATCH] vdpa: don't suspend/resume device when vdpa device not + started + +When vdpa device not started, we don't need to suspend vdpa device +and send vdpa device state information. Therefore, add the suspended +flag of vdpa device to distinguish whether the device is suspended and +use it to determine whether the device needs to resume in dest qemu. 
+ +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1d299019da..887c96a201 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -294,10 +294,13 @@ static int vdpa_save_complete_precopy(QEMUFile *f, void *opaque) + int ret; + + qemu_put_be64(f, VDPA_MIG_FLAG_DEV_CONFIG_STATE); +- ret = vhost_vdpa_dev_buffer_save(hdev, f); +- if (ret) { +- error_report("Save vdpa device buffer failed: %d\n", ret); +- return ret; ++ qemu_put_be16(f, (uint16_t)vdev->suspended); ++ if (vdev->suspended) { ++ ret = vhost_vdpa_dev_buffer_save(hdev, f); ++ if (ret) { ++ error_report("Save vdpa device buffer failed: %d\n", ret); ++ return ret; ++ } + } + qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); + +@@ -311,6 +314,7 @@ static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) + + int ret; + uint64_t data; ++ uint16_t suspended; + + data = qemu_get_be64(f); + while (data != VDPA_MIG_FLAG_END_OF_STATE) { +@@ -323,10 +327,13 @@ static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) + return -EINVAL; + } + } else if (data == VDPA_MIG_FLAG_DEV_CONFIG_STATE) { +- ret = vhost_vdpa_dev_buffer_load(hdev, f); +- if (ret) { +- error_report("fail to restore device buffer.\n"); +- return ret; ++ suspended = qemu_get_be16(f); ++ if (suspended) { ++ ret = vhost_vdpa_dev_buffer_load(hdev, f); ++ if (ret) { ++ error_report("fail to restore device buffer.\n"); ++ return ret; ++ } + } + } + +-- +2.27.0 + diff --git a/vdpa-fix-vdpa-device-migrate-rollback-wrong-when-sus.patch b/vdpa-fix-vdpa-device-migrate-rollback-wrong-when-sus.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c1899bba18c57ee145b515370eb7c5877069dfb --- /dev/null +++ b/vdpa-fix-vdpa-device-migrate-rollback-wrong-when-sus.patch @@ -0,0 +1,140 @@ +From ea76b33ca7a8c2fd39f50b6d1bb6702ab0a4fc87 Mon Sep 17 00:00:00 
2001 +From: fangyi +Date: Sat, 22 Jun 2024 07:02:48 +0000 +Subject: [PATCH 1/6] vdpa: fix vdpa device migrate rollback wrong when suspend + device failed. + + 1. set vdpa->suspended before call vhost_dev_suspend to make sure vdpa device + will resume when suspend failed. + 2. using state == RUN_STATE_FINISH_MIGRATE instead of ms->state == MIGRATION_STATUS_ACTIVE + to judge vm in migration. As migrate_fd_cancel will change ms->state, + which will result in some vdpa devices not being suspended. + +Signed-off-by: fangyi +--- + hw/virtio/vdpa-dev-mig.c | 81 ++++------------------------------------ + 1 file changed, 7 insertions(+), 74 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 887c96a201..7de996c835 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -130,100 +130,33 @@ free: + static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + { + VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- int ret; + + if (!vdpa->started || vdpa->suspended) { + return 0; + } + +- if (!k->set_guest_notifiers) { +- return -EFAULT; +- } +- +- vdpa->started = false; + vdpa->suspended = true; + +- ret = vhost_dev_suspend(&vdpa->dev, vdev, false); +- if (ret) { +- goto suspend_fail; +- } +- +- ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); +- if (ret < 0) { +- error_report("vhost guest notifier cleanup failed: %d\n", ret); +- goto set_guest_notifiers_fail; +- } +- +- vhost_dev_disable_notifiers(&vdpa->dev, vdev); +- return ret; +- +-set_guest_notifiers_fail: +- ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); +- if (ret) { +- error_report("vhost guest notifier restore failed: %d\n", ret); +- } +- +-suspend_fail: +- vdpa->suspended = false; +- vdpa->started = true; +- return ret; ++ return vhost_dev_suspend(&vdpa->dev, vdev, false); + } + + static int 
vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) + { + VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- int i, ret; ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ int ret; + +- if (vdpa->started || !vdpa->suspended) { ++ if (!vdpa->started || ++ (!vdpa->suspended && mis->state != RUN_STATE_RESTORE_VM)) { + return 0; + } + +- if (!k->set_guest_notifiers) { +- error_report("binding does not support guest notifiers\n"); +- return -ENOSYS; +- } +- +- ret = vhost_dev_enable_notifiers(&vdpa->dev, vdev); ++ ret = vhost_dev_resume(&vdpa->dev, vdev, false); + if (ret < 0) { +- error_report("Error enabling host notifiers: %d\n", ret); + return ret; + } + +- ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); +- if (ret < 0) { +- error_report("Error binding guest notifier: %d\n", ret); +- goto err_host_notifiers; +- } +- +- vdpa->dev.acked_features = vdev->guest_features; +- +- ret = vhost_dev_resume(&vdpa->dev, vdev, false); +- if (ret < 0) { +- error_report("Error starting vhost: %d\n", ret); +- goto err_guest_notifiers; +- } +- vdpa->started = true; + vdpa->suspended = false; +- +- /* +- * guest_notifier_mask/pending not used yet, so just unmask +- * everything here. virtio-pci will do the right thing by +- * enabling/disabling irqfd. 
+- */ +- for (i = 0; i < vdpa->dev.nvqs; i++) { +- vhost_virtqueue_mask(&vdpa->dev, vdev, i, false); +- } +- +- return ret; +- +-err_guest_notifiers: +- k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); +-err_host_notifiers: +- vhost_dev_disable_notifiers(&vdpa->dev, vdev); + return ret; + } + +@@ -248,7 +181,7 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!running) { +- if (ms->state == MIGRATION_STATUS_ACTIVE || state == RUN_STATE_PAUSED) { ++ if (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED) { + ret = vhost_vdpa_device_suspend(vdpa); + if (ret) { + error_report("suspend vdpa device failed: %d\n", ret); +-- +2.43.0 + diff --git a/vdpa-implement-vdpa-device-migration.patch b/vdpa-implement-vdpa-device-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..295cdcdc24869ad704f467190303b456a14136e7 --- /dev/null +++ b/vdpa-implement-vdpa-device-migration.patch @@ -0,0 +1,75 @@ +From 4688e12c57a34801010abf2a4cf528fcef3b9ec0 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:59:56 +0800 +Subject: [PATCH] vdpa: implement vdpa device migration + +Integrate the live migration code, call the registered live +migration function, and open the vdpa live migration prototype + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index f22d5d5bc0..6af78a4229 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -28,6 +28,8 @@ + #include "hw/virtio/vdpa-dev.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" ++#include "hw/virtio/vdpa-dev-mig.h" ++#include "migration/migration.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -154,6 +156,8 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error 
**errp) + vhost_vdpa_device_dummy_handle_output); + } + ++ vdpa_migration_register(v); ++ + return; + + free_config: +@@ -173,6 +177,7 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + int i; + ++ vdpa_migration_unregister(s); + virtio_set_status(vdev, 0); + + for (i = 0; i < s->num_queues; i++) { +@@ -308,6 +313,7 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + { + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); ++ MigrationState *ms = migrate_get_current(); + bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; + int ret; +@@ -320,6 +326,11 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + return; + } + ++ if (ms->state == RUN_STATE_PAUSED || ++ ms->state == RUN_STATE_RESTORE_VM) { ++ return; ++ } ++ + if (should_start) { + ret = vhost_vdpa_device_start(vdev, &local_err); + if (ret < 0) { +@@ -338,7 +349,7 @@ static Property vhost_vdpa_device_properties[] = { + + static const VMStateDescription vmstate_vhost_vdpa_device = { + .name = "vhost-vdpa-device", +- .unmigratable = 1, ++ .unmigratable = 0, + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { +-- +2.27.0 + diff --git a/vdpa-iommufd-All-vdpa-devices-perform-only-one-log_s.patch b/vdpa-iommufd-All-vdpa-devices-perform-only-one-log_s.patch new file mode 100644 index 0000000000000000000000000000000000000000..4689f6a27bf137fa7c40be57e15062e44f9d814d --- /dev/null +++ b/vdpa-iommufd-All-vdpa-devices-perform-only-one-log_s.patch @@ -0,0 +1,260 @@ +From c18333142111a3bd55429594436f25765d41077a Mon Sep 17 00:00:00 2001 +From: Adttil <2429917001@qq.com> +Date: Wed, 9 Apr 2025 22:57:50 +0800 +Subject: [PATCH] vdpa/iommufd: All vdpa devices perform only one log_sync each + time. 
+ +For all vdpa devices, since they share the same dirty page bytemap, +only one synchronization is needed each time +the dirty page bytemap is synchronized. + +Signed-off-by: Adttil <2429917001@qq.com> +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-iommufd.c | 52 ++++++++++++++++++++++++++++++++++++ + hw/virtio/vdpa-dev.c | 1 + + hw/virtio/vhost.c | 16 +++++------ + include/exec/memory.h | 10 +++++++ + include/hw/virtio/vhost.h | 15 +++++++++++ + system/memory.c | 6 +++++ + 6 files changed, 92 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-iommufd.c b/hw/virtio/vdpa-dev-iommufd.c +index 668c6a1cb1..2b0498f9dc 100644 +--- a/hw/virtio/vdpa-dev-iommufd.c ++++ b/hw/virtio/vdpa-dev-iommufd.c +@@ -12,6 +12,9 @@ + #include "exec/target_page.h" + #include "exec/address-spaces.h" + #include "hw/virtio/vdpa-dev-iommufd.h" ++#include "migration/migration.h" ++#include "qapi/qapi-commands-migration.h" ++#include "hw/virtio/vhost.h" + + static QLIST_HEAD(, VDPAIOMMUFDContainer) vdpa_container_list = + QLIST_HEAD_INITIALIZER(vdpa_container_list); +@@ -118,6 +121,51 @@ static void vhost_vdpa_iommufd_container_region_del(MemoryListener *listener, + memory_region_unref(section->mr); + } + ++static void vhost_vdpa_iommufd_container_log_sync(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VDPAIOMMUFDContainer *container = container_of(listener, VDPAIOMMUFDContainer, listener); ++ IOMMUFDHWPT *hwpt; ++ VhostVdpaDevice *vdev; ++ MigrationState *ms = migrate_get_current(); ++ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ QLIST_FOREACH(vdev, &hwpt->device_list, next) { ++ if (!vdev->dev.log_enabled || !vdev->dev.log) { ++ continue; ++ } ++ ++ /** ++ * For the vhost-vdpa device, log_sync is performed on the entire VM, ++ * that is, this sync is for the entire flatview. ++ * Therefore, the first MemoryRegionSection of flatview needs to be ++ * synchronized. The rest of the mrs do not need to be synchronized. 
++ */ ++ if (is_first_section(section)) { ++ int r = vdev->dev.vhost_ops->vhost_log_sync(&vdev->dev); ++ if (r < 0) { ++ qemu_log("Failed to sync dirty log: %d\n", r); ++ if (migration_is_running(ms->state)) { ++ qmp_migrate_cancel(NULL); ++ } ++ return; ++ } ++ } ++ ++ /** ++ * Dirty maps are merged separately by MRS, so each MRS needs to be iterated. ++ */ ++ if (vhost_bytemap_log_support(&vdev->dev)) { ++ vhost_sync_dirty_bytemap(&vdev->dev, section); ++ } else { ++ vhost_sync_dirty_bitmap(&vdev->dev, section, 0x0, ~0x0ULL); ++ } ++ return; ++ } ++ } ++} ++ ++ + /* + * IOTLB API used by vhost vdpa iommufd container + */ +@@ -125,6 +173,7 @@ const MemoryListener vhost_vdpa_iommufd_container_listener = { + .name = "vhost-vdpa-iommufd-container", + .region_add = vhost_vdpa_iommufd_container_region_add, + .region_del = vhost_vdpa_iommufd_container_region_del, ++ .log_sync = vhost_vdpa_iommufd_container_log_sync, + }; + + static int vhost_vdpa_container_connect_iommufd(VDPAIOMMUFDContainer *container) +@@ -268,6 +317,7 @@ static int vhost_vdpa_container_attach_device(VDPAIOMMUFDContainer *container, V + ret = ioctl(vdev->vhostfd, VHOST_VDPA_ATTACH_IOMMUFD_PT, &pt_id); + if (ret == 0) { + QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); ++ vdev->dev.has_container = true; + return 0; + } + } +@@ -293,6 +343,7 @@ static int vhost_vdpa_container_attach_device(VDPAIOMMUFDContainer *container, V + } + + QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); ++ vdev->dev.has_container = true; + QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); + + return 0; +@@ -318,6 +369,7 @@ static void vhost_vdpa_container_detach_device(VDPAIOMMUFDContainer *container, + ioctl(vdev->vhostfd, VHOST_VDPA_DETACH_IOMMUFD_PT, &hwpt->hwpt_id); + + QLIST_SAFE_REMOVE(vdev, next); ++ vdev->dev.has_container = false; + + /* No device using this hwpt, free it */ + if (QLIST_EMPTY(&hwpt->device_list)) { +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index b256ad540c..7ce8547419 100644 
+--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -111,6 +111,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + v->dev.vq_index = 0; + v->dev.vq_index_end = v->dev.nvqs; + v->dev.backend_features = 0; ++ v->dev.has_container = false; + v->started = false; + + ret = vhost_vdpa_get_iova_range(v->vhostfd, &iova_range); +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index ed2f41e47a..58207e472b 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -46,7 +46,7 @@ + do { } while (0) + #endif + +-static inline bool vhost_bytemap_log_support(struct vhost_dev *dev) ++bool vhost_bytemap_log_support(struct vhost_dev *dev) + { + return (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG)); + } +@@ -159,10 +159,10 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev) + } + } + +-static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, +- MemoryRegionSection *section, +- hwaddr first, +- hwaddr last) ++int vhost_sync_dirty_bitmap(struct vhost_dev *dev, ++ MemoryRegionSection *section, ++ hwaddr first, ++ hwaddr last) + { + int i; + hwaddr start_addr; +@@ -239,8 +239,8 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + return 0; + } + +-static int vhost_sync_dirty_bytemap(struct vhost_dev *dev, +- MemoryRegionSection *section) ++int vhost_sync_dirty_bytemap(struct vhost_dev *dev, ++ MemoryRegionSection *section) + { + unsigned long *bytemap = dev->log->log; + return memory_section_set_dirty_bytemap(section, bytemap); +@@ -253,7 +253,7 @@ static void vhost_log_sync(MemoryListener *listener, + memory_listener); + MigrationState *ms = migrate_get_current(); + +- if (!dev->log_enabled || !dev->log) { ++ if (!dev->log_enabled || !dev->log || dev->has_container) { + return; + } + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index c14dc69d27..e58ca3d368 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2085,6 +2085,16 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned 
client); + void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, + hwaddr size); + ++/** ++ * is_first_section: Determine whether a MemoryRegionSection is the first section ++ * ++ * Determine whether a MemoryRegionSection is the first section ++ * of its corresponding parent MemoryRegion. ++ * ++ * @section: MemoryRegionSection ++ */ ++bool is_first_section(MemoryRegionSection *section); ++ + /** + * memory_region_clear_dirty_bitmap - clear dirty bitmap for memory range + * +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 9ca5819deb..598ae13757 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -133,6 +133,7 @@ struct vhost_dev { + QLIST_HEAD(, vhost_iommu) iommu_list; + IOMMUNotifier n; + const VhostDevConfigOps *config_ops; ++ bool has_container; + }; + + extern const VhostOps kernel_ops; +@@ -206,6 +207,14 @@ static inline bool vhost_dev_is_started(struct vhost_dev *hdev) + return hdev->started; + } + ++/** ++ * vhost_bytemap_log_support() - check if the vhost device supports dirty bytemap ++ * @dev: common vhost_dev structure ++ * ++ * Return: true if the vhost device supports dirty bytemap, false otherwise. 
++ */ ++bool vhost_bytemap_log_support(struct vhost_dev *dev); ++ + /** + * vhost_dev_start() - start the vhost device + * @hdev: common vhost_dev structure +@@ -343,6 +352,12 @@ int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, + struct vhost_inflight *inflight); + bool used_memslots_is_exceeded(void); + bool vhost_dev_has_iommu(struct vhost_dev *dev); ++int vhost_sync_dirty_bitmap(struct vhost_dev *dev, ++ MemoryRegionSection *section, ++ hwaddr first, ++ hwaddr last); ++int vhost_sync_dirty_bytemap(struct vhost_dev *dev, ++ MemoryRegionSection *section); + + #ifdef CONFIG_VHOST + int vhost_reset_device(struct vhost_dev *hdev); +diff --git a/system/memory.c b/system/memory.c +index 9db07fd832..dff55f7388 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -2271,6 +2271,12 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, + memory_region_get_dirty_log_mask(mr)); + } + ++bool is_first_section(MemoryRegionSection *section) ++{ ++ return section->fv->ranges->addr.start == section->offset_within_address_space && ++ section->fv->ranges->addr.size == section->size; ++} ++ + /* + * If memory region `mr' is NULL, do global sync. Otherwise, sync + * dirty bitmap for the specified memory region. 
+-- +2.41.0.windows.1 + diff --git a/vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch b/vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch new file mode 100644 index 0000000000000000000000000000000000000000..c166f38f11d2672b6db9591f57e33b93870a3e46 --- /dev/null +++ b/vdpa-iommufd-Implement-DMA-mapping-through-the-iommu.patch @@ -0,0 +1,288 @@ +From b88b03c84aa695b96a91329e2d01fffad551c34d Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 27 Mar 2025 19:24:53 +0800 +Subject: [PATCH] vdpa/iommufd:Implement DMA mapping through the iommufd + interface + +Change the owner of memorylistener from the independent vDPA device to VDPAIOMMUFDContainer + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-iommufd.c | 137 +++++++++++++++++++++++++++ + hw/virtio/vdpa-dev.c | 4 +- + hw/virtio/vhost-vdpa.c | 13 +-- + include/hw/virtio/vdpa-dev-iommufd.h | 1 + + include/hw/virtio/vhost-vdpa.h | 7 ++ + 5 files changed, 154 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-iommufd.c b/hw/virtio/vdpa-dev-iommufd.c +index d72f56d52f..668c6a1cb1 100644 +--- a/hw/virtio/vdpa-dev-iommufd.c ++++ b/hw/virtio/vdpa-dev-iommufd.c +@@ -9,11 +9,124 @@ + #include + #include + #include "qapi/error.h" ++#include "exec/target_page.h" ++#include "exec/address-spaces.h" + #include "hw/virtio/vdpa-dev-iommufd.h" + + static QLIST_HEAD(, VDPAIOMMUFDContainer) vdpa_container_list = + QLIST_HEAD_INITIALIZER(vdpa_container_list); + ++static int vhost_vdpa_iommufd_container_dma_map(VDPAIOMMUFDContainer *container, hwaddr iova, ++ hwaddr size, void *vaddr, bool readonly) ++{ ++ return iommufd_backend_map_dma(container->iommufd, container->ioas_id, iova, size, vaddr, readonly); ++ ++} ++static int vhost_vdpa_iommufd_container_dma_unmap(VDPAIOMMUFDContainer *container, ++ hwaddr iova, hwaddr size) ++{ ++ return iommufd_backend_unmap_dma(container->iommufd, container->ioas_id, iova, size); ++} ++ ++static void vhost_vdpa_iommufd_container_region_add(MemoryListener *listener, ++ 
MemoryRegionSection *section) ++{ ++ VDPAIOMMUFDContainer *container = container_of(listener, VDPAIOMMUFDContainer, listener); ++ hwaddr iova; ++ Int128 llend, llsize; ++ void *vaddr; ++ int page_size = qemu_target_page_size(); ++ int page_mask = -page_size; ++ int ret; ++ ++ if (vhost_vdpa_listener_skipped_section(section, 0, ULLONG_MAX, page_mask)) { ++ return; ++ } ++ ++ if (unlikely((section->offset_within_address_space & ~page_mask) != ++ (section->offset_within_region & ~page_mask))) { ++ return; ++ } ++ ++ iova = ROUND_UP(section->offset_within_address_space, page_size); ++ llend = vhost_vdpa_section_end(section, page_mask); ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ ++ memory_region_ref(section->mr); ++ vaddr = memory_region_get_ram_ptr(section->mr) + ++ section->offset_within_region + ++ (iova - section->offset_within_address_space); ++ ++ llsize = int128_sub(llend, int128_make64(iova)); ++ ++ ret = vhost_vdpa_iommufd_container_dma_map(container, iova, int128_get64(llsize), ++ vaddr, section->readonly); ++ if (ret) { ++ qemu_log("vhost vdpa iommufd container dma map failed: %d\n", ret); ++ } ++} ++ ++static void vhost_vdpa_iommufd_container_region_del(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VDPAIOMMUFDContainer *container = container_of(listener, VDPAIOMMUFDContainer, listener); ++ hwaddr iova; ++ Int128 llend, llsize; ++ int page_size = qemu_target_page_size(); ++ int page_mask = -page_size; ++ int ret; ++ ++ if (vhost_vdpa_listener_skipped_section(section, 0, ULLONG_MAX, page_mask)) { ++ return; ++ } ++ ++ if (unlikely((section->offset_within_address_space & ~page_mask) != ++ (section->offset_within_region & ~page_mask))) { ++ return; ++ } ++ ++ iova = ROUND_UP(section->offset_within_address_space, page_size); ++ llend = vhost_vdpa_section_end(section, page_mask); ++ ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ ++ llsize = int128_sub(llend, int128_make64(iova)); ++ /* ++ * The unmap 
ioctl doesn't accept a full 64-bit. need to check it ++ */ ++ if (int128_eq(llsize, int128_2_64())) { ++ llsize = int128_rshift(llsize, 1); ++ ret = vhost_vdpa_iommufd_container_dma_unmap(container, iova, int128_get64(llsize)); ++ ++ if (ret) { ++ qemu_log("vhost vdpa iommufd container unmap failed(0x%" HWADDR_PRIx ", " ++ "0x%" HWADDR_PRIx ") = %d (%m)", iova, int128_get64(llsize), ret); ++ } ++ iova += int128_get64(llsize); ++ } ++ ret = vhost_vdpa_iommufd_container_dma_unmap(container, iova, int128_get64(llsize)); ++ ++ if (ret) { ++ qemu_log("vhost vdpa iommufd container unmap failed(0x%" HWADDR_PRIx ", " ++ "0x%" HWADDR_PRIx ") = %d (%m)", iova, int128_get64(llsize), ret); ++ } ++ ++ memory_region_unref(section->mr); ++} ++ ++/* ++ * IOTLB API used by vhost vdpa iommufd container ++ */ ++const MemoryListener vhost_vdpa_iommufd_container_listener = { ++ .name = "vhost-vdpa-iommufd-container", ++ .region_add = vhost_vdpa_iommufd_container_region_add, ++ .region_del = vhost_vdpa_iommufd_container_region_del, ++}; ++ + static int vhost_vdpa_container_connect_iommufd(VDPAIOMMUFDContainer *container) + { + IOMMUFDBackend *iommufd = container->iommufd; +@@ -87,6 +200,7 @@ static VDPAIOMMUFDContainer *vhost_vdpa_create_container(VhostVdpaDevice *vdev) + + container = g_new0(VDPAIOMMUFDContainer, 1); + container->iommufd = vdev->iommufd; ++ container->listener = vhost_vdpa_iommufd_container_listener; + QLIST_INIT(&container->hwpt_list); + + QLIST_INSERT_HEAD(&vdpa_container_list, container, next); +@@ -213,11 +327,27 @@ static void vhost_vdpa_container_detach_device(VDPAIOMMUFDContainer *container, + } + } + ++static int vhost_vdpa_container_get_dev_count(VDPAIOMMUFDContainer *container) ++{ ++ IOMMUFDHWPT *hwpt; ++ VhostVdpaDevice *dev; ++ int dev_count = 0; ++ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ QLIST_FOREACH(dev, &hwpt->device_list, next) { ++ dev_count++; ++ } ++ } ++ ++ return dev_count; ++} ++ + int vhost_vdpa_attach_container(VhostVdpaDevice 
*vdev) + { + VDPAIOMMUFDContainer *container = NULL; + IOMMUFDBackend *iommufd = vdev->iommufd; + bool new_container = false; ++ int dev_count = 0; + int ret = 0; + + if (!iommufd) { +@@ -251,6 +381,12 @@ int vhost_vdpa_attach_container(VhostVdpaDevice *vdev) + goto unbind; + } + ++ /* register the container memory listener when attaching the first device */ ++ dev_count = vhost_vdpa_container_get_dev_count(container); ++ if (dev_count == 1) { ++ memory_listener_register(&container->listener, &address_space_memory); ++ } ++ + return 0; + + unbind: +@@ -288,6 +424,7 @@ void vhost_vdpa_detach_container(VhostVdpaDevice *vdev) + return; + } + /* No HWPT in this container, destroy it */ ++ memory_listener_unregister(&container->listener); + vhost_vdpa_container_disconnect_iommufd(container); + + vhost_vdpa_destroy_container(container); +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index a6bd695724..b256ad540c 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -136,9 +136,9 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + strerror(-ret)); + goto free_vqs; + } ++ } else { ++ memory_listener_register(&v->vdpa.listener, &address_space_memory); + } +- +- memory_listener_register(&v->vdpa.listener, &address_space_memory); + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_CONFIG_SIZE, + errp); +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 4a8fc37851..b5fb89b98e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -26,13 +26,14 @@ + #include "qemu/main-loop.h" + #include "trace.h" + #include "qapi/error.h" ++#include "hw/virtio/vdpa-dev-iommufd.h" + + /* + * Return one past the end of the end of section. Be careful with uint64_t + * conversions! 
+ */ +-static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, +- int page_mask) ++Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, ++ int page_mask) + { + Int128 llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); +@@ -41,10 +42,10 @@ static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, + return llend; + } + +-static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, +- uint64_t iova_min, +- uint64_t iova_max, +- int page_mask) ++bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, ++ uint64_t iova_min, ++ uint64_t iova_max, ++ int page_mask) + { + Int128 llend; + +diff --git a/include/hw/virtio/vdpa-dev-iommufd.h b/include/hw/virtio/vdpa-dev-iommufd.h +index dc14d9dd15..8e56647690 100644 +--- a/include/hw/virtio/vdpa-dev-iommufd.h ++++ b/include/hw/virtio/vdpa-dev-iommufd.h +@@ -23,6 +23,7 @@ typedef struct IOMMUFDHWPT { + } IOMMUFDHWPT; + + typedef struct VDPAIOMMUFDContainer { ++ MemoryListener listener; + struct IOMMUFDBackend *iommufd; + uint32_t ioas_id; + QLIST_HEAD(, IOMMUFDHWPT) hwpt_list; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index ee255bc1bd..e32effc6e1 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -57,6 +57,13 @@ typedef struct vhost_vdpa { + int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); + int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); + ++Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, ++ int page_mask); ++bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, ++ uint64_t iova_min, ++ uint64_t iova_max, ++ int page_mask); ++ + int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, + hwaddr size, void *vaddr, bool readonly); + int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +-- +2.41.0.windows.1 + 
diff --git a/vdpa-iommufd-Introduce-vdpa-iommufd-module.patch b/vdpa-iommufd-Introduce-vdpa-iommufd-module.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7a365c8e9c3182e557ee0a30c87e6f6674b46e5 --- /dev/null +++ b/vdpa-iommufd-Introduce-vdpa-iommufd-module.patch @@ -0,0 +1,495 @@ +From 9cdd7c19a08c773f1f8a2d314bb94d61bd08fd77 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 27 Mar 2025 16:51:03 +0800 +Subject: [PATCH] vdpa/iommufd:Introduce vdpa-iommufd module + +The purpose of the vdpa-iommufd module is to share +the DMA mapping of multiple vdpa through the kernel iommufd interface. +The VDPA devices can share the same DMA mapping by +associating with the same IOMMUFD backend. +This can avoid VDPA devices from repeatedly establishing DMA mappings, +reduce the time required for hot plugging and unplugging VDPA devices, +and minimize duplicate IOMMU TLB. +The vDPA devices that need to be isolated can also be divided into +different groups by associating them with different iommufds. +Each iommufd backend is associated with a VDPAIOMMUFDContainer to +establish contact with multiple vDPA devices. +To improve availability, even if vDPA devices encounter problems when +sharing page tables, they can still complete DMA mapping by applying for a separate HWPT. 
+ +Signed-off-by: libai +--- + hw/virtio/meson.build | 2 +- + hw/virtio/vdpa-dev-iommufd.c | 294 +++++++++++++++++++++++++++ + hw/virtio/vdpa-dev.c | 17 ++ + include/hw/virtio/vdpa-dev-iommufd.h | 40 ++++ + include/hw/virtio/vdpa-dev.h | 2 + + linux-headers/linux/vhost.h | 28 +++ + 6 files changed, 382 insertions(+), 1 deletion(-) + create mode 100644 hw/virtio/vdpa-dev-iommufd.c + create mode 100644 include/hw/virtio/vdpa-dev-iommufd.h + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index 596651d113..67291563d3 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -5,7 +5,7 @@ system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c') + system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) + system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +-system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c')) ++system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c', 'vdpa-dev-iommufd.c')) + + specific_virtio_ss = ss.source_set() + specific_virtio_ss.add(files('virtio.c')) +diff --git a/hw/virtio/vdpa-dev-iommufd.c b/hw/virtio/vdpa-dev-iommufd.c +new file mode 100644 +index 0000000000..d72f56d52f +--- /dev/null ++++ b/hw/virtio/vdpa-dev-iommufd.c +@@ -0,0 +1,294 @@ ++/* ++ * vhost vdpa device iommufd backend ++ * ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All Rights Reserved. 
++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/log.h" ++#include ++#include ++#include "qapi/error.h" ++#include "hw/virtio/vdpa-dev-iommufd.h" ++ ++static QLIST_HEAD(, VDPAIOMMUFDContainer) vdpa_container_list = ++ QLIST_HEAD_INITIALIZER(vdpa_container_list); ++ ++static int vhost_vdpa_container_connect_iommufd(VDPAIOMMUFDContainer *container) ++{ ++ IOMMUFDBackend *iommufd = container->iommufd; ++ uint32_t ioas_id; ++ Error *err = NULL; ++ ++ if (!iommufd) { ++ return -1; ++ } ++ ++ if (!iommufd_backend_connect(iommufd, &err)) { ++ error_report_err(err); ++ return -1; ++ } ++ ++ if (!iommufd_backend_alloc_ioas(iommufd, &ioas_id, &err)) { ++ error_report_err(err); ++ iommufd_backend_disconnect(iommufd); ++ return -1; ++ } ++ container->ioas_id = ioas_id; ++ return 0; ++} ++ ++static void vhost_vdpa_container_disconnect_iommufd(VDPAIOMMUFDContainer *container) ++{ ++ IOMMUFDBackend *iommufd = container->iommufd; ++ uint32_t ioas_id = container->ioas_id; ++ ++ if (!iommufd) { ++ return; ++ } ++ ++ iommufd_backend_free_id(iommufd, ioas_id); ++ iommufd_backend_disconnect(iommufd); ++} ++ ++static IOMMUFDHWPT *vhost_vdpa_find_hwpt(VDPAIOMMUFDContainer *container, ++ VhostVdpaDevice *vdev) ++{ ++ IOMMUFDHWPT *hwpt = NULL; ++ VhostVdpaDevice *tmp = NULL; ++ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ QLIST_FOREACH(tmp, &hwpt->device_list, next) { ++ if (tmp == vdev) { ++ return hwpt; ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++static VDPAIOMMUFDContainer *vhost_vdpa_find_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ ++ QLIST_FOREACH(container, &vdpa_container_list, next) { ++ if (container->iommufd == vdev->iommufd) { ++ return container; ++ } ++ } ++ ++ return NULL; ++} ++ ++static VDPAIOMMUFDContainer *vhost_vdpa_create_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ ++ container = g_new0(VDPAIOMMUFDContainer, 1); ++ container->iommufd = vdev->iommufd; ++ 
QLIST_INIT(&container->hwpt_list); ++ ++ QLIST_INSERT_HEAD(&vdpa_container_list, container, next); ++ ++ return container; ++} ++ ++static void vhost_vdpa_destroy_container(VDPAIOMMUFDContainer *container) ++{ ++ if (!container) { ++ return; ++ } ++ ++ container->iommufd = NULL; ++ QLIST_SAFE_REMOVE(container, next); ++ g_free(container); ++} ++ ++static void vhost_vdpa_device_unbind_iommufd(VhostVdpaDevice *vdev) ++{ ++ int ret; ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_UNBIND_IOMMUFD, 0); ++ if (ret) { ++ qemu_log("vhost vdpa device unbind iommufd failed: %d, devid: %d\n", ++ ret, vdev->iommufd_devid); ++ } ++} ++ ++static int vhost_vdpa_device_bind_iommufd(VhostVdpaDevice *vdev) ++{ ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ struct vdpa_dev_bind_iommufd bind = { ++ .iommufd = iommufd->fd, ++ .out_devid = -1, ++ }; ++ int ret; ++ ++ /* iommufd auto unbind when vdev->vhostfd close */ ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_BIND_IOMMUFD, &bind); ++ if (ret) { ++ qemu_log("vhost vdpa device bind iommufd failed: %d\n", ret); ++ return ret; ++ } ++ vdev->iommufd_devid = bind.out_devid; ++ return 0; ++} ++ ++static int vhost_vdpa_container_attach_device(VDPAIOMMUFDContainer *container, VhostVdpaDevice *vdev) ++{ ++ IOMMUFDBackend *iommufd = NULL; ++ IOMMUFDHWPT *hwpt = NULL; ++ Error *err = NULL; ++ uint32_t pt_id; ++ int ret; ++ ++ if (!container || !container->iommufd || container->iommufd != vdev->iommufd) { ++ return -1; ++ } ++ ++ iommufd = container->iommufd; ++ ++ /* try to find an available hwpt */ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ pt_id = hwpt->hwpt_id; ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_ATTACH_IOMMUFD_PT, &pt_id); ++ if (ret == 0) { ++ QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); ++ return 0; ++ } ++ } ++ ++ /* available hwpt not found in the container, create a new one */ ++ hwpt = g_new0(IOMMUFDHWPT, 1); ++ QLIST_INIT(&hwpt->device_list); ++ ++ if (!iommufd_backend_alloc_hwpt(iommufd, vdev->iommufd_devid, ++ 
container->ioas_id, 0, 0, 0, NULL, ++ &pt_id, NULL, &err)) { ++ error_report_err(err); ++ ret = -1; ++ goto free_mem; ++ } ++ ++ hwpt->hwpt_id = pt_id; ++ ++ ret = ioctl(vdev->vhostfd, VHOST_VDPA_ATTACH_IOMMUFD_PT, &pt_id); ++ if (ret) { ++ qemu_log("vhost vdpa device attach iommufd pt failed: %d\n", ret); ++ goto free_hwpt; ++ } ++ ++ QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); ++ QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); ++ ++ return 0; ++ ++free_hwpt: ++ iommufd_backend_free_id(iommufd, hwpt->hwpt_id); ++free_mem: ++ g_free(hwpt); ++ return ret; ++} ++ ++static void vhost_vdpa_container_detach_device(VDPAIOMMUFDContainer *container, VhostVdpaDevice *vdev) ++{ ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ IOMMUFDHWPT *hwpt = NULL; ++ ++ /* find the hwpt using by this device */ ++ hwpt = vhost_vdpa_find_hwpt(container, vdev); ++ if (!hwpt) { ++ return; ++ } ++ ++ ioctl(vdev->vhostfd, VHOST_VDPA_DETACH_IOMMUFD_PT, &hwpt->hwpt_id); ++ ++ QLIST_SAFE_REMOVE(vdev, next); ++ ++ /* No device using this hwpt, free it */ ++ if (QLIST_EMPTY(&hwpt->device_list)) { ++ iommufd_backend_free_id(iommufd, hwpt->hwpt_id); ++ QLIST_SAFE_REMOVE(hwpt, next); ++ g_free(hwpt); ++ } ++} ++ ++int vhost_vdpa_attach_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ bool new_container = false; ++ int ret = 0; ++ ++ if (!iommufd) { ++ return 0; ++ } ++ ++ container = vhost_vdpa_find_container(vdev); ++ if (!container) { ++ container = vhost_vdpa_create_container(vdev); ++ if (!container) { ++ qemu_log("vdpa create container failed\n"); ++ return -1; ++ } ++ ret = vhost_vdpa_container_connect_iommufd(container); ++ if (ret) { ++ qemu_log("vdpa container connect iommufd failed\n"); ++ goto destroy; ++ } ++ new_container = true; ++ } ++ ++ ret = vhost_vdpa_device_bind_iommufd(vdev); ++ if (ret) { ++ qemu_log("vdpa device bind iommufd failed\n"); ++ goto disconnect; ++ } ++ ++ ret = 
vhost_vdpa_container_attach_device(container, vdev); ++ if (ret) { ++ qemu_log("vdpa container attach device failed\n"); ++ goto unbind; ++ } ++ ++ return 0; ++ ++unbind: ++ vhost_vdpa_device_unbind_iommufd(vdev); ++disconnect: ++ if (!new_container) { ++ return ret; ++ } ++ vhost_vdpa_container_disconnect_iommufd(container); ++destroy: ++ vhost_vdpa_destroy_container(container); ++ ++ return ret; ++} ++ ++void vhost_vdpa_detach_container(VhostVdpaDevice *vdev) ++{ ++ VDPAIOMMUFDContainer *container = NULL; ++ IOMMUFDBackend *iommufd = vdev->iommufd; ++ ++ if (!iommufd) { ++ return; ++ } ++ ++ container = vhost_vdpa_find_container(vdev); ++ if (!container) { ++ return; ++ } ++ ++ vhost_vdpa_container_detach_device(container, vdev); ++ ++ vhost_vdpa_device_unbind_iommufd(vdev); ++ ++ if (!QLIST_EMPTY(&container->hwpt_list)) { ++ return; ++ } ++ /* No HWPT in this container, destroy it */ ++ vhost_vdpa_container_disconnect_iommufd(container); ++ ++ vhost_vdpa_destroy_container(container); ++} +\ No newline at end of file +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 9ce7ed7eae..a6bd695724 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -32,6 +32,7 @@ + #include "migration/migration.h" + #include "exec/address-spaces.h" + #include "standard-headers/linux/virtio_ids.h" ++#include "hw/virtio/vdpa-dev-iommufd.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -127,6 +128,16 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + goto free_vqs; + } + ++ /* If the vdpa device is associated with an iommufd, attach device to container */ ++ if (v->iommufd) { ++ ret = vhost_vdpa_attach_container(v); ++ if (ret < 0) { ++ error_setg(errp, "vhost vdpa device attach container failed: %s", ++ strerror(-ret)); ++ goto free_vqs; ++ } ++ } ++ + memory_listener_register(&v->vdpa.listener, &address_space_memory); + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, + 
VHOST_VDPA_GET_CONFIG_SIZE, +@@ -168,6 +179,9 @@ free_config: + vhost_cleanup: + memory_listener_unregister(&v->vdpa.listener); + vhost_dev_cleanup(&v->dev); ++ if (v->iommufd) { ++ vhost_vdpa_detach_container(v); ++ } + free_vqs: + g_free(vqs); + out: +@@ -194,6 +208,9 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) + g_free(s->dev.vqs); + memory_listener_unregister(&s->vdpa.listener); + vhost_dev_cleanup(&s->dev); ++ if (s->iommufd) { ++ vhost_vdpa_detach_container(s); ++ } + qemu_close(s->vhostfd); + s->vhostfd = -1; + } +diff --git a/include/hw/virtio/vdpa-dev-iommufd.h b/include/hw/virtio/vdpa-dev-iommufd.h +new file mode 100644 +index 0000000000..dc14d9dd15 +--- /dev/null ++++ b/include/hw/virtio/vdpa-dev-iommufd.h +@@ -0,0 +1,40 @@ ++/* ++ * vhost vDPA device support iommufd header ++ * ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All Rights Reserved. ++ */ ++ ++#ifndef _VHOST_VDPA_IOMMUFD_H ++#define _VHOST_VDPA_IOMMUFD_H ++ ++#include "hw/virtio/vdpa-dev.h" ++ ++/* ++ * A HW pagetable is called an iommu_domain inside the kernel. ++ * This user object allows directly creating an inspecting the ++ * domains. Domains that have kernel owned page tables will be ++ * associated with an iommufd_ioas that provides the IOVA to ++ * PFN map. 
++ */ ++typedef struct IOMMUFDHWPT { ++ uint32_t hwpt_id; ++ QLIST_HEAD(, VhostVdpaDevice) device_list; ++ QLIST_ENTRY(IOMMUFDHWPT) next; ++} IOMMUFDHWPT; ++ ++typedef struct VDPAIOMMUFDContainer { ++ struct IOMMUFDBackend *iommufd; ++ uint32_t ioas_id; ++ QLIST_HEAD(, IOMMUFDHWPT) hwpt_list; ++ QLIST_ENTRY(VDPAIOMMUFDContainer) next; ++} VDPAIOMMUFDContainer; ++ ++struct vdpa_dev_bind_iommufd { ++ __s32 iommufd; ++ __u32 out_devid; ++}; ++ ++int vhost_vdpa_attach_container(VhostVdpaDevice *vdev); ++void vhost_vdpa_detach_container(VhostVdpaDevice *vdev); ++ ++#endif /* _VHOST_VDPA_IOMMUFD_H */ +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index accdb7fa28..872e630546 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -43,6 +43,8 @@ struct VhostVdpaDevice { + VMChangeStateEntry *vmstate; + Notifier migration_state; + IOMMUFDBackend *iommufd; ++ uint32_t iommufd_devid; ++ QLIST_ENTRY(VhostVdpaDevice) next; + }; + + #endif +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index a08e980a1e..f5c05abe8b 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -232,6 +232,34 @@ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) + ++/* Bind a vDPA device to the specified iommufd ++ * ++ * After the return of this ioctl, the vDPA device is binded to the specified ++ * iommufd, and the device id is also returned. ++ */ ++#define VHOST_VDPA_BIND_IOMMUFD _IO(VHOST_VIRTIO, 0x90) ++ ++/* Unbind a vDPA device from the specified iommufd ++ * ++ * After the return of this ioctl, the vDPA device is unbinded from the specified ++ * iommufd. ++ */ ++#define VHOST_VDPA_UNBIND_IOMMUFD _IO(VHOST_VIRTIO, 0x91) ++ ++/* Associate the vDPA device with an address space within the bound iommufd ++ * ++ * After the return of this ioctl, the vDPA device is attached to the bound ++ * iommufd. 
++ */ ++#define VHOST_VDPA_ATTACH_IOMMUFD_PT _IO(VHOST_VIRTIO, 0x92) ++ ++/* Detach the vDPA device from an address space within the bound iommufd. ++ * ++ * After the return of this ioctl, the vDPA device is detached from the address ++ * space within the bound iommufd. ++ */ ++#define VHOST_VDPA_DETACH_IOMMUFD_PT _IO(VHOST_VIRTIO, 0x93) ++ + /* set and get device buffer */ + #define VHOST_GET_DEV_BUFFER _IOR(VHOST_VIRTIO, 0xb0, struct vhost_vdpa_config) + #define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) +-- +2.41.0.windows.1 + diff --git a/vdpa-iommufd-support-associating-iommufd-backend-for.patch b/vdpa-iommufd-support-associating-iommufd-backend-for.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a0b14d324027dc2dbad2b1d5be7dbb3cea7d2d3 --- /dev/null +++ b/vdpa-iommufd-support-associating-iommufd-backend-for.patch @@ -0,0 +1,57 @@ +From 184e5195a815d57701cd9358f4b0537025729833 Mon Sep 17 00:00:00 2001 +From: libai +Date: Wed, 26 Mar 2025 20:44:40 +0800 +Subject: [PATCH] vdpa/iommufd:support associating iommufd backend for vDPA + devices + +The following parameters can associate the iommufd object with the vdpa device: + +-object iommufd,id=iommufd1 +-device '{ + "driver":"vhost-vdpa-device-pci", + "id":"vhostdev0", + "vhostdev":"/dev/vhost-vdpa-1", + "iommufd":"iommufd1", +}' + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 1 + + include/hw/virtio/vdpa-dev.h | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index bd787cf39c..9ce7ed7eae 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -356,6 +356,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + static Property vhost_vdpa_device_properties[] = { + DEFINE_PROP_STRING("vhostdev", VhostVdpaDevice, vhostdev), + DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0), ++ DEFINE_PROP_LINK("iommufd", VhostVdpaDevice, iommufd, 
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 60e9c3f3fe..accdb7fa28 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -18,6 +18,7 @@ + #include "hw/virtio/vhost.h" + #include "hw/virtio/vhost-vdpa.h" + #include "qom/object.h" ++#include "sysemu/iommufd.h" + + + #define TYPE_VHOST_VDPA_DEVICE "vhost-vdpa-device" +@@ -41,6 +42,7 @@ struct VhostVdpaDevice { + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; + Notifier migration_state; ++ IOMMUFDBackend *iommufd; + }; + + #endif +-- +2.41.0.windows.1 + diff --git a/vdpa-move-memory-listener-to-the-realize-stage.patch b/vdpa-move-memory-listener-to-the-realize-stage.patch new file mode 100644 index 0000000000000000000000000000000000000000..56137c61c50fca095cf11049bf57e53340f0bb5e --- /dev/null +++ b/vdpa-move-memory-listener-to-the-realize-stage.patch @@ -0,0 +1,91 @@ +From 587f42300488af4478d7aa1b62e2b351155621db Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 16:01:16 +0800 +Subject: [PATCH] vdpa: move memory listener to the realize stage + +Move the memory listener registration of vdpa from the start stage +to the realize stage. Avoid that in the start phase, the memory +listener callback function has not yet been processed. 
+ +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 4 ++++ + hw/virtio/vhost-vdpa.c | 5 ----- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 6af78a4229..877bf7464f 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -30,6 +30,7 @@ + #include "sysemu/runstate.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/migration.h" ++#include "exec/address-spaces.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -125,6 +126,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + goto free_vqs; + } + ++ memory_listener_register(&v->vdpa.listener, &address_space_memory); + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_CONFIG_SIZE, + errp); +@@ -163,6 +165,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + free_config: + g_free(v->config); + vhost_cleanup: ++ memory_listener_unregister(&v->vdpa.listener); + vhost_dev_cleanup(&v->dev); + free_vqs: + g_free(vqs); +@@ -188,6 +191,7 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) + + g_free(s->config); + g_free(s->dev.vqs); ++ memory_listener_unregister(&s->vdpa.listener); + vhost_dev_cleanup(&s->dev); + qemu_close(s->vhostfd); + s->vhostfd = -1; +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 063e941544..30408f2069 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1320,8 +1320,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + "IOMMU and try again"); + return -1; + } +- memory_listener_register(&v->listener, dev->vdev->dma_as); +- + return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + } + +@@ -1515,7 +1513,6 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + + static int vhost_vdpa_suspend_device(struct vhost_dev *dev) + { +- struct vhost_vdpa *v = dev->opaque; + int ret; + + vhost_vdpa_svqs_stop(dev); +@@ 
-1526,7 +1523,6 @@ static int vhost_vdpa_suspend_device(struct vhost_dev *dev) + } + + ret = vhost_vdpa_call(dev, VHOST_VDPA_SUSPEND, NULL); +- memory_listener_unregister(&v->listener); + return ret; + } + +@@ -1548,7 +1544,6 @@ static int vhost_vdpa_resume_device(struct vhost_dev *dev) + return 0; + } + +- memory_listener_register(&v->listener, &address_space_memory); + return vhost_vdpa_call(dev, VHOST_VDPA_RESUME, NULL); + } + +-- +2.27.0 + diff --git a/vdpa-remove-memory-listener-unregister-in-vhost_vdpa.patch b/vdpa-remove-memory-listener-unregister-in-vhost_vdpa.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c38bfc97c62c2a8416c1cc9fcb8799df40051d8 --- /dev/null +++ b/vdpa-remove-memory-listener-unregister-in-vhost_vdpa.patch @@ -0,0 +1,37 @@ +From 1c62372d7c9e1f71ef9563e88b7491a7272b2a7d Mon Sep 17 00:00:00 2001 +From: fangyi +Date: Tue, 29 Oct 2024 20:02:10 +0800 +Subject: [PATCH 6/6] vdpa: remove memory listener unregister in + vhost_vdpa_reset_status + +Remove memory listener unregister in vhost_vdpa_reset_status as we +move the memory listener registration of vdpa from the start stage +to the realize stage before. 
+--- + hw/virtio/vhost-vdpa.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 69cf3b76e9..dcf1ef2c15 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1292,8 +1292,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + + static void vhost_vdpa_reset_status(struct vhost_dev *dev) + { +- struct vhost_vdpa *v = dev->opaque; +- + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return; + } +@@ -1301,7 +1299,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) + vhost_vdpa_reset_device(dev); + vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER); +- memory_listener_unregister(&v->listener); + } + + static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, +-- +2.43.0 + diff --git a/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch b/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f18cb554fd2d0cd90ef726a20b52ffb6219f98c --- /dev/null +++ b/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch @@ -0,0 +1,38 @@ +From 0f515ff831f46ef34cd83aa145e547e48d8b3b56 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 14 Dec 2023 11:05:52 +0800 +Subject: [PATCH] vdpa: set vring enable only if the vring address has already + been set + +Currently, vhost-vdpa does not determine the status of each vring when +performing the enable operation on vring. When the vBIOS(EDK2) is running, +the driver will not enable all vrings. In this case, setting all vrings +to enable is inconsistent with the actual situation. + +Add logic when enabling vring, make a judgement on the vring status. If the +vring address is not set, the vring will not be enabled. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 30408f2069..d49826845f 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -890,6 +890,11 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) + .index = idx, + .num = 1, + }; ++ hwaddr addr = virtio_queue_get_desc_addr(dev->vdev, idx); ++ if (addr == 0) { ++ return 0; ++ } ++ + int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); + + trace_vhost_vdpa_set_vring_ready(dev, idx, r); +-- +2.27.0 + diff --git a/vdpa-support-resizing-virtio-blk-capacity-online-for.patch b/vdpa-support-resizing-virtio-blk-capacity-online-for.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e43b7c9dbc944bfb1271219eb882599a204da56 --- /dev/null +++ b/vdpa-support-resizing-virtio-blk-capacity-online-for.patch @@ -0,0 +1,50 @@ +From 8c65e8d7c923ade6f3c7fbef43000562d4733629 Mon Sep 17 00:00:00 2001 +From: fangyi +Date: Sat, 7 Sep 2024 07:11:07 +0000 +Subject: [PATCH 2/6] vdpa: support resizing virtio-blk capacity online for + kernel vdpa + +Signed-off-by: fangyi +--- + hw/virtio/vdpa-dev.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 91e71847b0..bf4b3ec3fd 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -31,6 +31,7 @@ + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/migration.h" + #include "exec/address-spaces.h" ++#include "standard-headers/linux/virtio_ids.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -201,7 +202,23 @@ static void + vhost_vdpa_device_get_config(VirtIODevice *vdev, uint8_t *config) + { + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); ++ uint8_t *new_config; ++ int ret; ++ ++ if (s->vdev_id != VIRTIO_ID_BLOCK) { ++ goto out; ++ } + ++ new_config = g_malloc0(s->config_size); ++ 
ret = vhost_dev_get_config(&s->dev, new_config, s->config_size, NULL); ++ if (ret < 0) { ++ error_report("vhost-vdpa-device: get config failed(%d)\n", ret); ++ goto free; ++ } ++ memcpy(s->config, new_config, s->config_size); ++free: ++ g_free(new_config); ++out: + memcpy(config, s->config, s->config_size); + } + +-- +2.43.0 + diff --git a/vdpa-support-vdpa-device-suspend-resume.patch b/vdpa-support-vdpa-device-suspend-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e8ff653152a49d9410c2479427368f9a50c07d2 --- /dev/null +++ b/vdpa-support-vdpa-device-suspend-resume.patch @@ -0,0 +1,120 @@ +From e58b48ab2bb679f4c661301019d6f94bd39f93e5 Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 19 Dec 2023 20:18:03 +0800 +Subject: [PATCH] vdpa: support vdpa device suspend/resume + +only implement suspend and resume interface used for migration. The +current implementation still has bugs when suspend/resume a virtual +machine. Fix it. + +Fixes: 4c5a9a0703 ("vhost: implement vhost_vdpa_device_suspend/resume") + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 16 +++++++++++----- + hw/virtio/vdpa-dev.c | 8 +------- + include/hw/virtio/vdpa-dev.h | 1 + + 3 files changed, 13 insertions(+), 12 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 9b47e3ed45..8b13f89c85 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -143,6 +143,7 @@ static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + } + + vdpa->started = false; ++ vdpa->suspended = true; + + ret = vhost_dev_suspend(&vdpa->dev, vdev, false); + if (ret) { +@@ -165,6 +166,7 @@ set_guest_notifiers_fail: + } + + suspend_fail: ++ vdpa->suspended = false; + vdpa->started = true; + return ret; + } +@@ -201,6 +203,7 @@ static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) + goto err_guest_notifiers; + } + vdpa->started = true; ++ vdpa->suspended = false; + + /* + * guest_notifier_mask/pending not used yet, so just unmask +@@ 
-241,7 +244,7 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!running) { +- if (ms->state == RUN_STATE_PAUSED) { ++ if (ms->state == MIGRATION_STATUS_ACTIVE || state == RUN_STATE_PAUSED) { + ret = vhost_vdpa_device_suspend(vdpa); + if (ret) { + error_report("suspend vdpa device failed: %d\n", ret); +@@ -251,16 +254,19 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + } + } + } else { +- if (ms->state == RUN_STATE_RESTORE_VM) { ++ if (vdpa->suspended) { + ret = vhost_vdpa_device_resume(vdpa); + if (ret) { +- error_report("migration dest resume device failed, abort!\n"); +- exit(EXIT_FAILURE); ++ error_report("vhost vdpa device resume failed: %d\n", ret); + } + } + + if (mis->state == RUN_STATE_RESTORE_VM) { +- vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ ret = vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ if (ret) { ++ error_report("migration dest resume device failed: %d\n", ret); ++ exit(EXIT_FAILURE); ++ } + /* post resume */ + mis->bh = qemu_bh_new(vdpa_dev_migration_handle_incoming_bh, + hdev); +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 877bf7464f..91e71847b0 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -317,7 +317,6 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + { + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +- MigrationState *ms = migrate_get_current(); + bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; + int ret; +@@ -326,12 +325,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + should_start = false; + } + +- if (s->started == should_start) { +- return; +- } +- +- if (ms->state == RUN_STATE_PAUSED || +- ms->state == RUN_STATE_RESTORE_VM) { ++ if (s->started == should_start || s->suspended) { + return; 
+ } + +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 20f50c76c6..60e9c3f3fe 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -37,6 +37,7 @@ struct VhostVdpaDevice { + int config_size; + uint16_t queue_size; + bool started; ++ bool suspended; + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; + Notifier migration_state; +-- +2.27.0 + diff --git a/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch b/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch new file mode 100644 index 0000000000000000000000000000000000000000..69f13f0993b2229d4fac841eb52be1d6f5552d55 --- /dev/null +++ b/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch @@ -0,0 +1,45 @@ +From a78602118043eb9923996504d5b2e1b14a1ec38d Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 21 Dec 2023 11:03:37 +0800 +Subject: [PATCH] vdpa: suspend function return 0 when the vdpa device is + stopped + +When vhost vdpa device is stopped(vdpa->started is false), suspend +operation do nothing and return success, instead of return failure. + +The same goes for resume function. 
+ +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 8b13f89c85..b889dd4715 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -134,8 +134,8 @@ static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + +- if (!vdpa->started) { +- return -EFAULT; ++ if (!vdpa->started || vdpa->suspended) { ++ return 0; + } + + if (!k->set_guest_notifiers) { +@@ -178,6 +178,10 @@ static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int i, ret; + ++ if (vdpa->started || !vdpa->suspended) { ++ return 0; ++ } ++ + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers\n"); + return -ENOSYS; +-- +2.27.0 + diff --git a/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch b/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch deleted file mode 100644 index eb8fb5d8f84b88bed1e48516050af5546dfae1cb..0000000000000000000000000000000000000000 --- a/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch +++ /dev/null @@ -1,258 +0,0 @@ -From 3a875293ae00266e1c82a5c382066efc4acc64ce Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:15 +0530 -Subject: [PATCH] vfio: Add VM state change handler to know state of VM - -VM state change handler is called on change in VM's state. Based on -VM state, VFIO device state should be changed. -Added read/write helper functions for migration region. -Added function to set device_state. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Reviewed-by: Dr. 
David Alan Gilbert -Reviewed-by: Cornelia Huck -[aw: lx -> HWADDR_PRIx, remove redundant parens] -Signed-off-by: Alex Williamson -Signed-off-by: Shenming Lu ---- - hw/vfio/migration.c | 160 ++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 2 + - include/hw/vfio/vfio-common.h | 4 + - 3 files changed, 166 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index fd7faf423c..ca82c78536 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -10,6 +10,7 @@ - #include "qemu/osdep.h" - #include - -+#include "sysemu/sysemu.h" - #include "hw/vfio/vfio-common.h" - #include "cpu.h" - #include "migration/migration.h" -@@ -22,6 +23,157 @@ - #include "exec/ram_addr.h" - #include "pci.h" - #include "trace.h" -+#include "hw/hw.h" -+ -+static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, -+ off_t off, bool iswrite) -+{ -+ int ret; -+ -+ ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : -+ pread(vbasedev->fd, val, count, off); -+ if (ret < count) { -+ error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" -+ HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, -+ vbasedev->name, off, strerror(errno)); -+ return (ret < 0) ? 
ret : -EINVAL; -+ } -+ return 0; -+} -+ -+static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, -+ off_t off, bool iswrite) -+{ -+ int ret, done = 0; -+ __u8 *tbuf = buf; -+ -+ while (count) { -+ int bytes = 0; -+ -+ if (count >= 8 && !(off % 8)) { -+ bytes = 8; -+ } else if (count >= 4 && !(off % 4)) { -+ bytes = 4; -+ } else if (count >= 2 && !(off % 2)) { -+ bytes = 2; -+ } else { -+ bytes = 1; -+ } -+ -+ ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); -+ if (ret) { -+ return ret; -+ } -+ -+ count -= bytes; -+ done += bytes; -+ off += bytes; -+ tbuf += bytes; -+ } -+ return done; -+} -+ -+#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) -+#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) -+ -+#define VFIO_MIG_STRUCT_OFFSET(f) \ -+ offsetof(struct vfio_device_migration_info, f) -+/* -+ * Change the device_state register for device @vbasedev. Bits set in @mask -+ * are preserved, bits set in @value are set, and bits not set in either @mask -+ * or @value are cleared in device_state. If the register cannot be accessed, -+ * the resulting state would be invalid, or the device enters an error state, -+ * an error is returned. 
-+ */ -+ -+static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, -+ uint32_t value) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ VFIORegion *region = &migration->region; -+ off_t dev_state_off = region->fd_offset + -+ VFIO_MIG_STRUCT_OFFSET(device_state); -+ uint32_t device_state; -+ int ret; -+ -+ ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), -+ dev_state_off); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ device_state = (device_state & mask) | value; -+ -+ if (!VFIO_DEVICE_STATE_VALID(device_state)) { -+ return -EINVAL; -+ } -+ -+ ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), -+ dev_state_off); -+ if (ret < 0) { -+ int rret; -+ -+ rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), -+ dev_state_off); -+ -+ if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { -+ hw_error("%s: Device in error state 0x%x", vbasedev->name, -+ device_state); -+ return rret ? rret : -EIO; -+ } -+ return ret; -+ } -+ -+ migration->device_state = device_state; -+ trace_vfio_migration_set_state(vbasedev->name, device_state); -+ return 0; -+} -+ -+static void vfio_vmstate_change(void *opaque, int running, RunState state) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ uint32_t value, mask; -+ int ret; -+ -+ if (vbasedev->migration->vm_running == running) { -+ return; -+ } -+ -+ if (running) { -+ /* -+ * Here device state can have one of _SAVING, _RESUMING or _STOP bit. -+ * Transition from _SAVING to _RUNNING can happen if there is migration -+ * failure, in that case clear _SAVING bit. -+ * Transition from _RESUMING to _RUNNING occurs during resuming -+ * phase, in that case clear _RESUMING bit. -+ * In both the above cases, set _RUNNING bit. -+ */ -+ mask = ~VFIO_DEVICE_STATE_MASK; -+ value = VFIO_DEVICE_STATE_RUNNING; -+ } else { -+ /* -+ * Here device state could be either _RUNNING or _SAVING|_RUNNING. 
Reset -+ * _RUNNING bit -+ */ -+ mask = ~VFIO_DEVICE_STATE_RUNNING; -+ value = 0; -+ } -+ -+ ret = vfio_migration_set_state(vbasedev, mask, value); -+ if (ret) { -+ /* -+ * Migration should be aborted in this case, but vm_state_notify() -+ * currently does not support reporting failures. -+ */ -+ error_report("%s: Failed to set device state 0x%x", vbasedev->name, -+ (migration->device_state & mask) | value); -+ qemu_file_set_error(migrate_get_current()->to_dst_file, ret); -+ } -+ vbasedev->migration->vm_running = running; -+ trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), -+ (migration->device_state & mask) | value); -+} - - static void vfio_migration_exit(VFIODevice *vbasedev) - { -@@ -38,6 +190,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, - { - int ret; - Object *obj; -+ VFIOMigration *migration; - - if (!vbasedev->ops->vfio_get_object) { - return -EINVAL; -@@ -64,6 +217,10 @@ static int vfio_migration_init(VFIODevice *vbasedev, - ret = -EINVAL; - goto err; - } -+ -+ migration = vbasedev->migration; -+ migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, -+ vbasedev); - return 0; - - err: -@@ -111,6 +268,9 @@ add_blocker: - void vfio_migration_finalize(VFIODevice *vbasedev) - { - if (vbasedev->migration) { -+ VFIOMigration *migration = vbasedev->migration; -+ -+ qemu_del_vm_change_state_handler(migration->vm_state); - vfio_migration_exit(vbasedev); - } - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index fd034ac536..1626862315 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) "" - - # migration.c - vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" -+vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" -+vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" -diff --git 
a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index e0482c2bac..533d6737ac 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -29,6 +29,7 @@ - #ifdef CONFIG_LINUX - #include - #endif -+#include "sysemu/sysemu.h" - - #define VFIO_MSG_PREFIX "vfio %s: " - -@@ -58,7 +59,10 @@ typedef struct VFIORegion { - } VFIORegion; - - typedef struct VFIOMigration { -+ VMChangeStateEntry *vm_state; - VFIORegion region; -+ uint32_t device_state; -+ int vm_running; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.27.0 - diff --git a/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch b/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch deleted file mode 100644 index b15a1c4bfbeb108bc5a098df8d68aef288b5e9c2..0000000000000000000000000000000000000000 --- a/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 4363ea5cded9c6d2838a9564b067f583a6ef077f Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:22 +0530 -Subject: [PATCH] vfio: Add function to start and stop dirty pages tracking - -Call VFIO_IOMMU_DIRTY_PAGES ioctl to start and stop dirty pages tracking -for VFIO devices. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Dr. 
David Alan Gilbert -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 36 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 36 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 0d2bd9e5cd..0bdf6a1820 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -11,6 +11,7 @@ - #include "qemu/main-loop.h" - #include "qemu/cutils.h" - #include -+#include - - #include "sysemu/sysemu.h" - #include "hw/vfio/vfio-common.h" -@@ -391,10 +392,40 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) - return qemu_file_get_error(f); - } - -+static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start) -+{ -+ int ret; -+ VFIOMigration *migration = vbasedev->migration; -+ VFIOContainer *container = vbasedev->group->container; -+ struct vfio_iommu_type1_dirty_bitmap dirty = { -+ .argsz = sizeof(dirty), -+ }; -+ -+ if (start) { -+ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { -+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; -+ } else { -+ return -EINVAL; -+ } -+ } else { -+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; -+ } -+ -+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); -+ if (ret) { -+ error_report("Failed to set dirty tracking flag 0x%x errno: %d", -+ dirty.flags, errno); -+ return -errno; -+ } -+ return ret; -+} -+ - static void vfio_migration_cleanup(VFIODevice *vbasedev) - { - VFIOMigration *migration = vbasedev->migration; - -+ vfio_set_dirty_page_tracking(vbasedev, false); -+ - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } -@@ -435,6 +466,11 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) - return ret; - } - -+ ret = vfio_set_dirty_page_tracking(vbasedev, true); -+ if (ret) { -+ return ret; -+ } -+ - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - ret = qemu_file_get_error(f); --- -2.27.0 - diff --git a/vfio-Add-function-to-unmap-VFIO-region.patch b/vfio-Add-function-to-unmap-VFIO-region.patch deleted file mode 100644 
index 2cdd76a09bd44c73b42f4294055b935a31446b7e..0000000000000000000000000000000000000000 --- a/vfio-Add-function-to-unmap-VFIO-region.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 68cc2be61588d14de2313342ee87eb0bb2b990e0 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:11 +0530 -Subject: [PATCH] vfio: Add function to unmap VFIO region - -This function will be used for migration region. -Migration region is mmaped when migration starts and will be unmapped when -migration is complete. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Reviewed-by: Cornelia Huck -Signed-off-by: Alex Williamson ---- - hw/vfio/common.c | 32 ++++++++++++++++++++++++++++---- - hw/vfio/trace-events | 1 + - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 30 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index a859298fda..4c32b1bb99 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -906,6 +906,18 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, - return 0; - } - -+static void vfio_subregion_unmap(VFIORegion *region, int index) -+{ -+ trace_vfio_region_unmap(memory_region_name(®ion->mmaps[index].mem), -+ region->mmaps[index].offset, -+ region->mmaps[index].offset + -+ region->mmaps[index].size - 1); -+ memory_region_del_subregion(region->mem, ®ion->mmaps[index].mem); -+ munmap(region->mmaps[index].mmap, region->mmaps[index].size); -+ object_unparent(OBJECT(®ion->mmaps[index].mem)); -+ region->mmaps[index].mmap = NULL; -+} -+ - int vfio_region_mmap(VFIORegion *region) - { - int i, prot = 0; -@@ -936,10 +948,7 @@ int vfio_region_mmap(VFIORegion *region) - region->mmaps[i].mmap = NULL; - - for (i--; i >= 0; i--) { -- memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); -- munmap(region->mmaps[i].mmap, region->mmaps[i].size); -- object_unparent(OBJECT(®ion->mmaps[i].mem)); -- region->mmaps[i].mmap = NULL; -+ vfio_subregion_unmap(region, i); - } - - return ret; -@@ -964,6 
+973,21 @@ int vfio_region_mmap(VFIORegion *region) - return 0; - } - -+void vfio_region_unmap(VFIORegion *region) -+{ -+ int i; -+ -+ if (!region->mem) { -+ return; -+ } -+ -+ for (i = 0; i < region->nr_mmaps; i++) { -+ if (region->mmaps[i].mmap) { -+ vfio_subregion_unmap(region, i); -+ } -+ } -+} -+ - void vfio_region_exit(VFIORegion *region) - { - int i; -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index b1ef55a33f..8cdc27946c 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -111,6 +111,7 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg - vfio_region_exit(const char *name, int index) "Device %s, region %d" - vfio_region_finalize(const char *name, int index) "Device %s, region %d" - vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d" -+vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Region %s unmap [0x%lx - 0x%lx]" - vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" - vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" - vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 9107bd41c0..93493891ba 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -171,6 +171,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, - int index, const char *name); - int vfio_region_mmap(VFIORegion *region); - void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); -+void vfio_region_unmap(VFIORegion *region); - void vfio_region_exit(VFIORegion *region); - void vfio_region_finalize(VFIORegion *region); - void vfio_reset_handler(void *opaque); --- -2.27.0 - diff --git 
a/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch b/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch deleted file mode 100644 index 2831e94ca260e5753f2bcd0007ab036cba387b33..0000000000000000000000000000000000000000 --- a/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 1333031bd3b488ed4904a61fd292cd5aa93f8c5b Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:25 +0530 -Subject: [PATCH] vfio: Add ioctl to get dirty pages bitmap during dma unmap - -With vIOMMU, IO virtual address range can get unmapped while in pre-copy -phase of migration. In that case, unmap ioctl should return pages pinned -in that range and QEMU should find its correcponding guest physical -addresses and report those dirty. - -Suggested-by: Alex Williamson -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast] -Signed-off-by: Alex Williamson ---- - hw/vfio/common.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 93 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 8773b998ac..4ce1c10734 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -320,11 +320,95 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) - return true; - } - -+static bool vfio_devices_all_running_and_saving(VFIOContainer *container) -+{ -+ VFIOGroup *group; -+ VFIODevice *vbasedev; -+ MigrationState *ms = migrate_get_current(); -+ -+ if (!migration_is_setup_or_active(ms->state)) { -+ return false; -+ } -+ -+ QLIST_FOREACH(group, &container->group_list, container_next) { -+ QLIST_FOREACH(vbasedev, &group->device_list, next) { -+ VFIOMigration *migration = vbasedev->migration; -+ -+ if (!migration) { -+ return false; -+ } -+ -+ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && -+ (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { -+ continue; -+ } else 
{ -+ return false; -+ } -+ } -+ } -+ return true; -+} -+ -+static int vfio_dma_unmap_bitmap(VFIOContainer *container, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) -+{ -+ struct vfio_iommu_type1_dma_unmap *unmap; -+ struct vfio_bitmap *bitmap; -+ uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS; -+ int ret; -+ -+ unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); -+ -+ unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); -+ unmap->iova = iova; -+ unmap->size = size; -+ unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; -+ bitmap = (struct vfio_bitmap *)&unmap->data; -+ -+ /* -+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of -+ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to -+ * TARGET_PAGE_SIZE. -+ */ -+ -+ bitmap->pgsize = TARGET_PAGE_SIZE; -+ bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / -+ BITS_PER_BYTE; -+ -+ if (bitmap->size > container->max_dirty_bitmap_size) { -+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, -+ (uint64_t)bitmap->size); -+ ret = -E2BIG; -+ goto unmap_exit; -+ } -+ -+ bitmap->data = g_try_malloc0(bitmap->size); -+ if (!bitmap->data) { -+ ret = -ENOMEM; -+ goto unmap_exit; -+ } -+ -+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); -+ if (!ret) { -+ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, -+ iotlb->translated_addr, pages); -+ } else { -+ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); -+ } -+ -+ g_free(bitmap->data); -+unmap_exit: -+ g_free(unmap); -+ return ret; -+} -+ - /* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ - static int vfio_dma_unmap(VFIOContainer *container, -- hwaddr iova, ram_addr_t size) -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) - { - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), -@@ -333,6 +417,11 @@ static int vfio_dma_unmap(VFIOContainer *container, - .size = size, - }; - -+ if (iotlb && 
container->dirty_pages_supported && -+ vfio_devices_all_running_and_saving(container)) { -+ return vfio_dma_unmap_bitmap(container, iova, size, iotlb); -+ } -+ - while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { - /* - * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c -@@ -380,7 +469,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, - * the VGA ROM space. - */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || -- (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && -+ (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } -@@ -530,7 +619,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - iotlb->addr_mask + 1, vaddr, ret); - } - } else { -- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1); -+ ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); - if (ret) { - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%m)", -@@ -816,7 +905,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - } - - if (try_unmap) { -- ret = vfio_dma_unmap(container, iova, int128_get64(llsize)); -+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%m)", --- -2.27.0 - diff --git a/vfio-Add-load-state-functions-to-SaveVMHandlers.patch b/vfio-Add-load-state-functions-to-SaveVMHandlers.patch deleted file mode 100644 index d70caeeef043b6c6cb53f09c9adb67b40b344862..0000000000000000000000000000000000000000 --- a/vfio-Add-load-state-functions-to-SaveVMHandlers.patch +++ /dev/null @@ -1,266 +0,0 @@ -From ddef5d5257987f2f415ce41fdc482feda61aa796 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:19 +0530 -Subject: [PATCH] vfio: Add load state functions to SaveVMHandlers - -Sequence during _RESUMING device 
state: -While data for this device is available, repeat below steps: -a. read data_offset from where user application should write data. -b. write data of data_size to migration region from data_offset. -c. write data_size which indicates vendor driver that data is written in - staging buffer. - -For user, data is opaque. User should write data in the same order as -received. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Yan Zhao -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 195 +++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 4 + - 2 files changed, 199 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index f78a77e1e3..954c064435 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -257,6 +257,77 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) - return ret; - } - -+static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, -+ uint64_t data_size) -+{ -+ VFIORegion *region = &vbasedev->migration->region; -+ uint64_t data_offset = 0, size, report_size; -+ int ret; -+ -+ do { -+ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), -+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if (data_offset + data_size > region->size) { -+ /* -+ * If data_size is greater than the data section of migration region -+ * then iterate the write buffer operation. This case can occur if -+ * size of migration region at destination is smaller than size of -+ * migration region at source. 
-+ */ -+ report_size = size = region->size - data_offset; -+ data_size -= size; -+ } else { -+ report_size = size = data_size; -+ data_size = 0; -+ } -+ -+ trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); -+ -+ while (size) { -+ void *buf; -+ uint64_t sec_size; -+ bool buf_alloc = false; -+ -+ buf = get_data_section_size(region, data_offset, size, &sec_size); -+ -+ if (!buf) { -+ buf = g_try_malloc(sec_size); -+ if (!buf) { -+ error_report("%s: Error allocating buffer ", __func__); -+ return -ENOMEM; -+ } -+ buf_alloc = true; -+ } -+ -+ qemu_get_buffer(f, buf, sec_size); -+ -+ if (buf_alloc) { -+ ret = vfio_mig_write(vbasedev, buf, sec_size, -+ region->fd_offset + data_offset); -+ g_free(buf); -+ -+ if (ret < 0) { -+ return ret; -+ } -+ } -+ size -= sec_size; -+ data_offset += sec_size; -+ } -+ -+ ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), -+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); -+ if (ret < 0) { -+ return ret; -+ } -+ } while (data_size); -+ -+ return 0; -+} -+ - static int vfio_update_pending(VFIODevice *vbasedev) - { - VFIOMigration *migration = vbasedev->migration; -@@ -293,6 +364,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque) - return qemu_file_get_error(f); - } - -+static int vfio_load_device_config_state(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ uint64_t data; -+ -+ if (vbasedev->ops && vbasedev->ops->vfio_load_config) { -+ int ret; -+ -+ ret = vbasedev->ops->vfio_load_config(vbasedev, f); -+ if (ret) { -+ error_report("%s: Failed to load device config space", -+ vbasedev->name); -+ return ret; -+ } -+ } -+ -+ data = qemu_get_be64(f); -+ if (data != VFIO_MIG_FLAG_END_OF_STATE) { -+ error_report("%s: Failed loading device config space, " -+ "end flag incorrect 0x%"PRIx64, vbasedev->name, data); -+ return -EINVAL; -+ } -+ -+ trace_vfio_load_device_config_state(vbasedev->name); -+ return qemu_file_get_error(f); -+} -+ - static void 
vfio_migration_cleanup(VFIODevice *vbasedev) - { - VFIOMigration *migration = vbasedev->migration; -@@ -483,12 +581,109 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - return ret; - } - -+static int vfio_load_setup(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ int ret = 0; -+ -+ if (migration->region.mmaps) { -+ ret = vfio_region_mmap(&migration->region); -+ if (ret) { -+ error_report("%s: Failed to mmap VFIO migration region %d: %s", -+ vbasedev->name, migration->region.nr, -+ strerror(-ret)); -+ error_report("%s: Falling back to slow path", vbasedev->name); -+ } -+ } -+ -+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, -+ VFIO_DEVICE_STATE_RESUMING); -+ if (ret) { -+ error_report("%s: Failed to set state RESUMING", vbasedev->name); -+ if (migration->region.mmaps) { -+ vfio_region_unmap(&migration->region); -+ } -+ } -+ return ret; -+} -+ -+static int vfio_load_cleanup(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ -+ vfio_migration_cleanup(vbasedev); -+ trace_vfio_load_cleanup(vbasedev->name); -+ return 0; -+} -+ -+static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) -+{ -+ VFIODevice *vbasedev = opaque; -+ int ret = 0; -+ uint64_t data; -+ -+ data = qemu_get_be64(f); -+ while (data != VFIO_MIG_FLAG_END_OF_STATE) { -+ -+ trace_vfio_load_state(vbasedev->name, data); -+ -+ switch (data) { -+ case VFIO_MIG_FLAG_DEV_CONFIG_STATE: -+ { -+ ret = vfio_load_device_config_state(f, opaque); -+ if (ret) { -+ return ret; -+ } -+ break; -+ } -+ case VFIO_MIG_FLAG_DEV_SETUP_STATE: -+ { -+ data = qemu_get_be64(f); -+ if (data == VFIO_MIG_FLAG_END_OF_STATE) { -+ return ret; -+ } else { -+ error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, -+ vbasedev->name, data); -+ return -EINVAL; -+ } -+ break; -+ } -+ case VFIO_MIG_FLAG_DEV_DATA_STATE: -+ { -+ uint64_t data_size = qemu_get_be64(f); -+ -+ if (data_size) { -+ ret = vfio_load_buffer(f, 
vbasedev, data_size); -+ if (ret < 0) { -+ return ret; -+ } -+ } -+ break; -+ } -+ default: -+ error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); -+ return -EINVAL; -+ } -+ -+ data = qemu_get_be64(f); -+ ret = qemu_file_get_error(f); -+ if (ret) { -+ return ret; -+ } -+ } -+ return ret; -+} -+ - static SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, - .save_live_pending = vfio_save_pending, - .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, -+ .load_setup = vfio_load_setup, -+ .load_cleanup = vfio_load_cleanup, -+ .load_state = vfio_load_state, - }; - - /* ---------------------------------------------------------------------- */ -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 9a1c5e17d9..4f08f5a633 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -157,3 +157,7 @@ vfio_save_device_config_state(const char *name) " (%s)" - vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 - vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" - vfio_save_complete_precopy(const char *name) " (%s)" -+vfio_load_device_config_state(const char *name) " (%s)" -+vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 -+vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 -+vfio_load_cleanup(const char *name) " (%s)" --- -2.27.0 - diff --git a/vfio-Add-migration-region-initialization-and-finaliz.patch b/vfio-Add-migration-region-initialization-and-finaliz.patch deleted file mode 100644 index c804f1f6c353143ca74fe05889d5e163ab9dc8c3..0000000000000000000000000000000000000000 --- a/vfio-Add-migration-region-initialization-and-finaliz.patch +++ /dev/null @@ -1,209 +0,0 @@ -From 
b7128f8aa03482634c07691cef69e7ed2d35200e Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:14 +0530 -Subject: [PATCH] vfio: Add migration region initialization and finalize - function - -Whether the VFIO device supports migration or not is decided based of -migration region query. If migration region query is successful and migration -region initialization is successful then migration is supported else -migration is blocked. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Acked-by: Dr. David Alan Gilbert -Reviewed-by: Cornelia Huck -Signed-off-by: Alex Williamson -Signed-off-by: Shenming Lu ---- - hw/vfio/Makefile.objs | 2 +- - hw/vfio/migration.c | 122 ++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 3 + - include/hw/vfio/vfio-common.h | 9 +++ - 4 files changed, 135 insertions(+), 1 deletion(-) - create mode 100644 hw/vfio/migration.c - -diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs -index abad8b818c..36033d1437 100644 ---- a/hw/vfio/Makefile.objs -+++ b/hw/vfio/Makefile.objs -@@ -1,4 +1,4 @@ --obj-y += common.o spapr.o -+obj-y += common.o spapr.o migration.o - obj-$(CONFIG_VFIO_PCI) += pci.o pci-quirks.o display.o - obj-$(CONFIG_VFIO_CCW) += ccw.o - obj-$(CONFIG_VFIO_PLATFORM) += platform.o -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -new file mode 100644 -index 0000000000..fd7faf423c ---- /dev/null -+++ b/hw/vfio/migration.c -@@ -0,0 +1,122 @@ -+/* -+ * Migration support for VFIO devices -+ * -+ * Copyright NVIDIA, Inc. 2020 -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. See -+ * the COPYING file in the top-level directory. 
-+ */ -+ -+#include "qemu/osdep.h" -+#include -+ -+#include "hw/vfio/vfio-common.h" -+#include "cpu.h" -+#include "migration/migration.h" -+#include "migration/qemu-file.h" -+#include "migration/register.h" -+#include "migration/blocker.h" -+#include "migration/misc.h" -+#include "qapi/error.h" -+#include "exec/ramlist.h" -+#include "exec/ram_addr.h" -+#include "pci.h" -+#include "trace.h" -+ -+static void vfio_migration_exit(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ vfio_region_exit(&migration->region); -+ vfio_region_finalize(&migration->region); -+ g_free(vbasedev->migration); -+ vbasedev->migration = NULL; -+} -+ -+static int vfio_migration_init(VFIODevice *vbasedev, -+ struct vfio_region_info *info) -+{ -+ int ret; -+ Object *obj; -+ -+ if (!vbasedev->ops->vfio_get_object) { -+ return -EINVAL; -+ } -+ -+ obj = vbasedev->ops->vfio_get_object(vbasedev); -+ if (!obj) { -+ return -EINVAL; -+ } -+ -+ vbasedev->migration = g_new0(VFIOMigration, 1); -+ -+ ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, -+ info->index, "migration"); -+ if (ret) { -+ error_report("%s: Failed to setup VFIO migration region %d: %s", -+ vbasedev->name, info->index, strerror(-ret)); -+ goto err; -+ } -+ -+ if (!vbasedev->migration->region.size) { -+ error_report("%s: Invalid zero-sized VFIO migration region %d", -+ vbasedev->name, info->index); -+ ret = -EINVAL; -+ goto err; -+ } -+ return 0; -+ -+err: -+ vfio_migration_exit(vbasedev); -+ return ret; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) -+{ -+ struct vfio_region_info *info = NULL; -+ Error *local_err = NULL; -+ int ret; -+ -+ ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, -+ VFIO_REGION_SUBTYPE_MIGRATION, &info); -+ if (ret) { -+ goto add_blocker; -+ } -+ -+ ret = vfio_migration_init(vbasedev, info); -+ if (ret) { -+ goto add_blocker; -+ } -+ 
-+ g_free(info); -+ trace_vfio_migration_probe(vbasedev->name, info->index); -+ return 0; -+ -+add_blocker: -+ error_setg(&vbasedev->migration_blocker, -+ "VFIO device doesn't support migration"); -+ g_free(info); -+ -+ ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ error_free(vbasedev->migration_blocker); -+ vbasedev->migration_blocker = NULL; -+ } -+ return ret; -+} -+ -+void vfio_migration_finalize(VFIODevice *vbasedev) -+{ -+ if (vbasedev->migration) { -+ vfio_migration_exit(vbasedev); -+ } -+ -+ if (vbasedev->migration_blocker) { -+ migrate_del_blocker(vbasedev->migration_blocker); -+ error_free(vbasedev->migration_blocker); -+ vbasedev->migration_blocker = NULL; -+ } -+} -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 8cdc27946c..fd034ac536 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -143,3 +143,6 @@ vfio_display_edid_link_up(void) "" - vfio_display_edid_link_down(void) "" - vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" - vfio_display_edid_write_error(void) "" -+ -+# migration.c -+vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 6ea4898c4d..e0482c2bac 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -57,6 +57,10 @@ typedef struct VFIORegion { - uint8_t nr; /* cache the region number for debug */ - } VFIORegion; - -+typedef struct VFIOMigration { -+ VFIORegion region; -+} VFIOMigration; -+ - typedef struct VFIOAddressSpace { - AddressSpace *as; - QLIST_HEAD(, VFIOContainer) containers; -@@ -113,6 +117,8 @@ typedef struct VFIODevice { - unsigned int num_irqs; - unsigned int num_regions; - unsigned int flags; -+ VFIOMigration *migration; -+ Error *migration_blocker; - } VFIODevice; - - struct VFIODeviceOps { -@@ -204,4 +210,7 @@ int vfio_spapr_create_window(VFIOContainer *container, - 
int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space); - -+int vfio_migration_probe(VFIODevice *vbasedev, Error **errp); -+void vfio_migration_finalize(VFIODevice *vbasedev); -+ - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.27.0 - diff --git a/vfio-Add-migration-state-change-notifier.patch b/vfio-Add-migration-state-change-notifier.patch deleted file mode 100644 index 5fe73a4cb18cd401d8d63ec8440cc361bbae60d9..0000000000000000000000000000000000000000 --- a/vfio-Add-migration-state-change-notifier.patch +++ /dev/null @@ -1,104 +0,0 @@ -From b61729a5e0ab89d29f041202b50d042405076e62 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:16 +0530 -Subject: [PATCH] vfio: Add migration state change notifier - -Added migration state change notifier to get notification on migration state -change. These states are translated to VFIO device state and conveyed to -vendor driver. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Reviewed-by: Dr. 
David Alan Gilbert -Reviewed-by: Cornelia Huck -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 28 ++++++++++++++++++++++++++++ - hw/vfio/trace-events | 1 + - include/hw/vfio/vfio-common.h | 2 ++ - 3 files changed, 31 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index ca82c78536..0c6c9b655f 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -175,6 +175,30 @@ static void vfio_vmstate_change(void *opaque, int running, RunState state) - (migration->device_state & mask) | value); - } - -+static void vfio_migration_state_notifier(Notifier *notifier, void *data) -+{ -+ MigrationState *s = data; -+ VFIOMigration *migration = container_of(notifier, VFIOMigration, -+ migration_state); -+ VFIODevice *vbasedev = migration->vbasedev; -+ int ret; -+ -+ trace_vfio_migration_state_notifier(vbasedev->name, -+ MigrationStatus_str(s->state)); -+ -+ switch (s->state) { -+ case MIGRATION_STATUS_CANCELLING: -+ case MIGRATION_STATUS_CANCELLED: -+ case MIGRATION_STATUS_FAILED: -+ ret = vfio_migration_set_state(vbasedev, -+ ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), -+ VFIO_DEVICE_STATE_RUNNING); -+ if (ret) { -+ error_report("%s: Failed to set state RUNNING", vbasedev->name); -+ } -+ } -+} -+ - static void vfio_migration_exit(VFIODevice *vbasedev) - { - VFIOMigration *migration = vbasedev->migration; -@@ -219,8 +243,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, - } - - migration = vbasedev->migration; -+ migration->vbasedev = vbasedev; - migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, - vbasedev); -+ migration->migration_state.notify = vfio_migration_state_notifier; -+ add_migration_state_change_notifier(&migration->migration_state); - return 0; - - err: -@@ -270,6 +297,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) - if (vbasedev->migration) { - VFIOMigration *migration = vbasedev->migration; - -+ remove_migration_state_change_notifier(&migration->migration_state); - 
qemu_del_vm_change_state_handler(migration->vm_state); - vfio_migration_exit(vbasedev); - } -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 1626862315..bd3d47b005 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -148,3 +148,4 @@ vfio_display_edid_write_error(void) "" - vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" - vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" - vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" -+vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 533d6737ac..efff0590ae 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -59,10 +59,12 @@ typedef struct VFIORegion { - } VFIORegion; - - typedef struct VFIOMigration { -+ struct VFIODevice *vbasedev; - VMChangeStateEntry *vm_state; - VFIORegion region; - uint32_t device_state; - int vm_running; -+ Notifier migration_state; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.27.0 - diff --git a/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch b/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch deleted file mode 100644 index 232efcf5dbcc072f358e3fec5a5f1186a1a97b55..0000000000000000000000000000000000000000 --- a/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 92f104ca6e35acae079ca3bb432f24452058d483 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:13 +0530 -Subject: [PATCH] vfio: Add save and load functions for VFIO PCI devices - -Added functions to save and restore PCI device specific data, -specifically config space of PCI device. 
- -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Signed-off-by: Alex Williamson ---- - hw/vfio/pci.c | 51 +++++++++++++++++++++++++++++++++++ - include/hw/vfio/vfio-common.h | 2 ++ - 2 files changed, 53 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index de0d286fc9..b9fae3ad28 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -35,6 +35,7 @@ - #include "pci.h" - #include "trace.h" - #include "qapi/error.h" -+#include "migration/qemu-file.h" - - #define TYPE_VFIO_PCI "vfio-pci" - #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) -@@ -2395,11 +2396,61 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) - return OBJECT(vdev); - } - -+static bool vfio_msix_present(void *opaque, int version_id) -+{ -+ PCIDevice *pdev = opaque; -+ -+ return msix_present(pdev); -+} -+ -+const VMStateDescription vmstate_vfio_pci_config = { -+ .name = "VFIOPCIDevice", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .fields = (VMStateField[]) { -+ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), -+ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ -+static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) -+{ -+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); -+ -+ vmstate_save_state(f, &vmstate_vfio_pci_config, vdev, NULL); -+} -+ -+static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) -+{ -+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); -+ PCIDevice *pdev = &vdev->pdev; -+ int ret; -+ -+ ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1); -+ if (ret) { -+ return ret; -+ } -+ -+ vfio_pci_write_config(pdev, PCI_COMMAND, -+ pci_get_word(pdev->config + PCI_COMMAND), 2); -+ -+ if (msi_enabled(pdev)) { -+ vfio_msi_enable(vdev); -+ } else if (msix_enabled(pdev)) { -+ vfio_msix_enable(vdev); -+ } -+ -+ return ret; -+} -+ - static VFIODeviceOps vfio_pci_ops = { - .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, - 
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi, - .vfio_eoi = vfio_intx_eoi, - .vfio_get_object = vfio_pci_get_object, -+ .vfio_save_config = vfio_pci_save_config, -+ .vfio_load_config = vfio_pci_load_config, - }; - - int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 771b6d59a3..6ea4898c4d 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -120,6 +120,8 @@ struct VFIODeviceOps { - int (*vfio_hot_reset_multi)(VFIODevice *vdev); - void (*vfio_eoi)(VFIODevice *vdev); - Object *(*vfio_get_object)(VFIODevice *vdev); -+ void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); -+ int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); - }; - - typedef struct VFIOGroup { --- -2.27.0 - diff --git a/vfio-Add-save-state-functions-to-SaveVMHandlers.patch b/vfio-Add-save-state-functions-to-SaveVMHandlers.patch deleted file mode 100644 index 14047fd8a474c07c71fa4ba622e1fb33d043b02d..0000000000000000000000000000000000000000 --- a/vfio-Add-save-state-functions-to-SaveVMHandlers.patch +++ /dev/null @@ -1,380 +0,0 @@ -From 94f106f95e887d1d706e8f771fd6ad287ddac2dc Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:18 +0530 -Subject: [PATCH] vfio: Add save state functions to SaveVMHandlers - -Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy -functions. These functions handles pre-copy and stop-and-copy phase. - -In _SAVING|_RUNNING device state or pre-copy phase: -- read pending_bytes. If pending_bytes > 0, go through below steps. -- read data_offset - indicates kernel driver to write data to staging - buffer. -- read data_size - amount of data in bytes written by vendor driver in - migration region. -- read data_size bytes of data from data_offset in the migration region. 
-- Write data packet to file stream as below: -{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data, -VFIO_MIG_FLAG_END_OF_STATE } - -In _SAVING device state or stop-and-copy phase -a. read config space of device and save to migration file stream. This - doesn't need to be from vendor driver. Any other special config state - from driver can be saved as data in following iteration. -b. read pending_bytes. If pending_bytes > 0, go through below steps. -c. read data_offset - indicates kernel driver to write data to staging - buffer. -d. read data_size - amount of data in bytes written by vendor driver in - migration region. -e. read data_size bytes of data from data_offset in the migration region. -f. Write data packet as below: - {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data} -g. iterate through steps b to f while (pending_bytes > 0) -h. Write {VFIO_MIG_FLAG_END_OF_STATE} - -When data region is mapped, its user's responsibility to read data from -data_offset of data_size before moving to next steps. - -Added fix suggested by Artem Polyakov to reset pending_bytes in -vfio_save_iterate(). -Added fix suggested by Zhi Wang to add 0 as data size in migration stream and -add END_OF_STATE delimiter to indicate phase complete. 
- -Suggested-by: Artem Polyakov -Suggested-by: Zhi Wang -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Reviewed-by: Yan Zhao -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 276 ++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 6 + - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 283 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 405228fc5a..f78a77e1e3 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -148,6 +148,151 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, - return 0; - } - -+static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, -+ uint64_t data_size, uint64_t *size) -+{ -+ void *ptr = NULL; -+ uint64_t limit = 0; -+ int i; -+ -+ if (!region->mmaps) { -+ if (size) { -+ *size = MIN(data_size, region->size - data_offset); -+ } -+ return ptr; -+ } -+ -+ for (i = 0; i < region->nr_mmaps; i++) { -+ VFIOMmap *map = region->mmaps + i; -+ -+ if ((data_offset >= map->offset) && -+ (data_offset < map->offset + map->size)) { -+ -+ /* check if data_offset is within sparse mmap areas */ -+ ptr = map->mmap + data_offset - map->offset; -+ if (size) { -+ *size = MIN(data_size, map->offset + map->size - data_offset); -+ } -+ break; -+ } else if ((data_offset < map->offset) && -+ (!limit || limit > map->offset)) { -+ /* -+ * data_offset is not within sparse mmap areas, find size of -+ * non-mapped area. Check through all list since region->mmaps list -+ * is not sorted. -+ */ -+ limit = map->offset; -+ } -+ } -+ -+ if (!ptr && size) { -+ *size = limit ? 
MIN(data_size, limit - data_offset) : data_size; -+ } -+ return ptr; -+} -+ -+static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ VFIORegion *region = &migration->region; -+ uint64_t data_offset = 0, data_size = 0, sz; -+ int ret; -+ -+ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), -+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), -+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, -+ migration->pending_bytes); -+ -+ qemu_put_be64(f, data_size); -+ sz = data_size; -+ -+ while (sz) { -+ void *buf; -+ uint64_t sec_size; -+ bool buf_allocated = false; -+ -+ buf = get_data_section_size(region, data_offset, sz, &sec_size); -+ -+ if (!buf) { -+ buf = g_try_malloc(sec_size); -+ if (!buf) { -+ error_report("%s: Error allocating buffer ", __func__); -+ return -ENOMEM; -+ } -+ buf_allocated = true; -+ -+ ret = vfio_mig_read(vbasedev, buf, sec_size, -+ region->fd_offset + data_offset); -+ if (ret < 0) { -+ g_free(buf); -+ return ret; -+ } -+ } -+ -+ qemu_put_buffer(f, buf, sec_size); -+ -+ if (buf_allocated) { -+ g_free(buf); -+ } -+ sz -= sec_size; -+ data_offset += sec_size; -+ } -+ -+ ret = qemu_file_get_error(f); -+ -+ if (!ret && size) { -+ *size = data_size; -+ } -+ -+ return ret; -+} -+ -+static int vfio_update_pending(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ VFIORegion *region = &migration->region; -+ uint64_t pending_bytes = 0; -+ int ret; -+ -+ ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), -+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); -+ if (ret < 0) { -+ migration->pending_bytes = 0; -+ return ret; -+ } -+ -+ migration->pending_bytes = pending_bytes; -+ 
trace_vfio_update_pending(vbasedev->name, pending_bytes); -+ return 0; -+} -+ -+static int vfio_save_device_config_state(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ -+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE); -+ -+ if (vbasedev->ops && vbasedev->ops->vfio_save_config) { -+ vbasedev->ops->vfio_save_config(vbasedev, f); -+ } -+ -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ -+ trace_vfio_save_device_config_state(vbasedev->name); -+ -+ return qemu_file_get_error(f); -+} -+ - static void vfio_migration_cleanup(VFIODevice *vbasedev) - { - VFIOMigration *migration = vbasedev->migration; -@@ -210,9 +355,140 @@ static void vfio_save_cleanup(void *opaque) - trace_vfio_save_cleanup(vbasedev->name); - } - -+static void vfio_save_pending(QEMUFile *f, void *opaque, -+ uint64_t threshold_size, -+ uint64_t *res_precopy_only, -+ uint64_t *res_compatible, -+ uint64_t *res_postcopy_only) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ int ret; -+ -+ ret = vfio_update_pending(vbasedev); -+ if (ret) { -+ return; -+ } -+ -+ *res_precopy_only += migration->pending_bytes; -+ -+ trace_vfio_save_pending(vbasedev->name, *res_precopy_only, -+ *res_postcopy_only, *res_compatible); -+} -+ -+static int vfio_save_iterate(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ uint64_t data_size; -+ int ret; -+ -+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); -+ -+ if (migration->pending_bytes == 0) { -+ ret = vfio_update_pending(vbasedev); -+ if (ret) { -+ return ret; -+ } -+ -+ if (migration->pending_bytes == 0) { -+ qemu_put_be64(f, 0); -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ /* indicates data finished, goto complete phase */ -+ return 1; -+ } -+ } -+ -+ ret = vfio_save_buffer(f, vbasedev, &data_size); -+ if (ret) { -+ error_report("%s: vfio_save_buffer failed %s", vbasedev->name, -+ strerror(errno)); -+ return ret; -+ } -+ -+ 
qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ -+ ret = qemu_file_get_error(f); -+ if (ret) { -+ return ret; -+ } -+ -+ /* -+ * Reset pending_bytes as .save_live_pending is not called during savevm or -+ * snapshot case, in such case vfio_update_pending() at the start of this -+ * function updates pending_bytes. -+ */ -+ migration->pending_bytes = 0; -+ trace_vfio_save_iterate(vbasedev->name, data_size); -+ return 0; -+} -+ -+static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ uint64_t data_size; -+ int ret; -+ -+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING, -+ VFIO_DEVICE_STATE_SAVING); -+ if (ret) { -+ error_report("%s: Failed to set state STOP and SAVING", -+ vbasedev->name); -+ return ret; -+ } -+ -+ ret = vfio_save_device_config_state(f, opaque); -+ if (ret) { -+ return ret; -+ } -+ -+ ret = vfio_update_pending(vbasedev); -+ if (ret) { -+ return ret; -+ } -+ -+ while (migration->pending_bytes > 0) { -+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); -+ ret = vfio_save_buffer(f, vbasedev, &data_size); -+ if (ret < 0) { -+ error_report("%s: Failed to save buffer", vbasedev->name); -+ return ret; -+ } -+ -+ if (data_size == 0) { -+ break; -+ } -+ -+ ret = vfio_update_pending(vbasedev); -+ if (ret) { -+ return ret; -+ } -+ } -+ -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ -+ ret = qemu_file_get_error(f); -+ if (ret) { -+ return ret; -+ } -+ -+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0); -+ if (ret) { -+ error_report("%s: Failed to set state STOPPED", vbasedev->name); -+ return ret; -+ } -+ -+ trace_vfio_save_complete_precopy(vbasedev->name); -+ return ret; -+} -+ - static SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, -+ .save_live_pending = vfio_save_pending, -+ .save_live_iterate = vfio_save_iterate, -+ .save_live_complete_precopy = 
vfio_save_complete_precopy, - }; - - /* ---------------------------------------------------------------------- */ -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 86c18def01..9a1c5e17d9 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -151,3 +151,9 @@ vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t - vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" - vfio_save_setup(const char *name) " (%s)" - vfio_save_cleanup(const char *name) " (%s)" -+vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 -+vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 -+vfio_save_device_config_state(const char *name) " (%s)" -+vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 -+vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" -+vfio_save_complete_precopy(const char *name) " (%s)" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index efff0590ae..c825524606 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -65,6 +65,7 @@ typedef struct VFIOMigration { - uint32_t device_state; - int vm_running; - Notifier migration_state; -+ uint64_t pending_bytes; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.27.0 - diff --git a/vfio-Add-the-support-for-PrivateSharedManager-Interf.patch b/vfio-Add-the-support-for-PrivateSharedManager-Interf.patch new file mode 100644 index 0000000000000000000000000000000000000000..5721d362932dd5820794204bdfb2a14207fd5ab3 --- /dev/null +++ b/vfio-Add-the-support-for-PrivateSharedManager-Interf.patch @@ -0,0 +1,254 @@ +From 2cf51bbf91b9409b411e0904cd3a2f4875646fec Mon Sep 17 00:00:00 2001 +From: Chenyi Qiang +Date: Mon, 7 Apr 2025 
15:49:26 +0800 +Subject: [PATCH] vfio: Add the support for PrivateSharedManager Interface + +Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/f301a300d981459e74387ee10de01e8589d35451 + +Subsystems like VFIO previously disabled ram block discard and only +allowed coordinated discarding via RamDiscardManager. However, +guest_memfd in confidential VMs relies on discard operations for page +conversion between private and shared memory. This can lead to stale +IOMMU mapping issue when assigning a hardware device to a confidential +VM via shared memory. With the introduction of PrivateSharedManager +interface to manage private and shared states and being distinct from +RamDiscardManager, include PrivateSharedManager in coordinated RAM +discard and add related support in VFIO. + +Currently, migration support for confidential VMs is not available, so +vfio_sync_dirty_bitmap() handling for PrivateSharedListener can be +ignored. The register/unregister of PrivateSharedListener is necessary +during vfio_listener_region_add/del(). The listener callbacks are +similar between RamDiscardListener and PrivateSharedListener, allowing +for extraction of common parts opportunistically. 
+ +Signed-off-by: Chenyi Qiang +Conflicts: + hw/vfio/container-base.c +Signed-off-by: frankyj915 +Signed-off-by: houmingyong +--- + hw/vfio/common.c | 104 +++++++++++++++++++++++--- + hw/vfio/container-base.c | 1 + + include/hw/vfio/vfio-container-base.h | 10 +++ + 3 files changed, 105 insertions(+), 10 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index ab7450f3bd..62a2000acd 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -350,13 +350,9 @@ out: + rcu_read_unlock(); + } + +-static void vfio_ram_discard_notify_discard(StateChangeListener *scl, +- MemoryRegionSection *section) ++static void vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { +- RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); +- VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, +- listener); +- VFIOContainerBase *bcontainer = vrdl->bcontainer; + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + int ret; +@@ -369,13 +365,28 @@ static void vfio_ram_discard_notify_discard(StateChangeListener *scl, + } + } + +-static int vfio_ram_discard_notify_populate(StateChangeListener *scl, ++static void vfio_ram_discard_notify_discard(StateChangeListener *scl, + MemoryRegionSection *section) + { + RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); +- VFIOContainerBase *bcontainer = vrdl->bcontainer; ++ vfio_state_change_notify_to_state_clear(vrdl->bcontainer, section); ++} ++ ++static void vfio_private_shared_notify_to_private(StateChangeListener *scl, ++ MemoryRegionSection *section) ++{ ++ PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl); ++ VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener, ++ listener); ++ vfio_state_change_notify_to_state_clear(vpsl->bcontainer, 
section); ++} ++ ++static int vfio_state_change_notify_to_state_set(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ uint64_t granularity) ++{ + const hwaddr end = section->offset_within_region + + int128_get64(section->size); + hwaddr start, next, iova; +@@ -387,7 +398,7 @@ static int vfio_ram_discard_notify_populate(StateChangeListener *scl, + * unmap in minimum granularity later. + */ + for (start = section->offset_within_region; start < end; start = next) { +- next = ROUND_UP(start + 1, vrdl->granularity); ++ next = ROUND_UP(start + 1, granularity); + next = MIN(next, end); + + iova = start - section->offset_within_region + +@@ -398,13 +409,33 @@ static int vfio_ram_discard_notify_populate(StateChangeListener *scl, + vaddr, section->readonly); + if (ret) { + /* Rollback */ +- vfio_ram_discard_notify_discard(scl, section); ++ vfio_state_change_notify_to_state_clear(bcontainer, section); + return ret; + } + } + return 0; + } + ++static int vfio_ram_discard_notify_populate(StateChangeListener *scl, ++ MemoryRegionSection *section) ++{ ++ RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); ++ VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, ++ listener); ++ return vfio_state_change_notify_to_state_set(vrdl->bcontainer, section, ++ vrdl->granularity); ++} ++ ++static int vfio_private_shared_notify_to_shared(StateChangeListener *scl, ++ MemoryRegionSection *section) ++{ ++ PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl); ++ VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener, ++ listener); ++ return vfio_state_change_notify_to_state_set(vpsl->bcontainer, section, ++ vpsl->granularity); ++} ++ + static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +@@ -481,6 +512,27 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + } + } + ++static void 
vfio_register_private_shared_listener(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) ++{ ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); ++ VFIOPrivateSharedListener *vpsl; ++ PrivateSharedListener *psl; ++ ++ vpsl = g_new0(VFIOPrivateSharedListener, 1); ++ vpsl->bcontainer = bcontainer; ++ vpsl->mr = section->mr; ++ vpsl->offset_within_address_space = section->offset_within_address_space; ++ vpsl->granularity = generic_state_manager_get_min_granularity(gsm, ++ section->mr); ++ ++ psl = &vpsl->listener; ++ private_shared_listener_init(psl, vfio_private_shared_notify_to_shared, ++ vfio_private_shared_notify_to_private); ++ generic_state_manager_register_listener(gsm, &psl->scl, section); ++ QLIST_INSERT_HEAD(&bcontainer->vpsl_list, vpsl, next); ++} ++ + static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +@@ -506,6 +558,31 @@ static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, + g_free(vrdl); + } + ++static void vfio_unregister_private_shared_listener(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) ++{ ++ GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); ++ VFIOPrivateSharedListener *vpsl = NULL; ++ PrivateSharedListener *psl; ++ ++ QLIST_FOREACH(vpsl, &bcontainer->vpsl_list, next) { ++ if (vpsl->mr == section->mr && ++ vpsl->offset_within_address_space == ++ section->offset_within_address_space) { ++ break; ++ } ++ } ++ ++ if (!vpsl) { ++ hw_error("vfio: Trying to unregister missing RAM discard listener"); ++ } ++ ++ psl = &vpsl->listener; ++ generic_state_manager_unregister_listener(gsm, &psl->scl); ++ QLIST_REMOVE(vpsl, next); ++ g_free(vpsl); ++} ++ + static bool vfio_known_safe_misalignment(MemoryRegionSection *section) + { + MemoryRegion *mr = section->mr; +@@ -677,6 +754,9 @@ static void vfio_listener_region_add(MemoryListener *listener, + if 
(memory_region_has_ram_discard_manager(section->mr)) { + vfio_register_ram_discard_listener(bcontainer, section); + return; ++ } else if (memory_region_has_private_shared_manager(section->mr)) { ++ vfio_register_private_shared_listener(bcontainer, section); ++ return; + } + + vaddr = memory_region_get_ram_ptr(section->mr) + +@@ -796,6 +876,10 @@ static void vfio_listener_region_del(MemoryListener *listener, + vfio_unregister_ram_discard_listener(bcontainer, section); + /* Unregistering will trigger an unmap. */ + try_unmap = false; ++ } else if (memory_region_has_private_shared_manager(section->mr)) { ++ vfio_unregister_private_shared_listener(bcontainer, section); ++ /* Unregistering will trigger an unmap. */ ++ try_unmap = false; + } + + if (try_unmap) { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 913ae49077..a356ae91a9 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -82,6 +82,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->iova_ranges = NULL; + QLIST_INIT(&bcontainer->giommu_list); + QLIST_INIT(&bcontainer->vrdl_list); ++ QLIST_INIT(&bcontainer->vpsl_list); + } + + void vfio_container_destroy(VFIOContainerBase *bcontainer) +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 7a4c575115..faed33bf92 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -46,6 +46,7 @@ typedef struct VFIOContainerBase { + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; ++ QLIST_HEAD(, VFIOPrivateSharedListener) vpsl_list; + QLIST_ENTRY(VFIOContainerBase) next; + QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; +@@ -69,6 +70,15 @@ typedef struct VFIORamDiscardListener { + QLIST_ENTRY(VFIORamDiscardListener) next; + } VFIORamDiscardListener; + ++typedef struct VFIOPrivateSharedListener { ++ 
VFIOContainerBase *bcontainer; ++ MemoryRegion *mr; ++ hwaddr offset_within_address_space; ++ uint64_t granularity; ++ PrivateSharedListener listener; ++ QLIST_ENTRY(VFIOPrivateSharedListener) next; ++} VFIOPrivateSharedListener; ++ + int vfio_container_dma_map(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.33.0 + diff --git a/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch b/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch deleted file mode 100644 index e13a1daf40d518a550e8af5618b2d2b537cb43cc..0000000000000000000000000000000000000000 --- a/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch +++ /dev/null @@ -1,55 +0,0 @@ -From c1de789d89132b66243fbfe253f10764ce514a08 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:12 +0530 -Subject: [PATCH] vfio: Add vfio_get_object callback to VFIODeviceOps - -Hook vfio_get_object callback for PCI devices. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Alex Williamson ---- - hw/vfio/pci.c | 8 ++++++++ - include/hw/vfio/vfio-common.h | 1 + - 2 files changed, 9 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d7a4e1875c..de0d286fc9 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2388,10 +2388,18 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev) - } - } - -+static Object *vfio_pci_get_object(VFIODevice *vbasedev) -+{ -+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); -+ -+ return OBJECT(vdev); -+} -+ - static VFIODeviceOps vfio_pci_ops = { - .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, - .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, - .vfio_eoi = vfio_intx_eoi, -+ .vfio_get_object = vfio_pci_get_object, - }; - - int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 
93493891ba..771b6d59a3 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -119,6 +119,7 @@ struct VFIODeviceOps { - void (*vfio_compute_needs_reset)(VFIODevice *vdev); - int (*vfio_hot_reset_multi)(VFIODevice *vdev); - void (*vfio_eoi)(VFIODevice *vdev); -+ Object *(*vfio_get_object)(VFIODevice *vdev); - }; - - typedef struct VFIOGroup { --- -2.27.0 - diff --git a/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch b/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch deleted file mode 100644 index 6479a2550592f6eff10e6c244ed3648d887ed1a6..0000000000000000000000000000000000000000 --- a/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch +++ /dev/null @@ -1,182 +0,0 @@ -From 3ac0647003d192579bcb6c1081b75d9c8ada78e0 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:23 +0530 -Subject: [PATCH] vfio: Add vfio_listener_log_sync to mark dirty pages - -vfio_listener_log_sync gets list of dirty pages from container using -VFIO_IOMMU_GET_DIRTY_BITMAP ioctl and mark those pages dirty when all -devices are stopped and saving state. -Return early for the RAM block section of mapped MMIO region. 
- -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast] -Signed-off-by: Alex Williamson ---- - hw/vfio/common.c | 116 +++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 1 + - 2 files changed, 117 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 35168b8f3e..4d2828fc97 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -29,6 +29,7 @@ - #include "hw/vfio/vfio.h" - #include "exec/address-spaces.h" - #include "exec/memory.h" -+#include "exec/ram_addr.h" - #include "hw/hw.h" - #include "qemu/error-report.h" - #include "qemu/range.h" -@@ -36,6 +37,7 @@ - #include "sysemu/kvm.h" - #include "trace.h" - #include "qapi/error.h" -+#include "migration/migration.h" - - VFIOGroupList vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_group_list); -@@ -285,6 +287,39 @@ const MemoryRegionOps vfio_region_ops = { - }, - }; - -+/* -+ * Device state interfaces -+ */ -+ -+static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) -+{ -+ VFIOGroup *group; -+ VFIODevice *vbasedev; -+ MigrationState *ms = migrate_get_current(); -+ -+ if (!migration_is_setup_or_active(ms->state)) { -+ return false; -+ } -+ -+ QLIST_FOREACH(group, &container->group_list, container_next) { -+ QLIST_FOREACH(vbasedev, &group->device_list, next) { -+ VFIOMigration *migration = vbasedev->migration; -+ -+ if (!migration) { -+ return false; -+ } -+ -+ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && -+ !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { -+ continue; -+ } else { -+ return false; -+ } -+ } -+ } -+ return true; -+} -+ - /* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -@@ -794,9 +829,90 @@ static void vfio_listener_region_del(MemoryListener *listener, - } - } - -+static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, -+ uint64_t size, ram_addr_t ram_addr) -+{ -+ struct 
vfio_iommu_type1_dirty_bitmap *dbitmap; -+ struct vfio_iommu_type1_dirty_bitmap_get *range; -+ uint64_t pages; -+ int ret; -+ -+ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); -+ -+ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); -+ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; -+ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; -+ range->iova = iova; -+ range->size = size; -+ -+ /* -+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of -+ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to -+ * TARGET_PAGE_SIZE. -+ */ -+ range->bitmap.pgsize = TARGET_PAGE_SIZE; -+ -+ pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS; -+ range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / -+ BITS_PER_BYTE; -+ range->bitmap.data = g_try_malloc0(range->bitmap.size); -+ if (!range->bitmap.data) { -+ ret = -ENOMEM; -+ goto err_out; -+ } -+ -+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); -+ if (ret) { -+ error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 -+ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, -+ (uint64_t)range->size, errno); -+ goto err_out; -+ } -+ -+ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data, -+ ram_addr, pages); -+ -+ trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size, -+ range->bitmap.size, ram_addr); -+err_out: -+ g_free(range->bitmap.data); -+ g_free(dbitmap); -+ -+ return ret; -+} -+ -+static int vfio_sync_dirty_bitmap(VFIOContainer *container, -+ MemoryRegionSection *section) -+{ -+ ram_addr_t ram_addr; -+ -+ ram_addr = memory_region_get_ram_addr(section->mr) + -+ section->offset_within_region; -+ -+ return vfio_get_dirty_bitmap(container, -+ TARGET_PAGE_ALIGN(section->offset_within_address_space), -+ int128_get64(section->size), ram_addr); -+} -+ -+static void vfio_listerner_log_sync(MemoryListener *listener, -+ MemoryRegionSection *section) -+{ -+ VFIOContainer *container = 
container_of(listener, VFIOContainer, listener); -+ -+ if (vfio_listener_skipped_section(section) || -+ !container->dirty_pages_supported) { -+ return; -+ } -+ -+ if (vfio_devices_all_stopped_and_saving(container)) { -+ vfio_sync_dirty_bitmap(container, section); -+ } -+} -+ - static const MemoryListener vfio_memory_listener = { - .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, -+ .log_sync = vfio_listerner_log_sync, - }; - - static void vfio_listener_release(VFIOContainer *container) -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 4f08f5a633..4167f35d64 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -161,3 +161,4 @@ vfio_load_device_config_state(const char *name) " (%s)" - vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 - vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 - vfio_load_cleanup(const char *name) " (%s)" -+vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 --- -2.27.0 - diff --git a/vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch b/vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch deleted file mode 100644 index 289638a9e3c453dfe9fa9e863209ddbd5ea0489f..0000000000000000000000000000000000000000 --- a/vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 6aa770f4b83ca068d0c8f3102edda32666a8404d Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Tue, 11 May 2021 10:08:15 +0800 -Subject: [PATCH] vfio: Add vfio_prereg_listener_global_log_start/stop in - nested stage - -In nested mode, we set up the stage 2 and stage 1 separately. In my -opinion, vfio_memory_prereg_listener is used for stage 2 and -vfio_memory_listener is used for stage 1. 
So it feels weird to call -the global_log_start/stop interface in vfio_memory_listener to switch -dirty tracking, although this won't cause any errors. Add -global_log_start/stop interface in vfio_memory_prereg_listener -can separate stage 2 from stage 1. - -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index b5f9ba816e..fb7ca63748 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1239,6 +1239,17 @@ static void vfio_listener_log_global_start(MemoryListener *listener) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); - -+ /* For nested mode, vfio_prereg_listener is used to start dirty tracking */ -+ if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) { -+ vfio_set_dirty_page_tracking(container, true); -+ } -+} -+ -+static void vfio_prereg_listener_log_global_start(MemoryListener *listener) -+{ -+ VFIOContainer *container = -+ container_of(listener, VFIOContainer, prereg_listener); -+ - vfio_set_dirty_page_tracking(container, true); - } - -@@ -1246,6 +1257,17 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); - -+ /* For nested mode, vfio_prereg_listener is used to stop dirty tracking */ -+ if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) { -+ vfio_set_dirty_page_tracking(container, false); -+ } -+} -+ -+static void vfio_prereg_listener_log_global_stop(MemoryListener *listener) -+{ -+ VFIOContainer *container = -+ container_of(listener, VFIOContainer, prereg_listener); -+ - vfio_set_dirty_page_tracking(container, false); - } - -@@ -1614,6 +1636,8 @@ static const MemoryListener vfio_memory_listener = { - static MemoryListener vfio_memory_prereg_listener = { - .region_add = vfio_prereg_listener_region_add, - .region_del = vfio_prereg_listener_region_del, -+ .log_global_start = 
vfio_prereg_listener_log_global_start, -+ .log_global_stop = vfio_prereg_listener_log_global_stop, - .log_sync = vfio_prereg_listener_log_sync, - .log_clear = vfio_prereg_listener_log_clear, - }; --- -2.27.0 - diff --git a/vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch b/vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch deleted file mode 100644 index e4da89bd477558ea9e58538c75b0c198d27e3d21..0000000000000000000000000000000000000000 --- a/vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch +++ /dev/null @@ -1,84 +0,0 @@ -From f959faa36fc100894a44f2e6cd7e02a183ba142a Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Sat, 31 Jul 2021 09:40:24 +0800 -Subject: [PATCH] vfio: Add vfio_prereg_listener_log_clear to re-enable mark - dirty pages - -When tracking dirty pages, we just need to pay attention to stage 2 -mappings. Legacy vfio_listener_log_clear cannot be used in nested -stage. This patch adds vfio_prereg_listener_log_clear to re-enable -dirty pages in nested mode. - -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 40 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 39 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 6b00bd4c2f..b5f9ba816e 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1550,6 +1550,43 @@ static int vfio_physical_log_clear(VFIOContainer *container, - return ret; - } - -+static void vfio_prereg_listener_log_clear(MemoryListener *listener, -+ MemoryRegionSection *section) -+{ -+ VFIOContainer *container = -+ container_of(listener, VFIOContainer, prereg_listener); -+ -+ if (!memory_region_is_ram(section->mr)) { -+ return; -+ } -+ -+ vfio_physical_log_clear(container, section); -+} -+ -+static int vfio_clear_dirty_bitmap(VFIOContainer *container, -+ MemoryRegionSection *section) -+{ -+ if (memory_region_is_iommu(section->mr)) { -+ /* -+ * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are -+ * set up separately. 
It is inappropriate to pass 'giova' to kernel -+ * to get dirty pages. We only need to focus on stage 2 mapping when -+ * marking dirty pages. -+ */ -+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { -+ return 0; -+ } -+ -+ /* -+ * TODO: x86. With the log_clear() interface added, x86 may inplement -+ * its own method. -+ */ -+ } -+ -+ /* Here we assume that memory_region_is_ram(section->mr) == true */ -+ return vfio_physical_log_clear(container, section); -+} -+ - static void vfio_listener_log_clear(MemoryListener *listener, - MemoryRegionSection *section) - { -@@ -1561,7 +1598,7 @@ static void vfio_listener_log_clear(MemoryListener *listener, - } - - if (vfio_devices_all_dirty_tracking(container)) { -- vfio_physical_log_clear(container, section); -+ vfio_clear_dirty_bitmap(container, section); - } - } - -@@ -1578,6 +1615,7 @@ static MemoryListener vfio_memory_prereg_listener = { - .region_add = vfio_prereg_listener_region_add, - .region_del = vfio_prereg_listener_region_del, - .log_sync = vfio_prereg_listener_log_sync, -+ .log_clear = vfio_prereg_listener_log_clear, - }; - - static void vfio_listener_release(VFIOContainer *container) --- -2.27.0 - diff --git a/vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch b/vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch deleted file mode 100644 index 77a0c8a14d29280b369466b1fa9b55dc62c26228..0000000000000000000000000000000000000000 --- a/vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 4c5350044ac2f61ab8088278b59eb6388ca49ff1 Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Tue, 11 May 2021 10:08:14 +0800 -Subject: [PATCH] vfio: Add vfio_prereg_listener_log_sync in nested stage - -In nested mode, we set up the stage 2 (gpa->hpa)and stage 1 -(giova->gpa) separately by vfio_prereg_listener_region_add() -and vfio_listener_region_add(). So when marking dirty pages -we just need to pay attention to stage 2 mappings. 
- -Legacy vfio_listener_log_sync cannot be used in nested stage. -This patch adds vfio_prereg_listener_log_sync to mark dirty -pages in nested mode. - -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 5176fd3a3d..6b00bd4c2f 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1317,6 +1317,22 @@ static int vfio_dma_sync_ram_section_dirty_bitmap(VFIOContainer *container, - int128_get64(section->size), ram_addr); - } - -+static void vfio_prereg_listener_log_sync(MemoryListener *listener, -+ MemoryRegionSection *section) -+{ -+ VFIOContainer *container = -+ container_of(listener, VFIOContainer, prereg_listener); -+ -+ if (!memory_region_is_ram(section->mr) || -+ !container->dirty_pages_supported) { -+ return; -+ } -+ -+ if (vfio_devices_all_dirty_tracking(container)) { -+ vfio_dma_sync_ram_section_dirty_bitmap(container, section); -+ } -+} -+ - typedef struct { - IOMMUNotifier n; - VFIOGuestIOMMU *giommu; -@@ -1361,6 +1377,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - -+ /* -+ * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are -+ * set up separately. It is inappropriate to pass 'giova' to kernel -+ * to get dirty pages. We only need to focus on stage 2 mapping when -+ * marking dirty pages. 
-+ */ -+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { -+ return 0; -+ } -+ - QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { - if (MEMORY_REGION(giommu->iommu) == section->mr && - giommu->n.start == section->offset_within_region) { -@@ -1551,6 +1577,7 @@ static const MemoryListener vfio_memory_listener = { - static MemoryListener vfio_memory_prereg_listener = { - .region_add = vfio_prereg_listener_region_add, - .region_del = vfio_prereg_listener_region_del, -+ .log_sync = vfio_prereg_listener_log_sync, - }; - - static void vfio_listener_release(VFIOContainer *container) --- -2.27.0 - diff --git a/vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch b/vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch deleted file mode 100644 index e65b3a8fe54f0fbc3fc5a00949632e67798ce788..0000000000000000000000000000000000000000 --- a/vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 8113fdcf0c1383ae5b9542563656bea3753d834e Mon Sep 17 00:00:00 2001 -From: Shenming Lu -Date: Wed, 10 Mar 2021 11:02:33 +0800 -Subject: [PATCH] vfio: Avoid disabling and enabling vectors repeatedly in VFIO - migration - -In VFIO migration resume phase and some guest startups, there are -already unmasked vectors in the vector table when calling -vfio_msix_enable(). So in order to avoid inefficiently disabling -and enabling vectors repeatedly, let's allocate all needed vectors -first and then enable these unmasked vectors one by one without -disabling. 
- -Signed-off-by: Shenming Lu -Message-Id: <20210310030233.1133-4-lushenming@huawei.com> -Signed-off-by: Alex Williamson ---- - hw/vfio/pci.c | 20 +++++++++++++++++--- - 1 file changed, 17 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index a637c35e7a..da7c740bce 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -563,6 +563,9 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) - - static void vfio_msix_enable(VFIOPCIDevice *vdev) - { -+ PCIDevice *pdev = &vdev->pdev; -+ unsigned int nr, max_vec = 0; -+ - vfio_disable_interrupts(vdev); - - vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries); -@@ -581,11 +584,22 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) - * triggering to userspace, then immediately release the vector, leaving - * the physical device with no vectors enabled, but MSI-X enabled, just - * like the guest view. -+ * If there are already unmasked vectors (in migration resume phase and -+ * some guest startups) which will be enabled soon, we can allocate all -+ * of them here to avoid inefficiently disabling and enabling vectors -+ * repeatedly later. 
- */ -- vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL); -- vfio_msix_vector_release(&vdev->pdev, 0); -+ if (!pdev->msix_function_masked) { -+ for (nr = 0; nr < msix_nr_vectors_allocated(pdev); nr++) { -+ if (!msix_is_masked(pdev, nr)) { -+ max_vec = nr; -+ } -+ } -+ } -+ vfio_msix_vector_do_use(pdev, max_vec, NULL, NULL); -+ vfio_msix_vector_release(pdev, max_vec); - -- if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, -+ if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use, - vfio_msix_vector_release, NULL)) { - error_report("vfio: msix_set_vector_notifiers failed"); - } --- -2.27.0 - diff --git a/vfio-Change-default-dirty-pages-tracking-behavior-du.patch b/vfio-Change-default-dirty-pages-tracking-behavior-du.patch deleted file mode 100644 index d34f0541c8589124e35a10bb220be59e64f21e53..0000000000000000000000000000000000000000 --- a/vfio-Change-default-dirty-pages-tracking-behavior-du.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 69d1cc17c0a77dbd0d8e811cfaa899b01bf2e5bc Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 23 Nov 2020 19:53:19 +0530 -Subject: [PATCH] vfio: Change default dirty pages tracking behavior during - migration - -By default dirty pages tracking is enabled during iterative phase -(pre-copy phase). -Added per device opt-out option 'x-pre-copy-dirty-page-tracking' to -disable dirty pages tracking during iterative phase. If the option -'x-pre-copy-dirty-page-tracking=off' is set for any VFIO device, dirty -pages tracking during iterative phase will be disabled. 
- -Signed-off-by: Kirti Wankhede -Signed-off-by: Alex Williamson -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 11 +++++++---- - hw/vfio/pci.c | 3 +++ - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 11 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index a86a4c4506..d9cc3509ef 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -310,7 +310,7 @@ bool vfio_mig_active(void) - return true; - } - --static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) -+static bool vfio_devices_all_saving(VFIOContainer *container) - { - VFIOGroup *group; - VFIODevice *vbasedev; -@@ -328,8 +328,11 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) - return false; - } - -- if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && -- !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { -+ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { -+ if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) -+ && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { -+ return false; -+ } - continue; - } else { - return false; -@@ -1088,7 +1091,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, - return; - } - -- if (vfio_devices_all_stopped_and_saving(container)) { -+ if (vfio_devices_all_saving(container)) { - vfio_sync_dirty_bitmap(container, section); - } - } -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2795b8bd12..3641ad0c5c 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3180,6 +3180,9 @@ static void vfio_instance_init(Object *obj) - static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), - DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), -+ DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, -+ vbasedev.pre_copy_dirty_page_tracking, -+ ON_OFF_AUTO_ON), - DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice, - display, ON_OFF_AUTO_OFF), - 
DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0), -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 7398631d4c..475aa9fb40 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -130,6 +130,7 @@ typedef struct VFIODevice { - unsigned int flags; - VFIOMigration *migration; - Error *migration_blocker; -+ OnOffAuto pre_copy_dirty_page_tracking; - } VFIODevice; - - struct VFIODeviceOps { --- -2.27.0 - diff --git a/vfio-Create-host-IOMMU-device-instance.patch b/vfio-Create-host-IOMMU-device-instance.patch new file mode 100644 index 0000000000000000000000000000000000000000..03b681ffd6c68dc6ddfae96fb29dd4045596ffc6 --- /dev/null +++ b/vfio-Create-host-IOMMU-device-instance.patch @@ -0,0 +1,124 @@ +From a152921f6d534f2a515b4e88304ad115fae8fa8f Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:37 +0800 +Subject: [PATCH] vfio: Create host IOMMU device instance +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Create host IOMMU device instance in vfio_attach_device() and call +.realize() to initialize it further. + +Introduce attribute VFIOIOMMUClass::hiod_typename and initialize +it based on VFIO backend type. It will facilitate HostIOMMUDevice +creation in vfio_attach_device(). + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + hw/vfio/common.c | 18 +++++++++++++++++- + hw/vfio/container.c | 2 ++ + hw/vfio/iommufd.c | 2 ++ + include/hw/vfio/vfio-common.h | 1 + + include/hw/vfio/vfio-container-base.h | 3 +++ + 5 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b5d02df0c2..d5ff65f90a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1650,6 +1650,8 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + { + const VFIOIOMMUClass *ops = + VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); ++ HostIOMMUDevice *hiod = NULL; ++ int ret; + + if (vbasedev->iommufd) { + ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); +@@ -1657,7 +1659,20 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + + assert(ops); + +- return ops->attach_device(name, vbasedev, as, errp); ++ ret = ops->attach_device(name, vbasedev, as, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); ++ if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) { ++ object_unref(hiod); ++ ops->detach_device(vbasedev); ++ return -1; ++ } ++ vbasedev->hiod = hiod; ++ ++ return 0; + } + + void vfio_detach_device(VFIODevice *vbasedev) +@@ -1665,5 +1680,6 @@ void vfio_detach_device(VFIODevice *vbasedev) + if (!vbasedev->bcontainer) { + return; + } ++ object_unref(vbasedev->hiod); + vbasedev->bcontainer->ops->detach_device(vbasedev); + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index ed54ce6d0c..10f7635425 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1240,6 +1240,8 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) + { + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + ++ vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO; ++ + vioc->setup = vfio_legacy_setup; + vioc->dma_map = vfio_legacy_dma_map; + vioc->dma_unmap = vfio_legacy_dma_unmap; +diff --git a/hw/vfio/iommufd.c 
b/hw/vfio/iommufd.c +index 2efdba5565..7cbf0e44f1 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -629,6 +629,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) + { + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + ++ vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO; ++ + vioc->dma_map = iommufd_cdev_map; + vioc->dma_unmap = iommufd_cdev_unmap; + vioc->attach_device = iommufd_cdev_attach; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 376b8350b9..d45d40c329 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -140,6 +140,7 @@ typedef struct VFIODevice { + OnOffAuto pre_copy_dirty_page_tracking; + bool dirty_pages_supported; + bool dirty_tracking; ++ HostIOMMUDevice *hiod; + int devid; + IOMMUFDBackend *iommufd; + } VFIODevice; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index b2813b0c11..7a4c575115 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -109,6 +109,9 @@ DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) + struct VFIOIOMMUClass { + InterfaceClass parent_class; + ++ /* Properties */ ++ const char *hiod_typename; ++ + /* basic feature */ + int (*setup)(VFIOContainerBase *bcontainer, Error **errp); + int (*dma_map)(const VFIOContainerBase *bcontainer, +-- +2.41.0.windows.1 + diff --git a/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch b/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch deleted file mode 100644 index 65949f079ed1eb2baea2e626fbd4d6140d23350c..0000000000000000000000000000000000000000 --- a/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch +++ /dev/null @@ -1,162 +0,0 @@ -From a400753d0f1a008367165aadf375abfe86a66ed7 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:24 +0530 -Subject: [PATCH] vfio: Dirty page tracking when vIOMMU is enabled - -When vIOMMU is enabled, 
register MAP notifier from log_sync when all -devices in container are in stop and copy phase of migration. Call replay -and get dirty pages from notifier callback. - -Suggested-by: Alex Williamson -Signed-off-by: Kirti Wankhede -Reviewed-by: Yan Zhao -Signed-off-by: Alex Williamson ---- - hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++--- - hw/vfio/trace-events | 1 + - 2 files changed, 83 insertions(+), 6 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 4d2828fc97..8773b998ac 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -441,8 +441,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) - } - - /* Called with rcu_read_lock held. */ --static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr, -- bool *read_only) -+static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, -+ ram_addr_t *ram_addr, bool *read_only) - { - MemoryRegion *mr; - hwaddr xlat; -@@ -473,8 +473,17 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr, - return false; - } - -- *vaddr = memory_region_get_ram_ptr(mr) + xlat; -- *read_only = !writable || mr->readonly; -+ if (vaddr) { -+ *vaddr = memory_region_get_ram_ptr(mr) + xlat; -+ } -+ -+ if (ram_addr) { -+ *ram_addr = memory_region_get_ram_addr(mr) + xlat; -+ } -+ -+ if (read_only) { -+ *read_only = !writable || mr->readonly; -+ } - - return true; - } -@@ -484,7 +493,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); - VFIOContainer *container = giommu->container; - hwaddr iova = iotlb->iova + giommu->iommu_offset; -- bool read_only; - void *vaddr; - int ret; - -@@ -500,7 +508,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - rcu_read_lock(); - - if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { -- if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) { -+ bool read_only; -+ -+ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) 
{ - goto out; - } - /* -@@ -881,11 +891,77 @@ err_out: - return ret; - } - -+typedef struct { -+ IOMMUNotifier n; -+ VFIOGuestIOMMU *giommu; -+} vfio_giommu_dirty_notifier; -+ -+static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) -+{ -+ vfio_giommu_dirty_notifier *gdn = container_of(n, -+ vfio_giommu_dirty_notifier, n); -+ VFIOGuestIOMMU *giommu = gdn->giommu; -+ VFIOContainer *container = giommu->container; -+ hwaddr iova = iotlb->iova + giommu->iommu_offset; -+ ram_addr_t translated_addr; -+ -+ trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask); -+ -+ if (iotlb->target_as != &address_space_memory) { -+ error_report("Wrong target AS \"%s\", only system memory is allowed", -+ iotlb->target_as->name ? iotlb->target_as->name : "none"); -+ return; -+ } -+ -+ rcu_read_lock(); -+ if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { -+ int ret; -+ -+ ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, -+ translated_addr); -+ if (ret) { -+ error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " -+ "0x%"HWADDR_PRIx") = %d (%m)", -+ container, iova, -+ iotlb->addr_mask + 1, ret); -+ } -+ } -+ rcu_read_unlock(); -+} -+ - static int vfio_sync_dirty_bitmap(VFIOContainer *container, - MemoryRegionSection *section) - { - ram_addr_t ram_addr; - -+ if (memory_region_is_iommu(section->mr)) { -+ VFIOGuestIOMMU *giommu; -+ -+ QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { -+ if (MEMORY_REGION(giommu->iommu) == section->mr && -+ giommu->n.start == section->offset_within_region) { -+ Int128 llend; -+ vfio_giommu_dirty_notifier gdn = { .giommu = giommu }; -+ int idx = memory_region_iommu_attrs_to_index(giommu->iommu, -+ MEMTXATTRS_UNSPECIFIED); -+ -+ llend = int128_add(int128_make64(section->offset_within_region), -+ section->size); -+ llend = int128_sub(llend, int128_one()); -+ -+ iommu_notifier_init(&gdn.n, -+ vfio_iommu_map_dirty_notify, -+ IOMMU_NOTIFIER_MAP, -+ section->offset_within_region, 
-+ int128_get64(llend), -+ idx); -+ memory_region_iommu_replay(giommu->iommu, &gdn.n); -+ break; -+ } -+ } -+ return 0; -+ } -+ - ram_addr = memory_region_get_ram_addr(section->mr) + - section->offset_within_region; - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 4167f35d64..575ebde6e0 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -162,3 +162,4 @@ vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 - vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 - vfio_load_cleanup(const char *name) " (%s)" - vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 -+vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 --- -2.27.0 - diff --git a/vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch b/vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch deleted file mode 100644 index 47d59923070d7827152f59a60304ef708bcc1c62..0000000000000000000000000000000000000000 --- a/vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 8dc6e7ccc5712aee457ffb1f6cf1bf3f80e778d5 Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Thu, 27 May 2021 20:31:01 +0800 -Subject: [PATCH] vfio: Fix unregister SaveVMHandler in vfio_migration_finalize - -In the vfio_migration_init(), the SaveVMHandler is registered for -VFIO device. But it lacks the operation of 'unregister'. It will -lead to 'Segmentation fault (core dumped)' in -qemu_savevm_state_setup(), if performing live migration after a -VFIO device is hot deleted. 
- -Fixes: cd5b58f2ba (vfio: Register SaveVMHandlers for VFIO device) -Reported-by: Qixin Gan -Signed-off-by: Kunkun Jiang -Message-Id: <20210527123101.289-1-jiangkunkun@huawei.com> -Reviewed by: Kirti Wankhede -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index f1f006d584..d9e0e12824 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -893,6 +893,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) - - remove_migration_state_change_notifier(&migration->migration_state); - qemu_del_vm_change_state_handler(migration->vm_state); -+ unregister_savevm(vbasedev->dev, "vfio", vbasedev); - vfio_migration_exit(vbasedev); - } - --- -2.27.0 - diff --git a/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch b/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch deleted file mode 100644 index 6a2324b57811f6d375bbc7f795dc07f78baa42e2..0000000000000000000000000000000000000000 --- a/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 094aca3a87e63a0e6ae01b22f382c21dd91bb03e Mon Sep 17 00:00:00 2001 -From: Zenghui Yu -Date: Fri, 4 Dec 2020 09:42:40 +0800 -Subject: [PATCH] vfio: Fix vfio_listener_log_sync function name typo - -There is an obvious typo in the function name of the .log_sync() callback. -Spell it correctly. 
- -Signed-off-by: Zenghui Yu -Message-Id: <20201204014240.772-1-yuzenghui@huawei.com> -Signed-off-by: Alex Williamson -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index d9cc3509ef..ebd701faa0 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1081,7 +1081,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - int128_get64(section->size), ram_addr); - } - --static void vfio_listerner_log_sync(MemoryListener *listener, -+static void vfio_listener_log_sync(MemoryListener *listener, - MemoryRegionSection *section) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); -@@ -1099,7 +1099,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, - static const MemoryListener vfio_memory_listener = { - .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, -- .log_sync = vfio_listerner_log_sync, -+ .log_sync = vfio_listener_log_sync, - }; - - static void vfio_listener_release(VFIOContainer *container) --- -2.27.0 - diff --git a/vfio-Force-nested-if-iommu-requires-it.patch b/vfio-Force-nested-if-iommu-requires-it.patch deleted file mode 100644 index 6a6b9da3f1ebd6c44f6a298a9c456351a8a93fcd..0000000000000000000000000000000000000000 --- a/vfio-Force-nested-if-iommu-requires-it.patch +++ /dev/null @@ -1,100 +0,0 @@ -From e4122a95a30cd58e1cd6e1742928e68aa94fd7ee Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 28 Aug 2018 16:16:20 +0200 -Subject: [PATCH] vfio: Force nested if iommu requires it - -In case we detect the address space is translated by -a virtual IOMMU which requires HW nested paging to -integrate with VFIO, let's set up the container with -the VFIO_TYPE1_NESTING_IOMMU iommu_type. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 36 ++++++++++++++++++++++++++++-------- - 1 file changed, 28 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index fefa2ccfdf..c78b58d365 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1683,27 +1683,38 @@ static void vfio_put_address_space(VFIOAddressSpace *space) - * vfio_get_iommu_type - selects the richest iommu_type (v2 first) - */ - static int vfio_get_iommu_type(VFIOContainer *container, -+ bool want_nested, - Error **errp) - { -- int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, -+ int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU, -+ VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, - VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU }; -- int i; -+ int i, ret = -EINVAL; - - for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { - if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { -- return iommu_types[i]; -+ if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) { -+ continue; -+ } -+ ret = iommu_types[i]; -+ break; - } - } -- error_setg(errp, "No available IOMMU models"); -- return -EINVAL; -+ if (ret < 0) { -+ error_setg(errp, "No available IOMMU models"); -+ } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) { -+ error_setg(errp, "Nested mode requested but not supported"); -+ ret = -EINVAL; -+ } -+ return ret; - } - - static int vfio_init_container(VFIOContainer *container, int group_fd, -- Error **errp) -+ bool want_nested, Error **errp) - { - int iommu_type, dirty_log_manual_clear, ret; - -- iommu_type = vfio_get_iommu_type(container, errp); -+ iommu_type = vfio_get_iommu_type(container, want_nested, errp); - if (iommu_type < 0) { - return iommu_type; - } -@@ -1815,6 +1826,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - VFIOContainer *container; - int ret, fd; - VFIOAddressSpace *space; -+ IOMMUMemoryRegion *iommu_mr; -+ bool nested = false; -+ -+ if 
(memory_region_is_iommu(as->root)) { -+ iommu_mr = IOMMU_MEMORY_REGION(as->root); -+ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED, -+ (void *)&nested); -+ } - - space = vfio_get_address_space(as); - -@@ -1879,13 +1898,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - QLIST_INIT(&container->hostwin_list); - QLIST_INIT(&container->dma_list); - -- ret = vfio_init_container(container, group->fd, errp); -+ ret = vfio_init_container(container, group->fd, nested, errp); - if (ret) { - goto free_container_exit; - } - trace_vfio_connect_new_container(group->groupid, container->fd); - - switch (container->iommu_type) { -+ case VFIO_TYPE1_NESTING_IOMMU: - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: - { --- -2.27.0 - diff --git a/vfio-Get-migration-capability-flags-for-container.patch b/vfio-Get-migration-capability-flags-for-container.patch deleted file mode 100644 index 88b9bb7e6fdfe4f4a75808bbdcb5ec45d354ae15..0000000000000000000000000000000000000000 --- a/vfio-Get-migration-capability-flags-for-container.patch +++ /dev/null @@ -1,186 +0,0 @@ -From fc49c9cbf2deba53370f48ad9db2adc5f6ceb3ba Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:21 +0530 -Subject: [PATCH] vfio: Get migration capability flags for container - -Added helper functions to get IOMMU info capability chain. -Added function to get migration capability information from that -capability chain for IOMMU container. - -Similar change was proposed earlier: -https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html - -Disable migration for devices if IOMMU module doesn't support migration -capability. 
- -Signed-off-by: Kirti Wankhede -Cc: Shameer Kolothum -Cc: Eric Auger -Signed-off-by: Alex Williamson ---- - hw/vfio/common.c | 90 +++++++++++++++++++++++++++++++---- - hw/vfio/migration.c | 7 ++- - include/hw/vfio/vfio-common.h | 3 ++ - 3 files changed, 91 insertions(+), 9 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 4c32b1bb99..35168b8f3e 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1210,6 +1210,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, - return 0; - } - -+static int vfio_get_iommu_info(VFIOContainer *container, -+ struct vfio_iommu_type1_info **info) -+{ -+ -+ size_t argsz = sizeof(struct vfio_iommu_type1_info); -+ -+ *info = g_new0(struct vfio_iommu_type1_info, 1); -+again: -+ (*info)->argsz = argsz; -+ -+ if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { -+ g_free(*info); -+ *info = NULL; -+ return -errno; -+ } -+ -+ if (((*info)->argsz > argsz)) { -+ argsz = (*info)->argsz; -+ *info = g_realloc(*info, argsz); -+ goto again; -+ } -+ -+ return 0; -+} -+ -+static struct vfio_info_cap_header * -+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) -+{ -+ struct vfio_info_cap_header *hdr; -+ void *ptr = info; -+ -+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { -+ return NULL; -+ } -+ -+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { -+ if (hdr->id == id) { -+ return hdr; -+ } -+ } -+ -+ return NULL; -+} -+ -+static void vfio_get_iommu_info_migration(VFIOContainer *container, -+ struct vfio_iommu_type1_info *info) -+{ -+ struct vfio_info_cap_header *hdr; -+ struct vfio_iommu_type1_info_cap_migration *cap_mig; -+ -+ hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); -+ if (!hdr) { -+ return; -+ } -+ -+ cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, -+ header); -+ -+ /* -+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of -+ * TARGET_PAGE_SIZE to mark those dirty. 
-+ */ -+ if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) { -+ container->dirty_pages_supported = true; -+ container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; -+ container->dirty_pgsizes = cap_mig->pgsize_bitmap; -+ } -+} -+ - static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) - { -@@ -1273,6 +1342,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container = g_malloc0(sizeof(*container)); - container->space = space; - container->fd = fd; -+ container->dirty_pages_supported = false; - QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->hostwin_list); - -@@ -1285,7 +1355,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: - { -- struct vfio_iommu_type1_info info; -+ struct vfio_iommu_type1_info *info; - - /* - * FIXME: This assumes that a Type1 IOMMU can map any 64-bit -@@ -1294,15 +1364,19 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - * existing Type1 IOMMUs generally support any IOVA we're - * going to actually try in practice. 
- */ -- info.argsz = sizeof(info); -- ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); -- /* Ignore errors */ -- if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { -+ ret = vfio_get_iommu_info(container, &info); -+ -+ if (ret || !(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { - /* Assume 4k IOVA page size */ -- info.iova_pgsizes = 4096; -+ info->iova_pgsizes = 4096; - } -- vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); -- container->pgsizes = info.iova_pgsizes; -+ vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); -+ container->pgsizes = info->iova_pgsizes; -+ -+ if (!ret) { -+ vfio_get_iommu_info_migration(container, info); -+ } -+ g_free(info); - break; - } - case VFIO_SPAPR_TCE_v2_IOMMU: -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 954c064435..0d2bd9e5cd 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -832,9 +832,14 @@ err: - - int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) - { -+ VFIOContainer *container = vbasedev->group->container; - struct vfio_region_info *info = NULL; - Error *local_err = NULL; -- int ret; -+ int ret = -ENOTSUP; -+ -+ if (!container->dirty_pages_supported) { -+ goto add_blocker; -+ } - - ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, - VFIO_REGION_SUBTYPE_MIGRATION, &info); -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index c825524606..8fd0212264 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -84,6 +84,9 @@ typedef struct VFIOContainer { - unsigned iommu_type; - int error; - bool initialized; -+ bool dirty_pages_supported; -+ uint64_t dirty_pgsizes; -+ uint64_t max_dirty_bitmap_size; - unsigned long pgsizes; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; --- -2.27.0 - diff --git a/vfio-Helper-to-get-IRQ-info-including-capabilities.patch b/vfio-Helper-to-get-IRQ-info-including-capabilities.patch deleted file mode 100644 index 
16f16d32faa4d793056700a52ef33b23716801c1..0000000000000000000000000000000000000000 --- a/vfio-Helper-to-get-IRQ-info-including-capabilities.patch +++ /dev/null @@ -1,178 +0,0 @@ -From 43fd039dcfee221eb3f86a2cf7deb287cc04e5ad Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 20 Jun 2019 16:39:57 +0200 -Subject: [PATCH] vfio: Helper to get IRQ info including capabilities - -As done for vfio regions, add helpers to retrieve irq info -including their optional capabilities. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 97 +++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 1 + - include/hw/vfio/vfio-common.h | 7 +++ - 3 files changed, 105 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index db9af3b0e5..98dc9e6f84 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1565,6 +1565,25 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) - return NULL; - } - -+struct vfio_info_cap_header * -+vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id) -+{ -+ struct vfio_info_cap_header *hdr; -+ void *ptr = info; -+ -+ if (!(info->flags & VFIO_IRQ_INFO_FLAG_CAPS)) { -+ return NULL; -+ } -+ -+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { -+ if (hdr->id == id) { -+ return hdr; -+ } -+ } -+ -+ return NULL; -+} -+ - static int vfio_setup_region_sparse_mmaps(VFIORegion *region, - struct vfio_region_info *info) - { -@@ -2499,6 +2518,33 @@ retry: - return 0; - } - -+int vfio_get_irq_info(VFIODevice *vbasedev, int index, -+ struct vfio_irq_info **info) -+{ -+ size_t argsz = sizeof(struct vfio_irq_info); -+ -+ *info = g_malloc0(argsz); -+ -+ (*info)->index = index; -+retry: -+ (*info)->argsz = argsz; -+ -+ if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, *info)) { -+ g_free(*info); -+ *info = NULL; -+ return -errno; -+ } -+ -+ if ((*info)->argsz > argsz) { -+ argsz = (*info)->argsz; -+ *info = g_realloc(*info, argsz); -+ -+ goto retry; -+ } -+ -+ return 0; 
-+} -+ - int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, - uint32_t subtype, struct vfio_region_info **info) - { -@@ -2534,6 +2580,42 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, - return -ENODEV; - } - -+int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type, -+ uint32_t subtype, struct vfio_irq_info **info) -+{ -+ int i; -+ -+ for (i = 0; i < vbasedev->num_irqs; i++) { -+ struct vfio_info_cap_header *hdr; -+ struct vfio_irq_info_cap_type *cap_type; -+ -+ if (vfio_get_irq_info(vbasedev, i, info)) { -+ continue; -+ } -+ -+ hdr = vfio_get_irq_info_cap(*info, VFIO_IRQ_INFO_CAP_TYPE); -+ if (!hdr) { -+ g_free(*info); -+ continue; -+ } -+ -+ cap_type = container_of(hdr, struct vfio_irq_info_cap_type, header); -+ -+ trace_vfio_get_dev_irq(vbasedev->name, i, -+ cap_type->type, cap_type->subtype); -+ -+ if (cap_type->type == type && cap_type->subtype == subtype) { -+ return 0; -+ } -+ -+ g_free(*info); -+ } -+ -+ *info = NULL; -+ return -ENODEV; -+} -+ -+ - bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) - { - struct vfio_region_info *info = NULL; -@@ -2549,6 +2631,21 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) - return ret; - } - -+bool vfio_has_irq_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) -+{ -+ struct vfio_region_info *info = NULL; -+ bool ret = false; -+ -+ if (!vfio_get_region_info(vbasedev, region, &info)) { -+ if (vfio_get_region_info_cap(info, cap_type)) { -+ ret = true; -+ } -+ g_free(info); -+ } -+ -+ return ret; -+} -+ - /* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 247b72c1eb..54e10046f5 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -117,6 +117,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re - vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d 
sparse mmap entries" - vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" - vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" -+vfio_get_dev_irq(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" - vfio_dma_unmap_overflow_workaround(void) "" - vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d" - vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index b175158138..a82962ab16 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -238,6 +238,13 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, - bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); - struct vfio_info_cap_header * - vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id); -+int vfio_get_irq_info(VFIODevice *vbasedev, int index, -+ struct vfio_irq_info **info); -+int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type, -+ uint32_t subtype, struct vfio_irq_info **info); -+bool vfio_has_irq_cap(VFIODevice *vbasedev, int irq, uint16_t cap_type); -+struct vfio_info_cap_header * -+vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id); - #endif - extern const MemoryListener vfio_prereg_listener; - --- -2.27.0 - diff --git a/vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch b/vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch new file mode 100644 index 0000000000000000000000000000000000000000..f6c11b9690f0a6e0b8e15e52d6448952055c72b9 --- /dev/null +++ b/vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch @@ -0,0 +1,145 @@ +From 65c5381ba3ce5f062f0be9aa796e68b8a9d6bb3c Mon Sep 17 00:00:00 
2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:53:02 +0800 +Subject: [PATCH] vfio: Introduce a helper function to initialize VFIODevice +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce a helper function to replace the common code to initialize +VFIODevice in pci, platform, ap and ccw VFIO device. + +No functional change intended. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/ap.c | 8 ++------ + hw/vfio/ccw.c | 8 ++------ + hw/vfio/helpers.c | 11 +++++++++++ + hw/vfio/pci.c | 6 ++---- + hw/vfio/platform.c | 6 ++---- + include/hw/vfio/vfio-common.h | 2 ++ + 6 files changed, 21 insertions(+), 20 deletions(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index 95fe7cd98b..e157aa1ff7 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -226,18 +226,14 @@ static void vfio_ap_instance_init(Object *obj) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); + VFIODevice *vbasedev = &vapdev->vdev; + +- vbasedev->type = VFIO_DEVICE_TYPE_AP; +- vbasedev->ops = &vfio_ap_ops; +- vbasedev->dev = DEVICE(vapdev); +- vbasedev->fd = -1; +- + /* + * vfio-ap devices operate in a way compatible with discarding of + * memory in RAM blocks, as no pages are pinned in the host. + * This needs to be set before vfio_get_device() for vfio common to + * handle ram_block_discard_disable(). 
+ */ +- vbasedev->ram_block_discard_allowed = true; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, ++ DEVICE(vapdev), true); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 6305a4c1b8..90e4a53437 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -683,11 +683,6 @@ static void vfio_ccw_instance_init(Object *obj) + VFIOCCWDevice *vcdev = VFIO_CCW(obj); + VFIODevice *vbasedev = &vcdev->vdev; + +- vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->ops = &vfio_ccw_ops; +- vbasedev->dev = DEVICE(vcdev); +- vbasedev->fd = -1; +- + /* + * All vfio-ccw devices are believed to operate in a way compatible with + * discarding of memory in RAM blocks, ie. pages pinned in the host are +@@ -696,7 +691,8 @@ static void vfio_ccw_instance_init(Object *obj) + * needs to be set before vfio_get_device() for vfio common to handle + * ram_block_discard_disable(). + */ +- vbasedev->ram_block_discard_allowed = true; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops, ++ DEVICE(vcdev), true); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 3592c3d54e..6789870802 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -652,3 +652,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) + } + vbasedev->fd = fd; + } ++ ++void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, ++ DeviceState *dev, bool ram_discard) ++{ ++ vbasedev->type = type; ++ vbasedev->ops = ops; ++ vbasedev->dev = dev; ++ vbasedev->fd = -1; ++ ++ vbasedev->ram_block_discard_allowed = ram_discard; ++} +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 87405584d7..1874ec1aba 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3327,10 +3327,8 @@ static void vfio_instance_init(Object *obj) + vdev->host.slot = ~0U; + vdev->host.function = ~0U; + +- vbasedev->type = VFIO_DEVICE_TYPE_PCI; +- vbasedev->ops = &vfio_pci_ops; +- vbasedev->dev = DEVICE(vdev); +- vbasedev->fd 
= -1; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops, ++ DEVICE(vdev), false); + + vdev->nv_gpudirect_clique = 0xFF; + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 506eb8193f..a8d9b7da63 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -657,10 +657,8 @@ static void vfio_platform_instance_init(Object *obj) + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); + VFIODevice *vbasedev = &vdev->vbasedev; + +- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; +- vbasedev->ops = &vfio_platform_ops; +- vbasedev->dev = DEVICE(vdev); +- vbasedev->fd = -1; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops, ++ DEVICE(vdev), false); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 37f01410d5..151b2ab65f 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -271,4 +271,6 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, + /* Returns 0 on success, or a negative errno. 
*/ + int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); + void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); ++void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, ++ DeviceState *dev, bool ram_discard); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch b/vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b32a5de7f63c5d499b9051e0df7414272ce30d2 --- /dev/null +++ b/vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch @@ -0,0 +1,121 @@ +From 166ecdd78a0f5cf359c0cbb4f7a5c32beee12fd7 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:18 +0800 +Subject: [PATCH] vfio: Introduce base object for VFIOContainer and targeted + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce a dumb VFIOContainerBase object and its targeted interface. +This is willingly not a QOM object because we don't want it to be +visible from the user interface. The VFIOContainerBase will be +smoothly populated in subsequent patches as well as interfaces. + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + include/hw/vfio/vfio-common.h | 8 ++--- + include/hw/vfio/vfio-container-base.h | 50 +++++++++++++++++++++++++++ + 2 files changed, 52 insertions(+), 6 deletions(-) + create mode 100644 include/hw/vfio/vfio-container-base.h + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index fd9828d50b..c89b5886f2 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -30,6 +30,7 @@ + #include + #endif + #include "sysemu/sysemu.h" ++#include "hw/vfio/vfio-container-base.h" + + #define VFIO_MSG_PREFIX "vfio %s: " + +@@ -89,6 +90,7 @@ typedef struct VFIODMARange { + } VFIODMARange; + + typedef struct VFIOContainer { ++ VFIOContainerBase bcontainer; + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener listener; +@@ -211,12 +213,6 @@ typedef struct VFIODisplay { + } dmabuf; + } VFIODisplay; + +-typedef struct { +- unsigned long *bitmap; +- hwaddr size; +- hwaddr pages; +-} VFIOBitmap; +- + VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +new file mode 100644 +index 0000000000..1d6daaea5d +--- /dev/null ++++ b/include/hw/vfio/vfio-container-base.h +@@ -0,0 +1,50 @@ ++/* ++ * VFIO BASE CONTAINER ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H ++#define HW_VFIO_VFIO_CONTAINER_BASE_H ++ ++#include "exec/memory.h" ++ ++typedef struct VFIODevice VFIODevice; ++typedef struct VFIOIOMMUOps VFIOIOMMUOps; ++ ++typedef struct { ++ unsigned long *bitmap; ++ hwaddr size; ++ hwaddr pages; ++} VFIOBitmap; ++ ++/* ++ * This is the base object for vfio container backends ++ */ ++typedef struct VFIOContainerBase { ++ const VFIOIOMMUOps *ops; ++} VFIOContainerBase; ++ ++struct VFIOIOMMUOps { ++ /* basic feature */ ++ int (*dma_map)(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly); ++ int (*dma_unmap)(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb); ++ int (*attach_device)(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp); ++ void (*detach_device)(VFIODevice *vbasedev); ++ /* migration feature */ ++ int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); ++ int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size); ++}; ++#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch b/vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch deleted file mode 100644 index 124587d1e081a65740786aefbd1033d895678245..0000000000000000000000000000000000000000 --- a/vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch +++ /dev/null @@ -1,261 +0,0 @@ -From eb3bfdb61025efe2891ce6732b8829a48dd75e2d Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 30 Aug 2018 15:04:25 +0200 -Subject: [PATCH] vfio: Introduce helpers to DMA map/unmap a RAM section - -Let's introduce two helpers that allow to DMA map/unmap a RAM -section. Those helpers will be called for nested stage setup in -another call site. 
Also the vfio_listener_region_add/del() -structure may be clearer. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 187 +++++++++++++++++++++++++++---------------- - hw/vfio/trace-events | 4 +- - 2 files changed, 119 insertions(+), 72 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index a8db784ac5..8837d33c57 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -709,13 +709,126 @@ hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end) - return NULL; - } - -+static int vfio_dma_map_ram_section(VFIOContainer *container, -+ MemoryRegionSection *section) -+{ -+ VFIOHostDMAWindow *hostwin; -+ Int128 llend, llsize; -+ hwaddr iova, end; -+ void *vaddr; -+ int ret; -+ -+ assert(memory_region_is_ram(section->mr)); -+ -+ iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); -+ llend = int128_make64(section->offset_within_address_space); -+ llend = int128_add(llend, section->size); -+ llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); -+ end = int128_get64(int128_sub(llend, int128_one())); -+ -+ vaddr = memory_region_get_ram_ptr(section->mr) + -+ section->offset_within_region + -+ (iova - section->offset_within_address_space); -+ -+ hostwin = hostwin_from_range(container, iova, end); -+ if (!hostwin) { -+ error_report("vfio: IOMMU Container %p can't map guest IOVA region" -+ " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, -+ container, iova, end); -+ return -EFAULT; -+ } -+ -+ trace_vfio_dma_map_ram(iova, end, vaddr); -+ -+ llsize = int128_sub(llend, int128_make64(iova)); -+ -+ if (memory_region_is_ram_device(section->mr)) { -+ hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; -+ -+ if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { -+ trace_vfio_listener_region_add_no_dma_map( -+ memory_region_name(section->mr), -+ section->offset_within_address_space, -+ int128_getlo(section->size), -+ pgmask + 1); -+ return 0; -+ } -+ } -+ -+ ret = vfio_dma_map(container, iova, 
int128_get64(llsize), -+ vaddr, section->readonly); -+ if (ret) { -+ error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " -+ "0x%"HWADDR_PRIx", %p) = %d (%m)", -+ container, iova, int128_get64(llsize), vaddr, ret); -+ if (memory_region_is_ram_device(section->mr)) { -+ /* Allow unexpected mappings not to be fatal for RAM devices */ -+ return 0; -+ } -+ return ret; -+ } -+ return 0; -+} -+ -+static void vfio_dma_unmap_ram_section(VFIOContainer *container, -+ MemoryRegionSection *section) -+{ -+ Int128 llend, llsize; -+ hwaddr iova, end; -+ bool try_unmap = true; -+ int ret; -+ -+ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); -+ llend = int128_make64(section->offset_within_address_space); -+ llend = int128_add(llend, section->size); -+ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); -+ -+ if (int128_ge(int128_make64(iova), llend)) { -+ return; -+ } -+ end = int128_get64(int128_sub(llend, int128_one())); -+ -+ llsize = int128_sub(llend, int128_make64(iova)); -+ -+ trace_vfio_dma_unmap_ram(iova, end); -+ -+ if (memory_region_is_ram_device(section->mr)) { -+ hwaddr pgmask; -+ VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end); -+ -+ assert(hostwin); /* or region_add() would have failed */ -+ -+ pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; -+ try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); -+ } -+ -+ if (try_unmap) { -+ if (int128_eq(llsize, int128_2_64())) { -+ /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ -+ llsize = int128_rshift(llsize, 1); -+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); -+ if (ret) { -+ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ "0x%"HWADDR_PRIx") = %d (%m)", -+ container, iova, int128_get64(llsize), ret); -+ } -+ iova += int128_get64(llsize); -+ } -+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); -+ if (ret) { -+ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ "0x%"HWADDR_PRIx") = %d (%m)", -+ container, iova, int128_get64(llsize), ret); -+ } -+ } -+} -+ - static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); - hwaddr iova, end; -- Int128 llend, llsize; -- void *vaddr; -+ Int128 llend; - int ret; - VFIOHostDMAWindow *hostwin; - -@@ -842,38 +955,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - } - - /* Here we assume that memory_region_is_ram(section->mr)==true */ -- -- vaddr = memory_region_get_ram_ptr(section->mr) + -- section->offset_within_region + -- (iova - section->offset_within_address_space); -- -- trace_vfio_listener_region_add_ram(iova, end, vaddr); -- -- llsize = int128_sub(llend, int128_make64(iova)); -- -- if (memory_region_is_ram_device(section->mr)) { -- hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; -- -- if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { -- trace_vfio_listener_region_add_no_dma_map( -- memory_region_name(section->mr), -- section->offset_within_address_space, -- int128_getlo(section->size), -- pgmask + 1); -- return; -- } -- } -- -- ret = vfio_dma_map(container, iova, int128_get64(llsize), -- vaddr, section->readonly); -- if (ret) { -- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " -- "0x%"HWADDR_PRIx", %p) = %d (%m)", -- container, iova, int128_get64(llsize), vaddr, ret); -- if (memory_region_is_ram_device(section->mr)) { -- /* Allow unexpected mappings not to be fatal for RAM 
devices */ -- return; -- } -+ if (vfio_dma_map_ram_section(container, section)) { - goto fail; - } - -@@ -902,10 +984,6 @@ static void vfio_listener_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); -- hwaddr iova, end; -- Int128 llend, llsize; -- int ret; -- bool try_unmap = true; - - if (vfio_listener_skipped_section(section)) { - trace_vfio_listener_region_del_skip( -@@ -945,38 +1023,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - */ - } - -- iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); -- llend = int128_make64(section->offset_within_address_space); -- llend = int128_add(llend, section->size); -- llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); -- -- if (int128_ge(int128_make64(iova), llend)) { -- return; -- } -- end = int128_get64(int128_sub(llend, int128_one())); -- -- llsize = int128_sub(llend, int128_make64(iova)); -- -- trace_vfio_listener_region_del(iova, end); -- -- if (memory_region_is_ram_device(section->mr)) { -- hwaddr pgmask; -- VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end); -- -- assert(hostwin); /* or region_add() would have failed */ -- -- pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; -- try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); -- } -- -- if (try_unmap) { -- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); -- if (ret) { -- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -- "0x%"HWADDR_PRIx") = %d (%m)", -- container, iova, int128_get64(llsize), ret); -- } -- } -+ vfio_dma_unmap_ram_section(container, section); - - memory_region_unref(section->mr); - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 561dc6e758..9b6c7ca61b 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -97,10 +97,10 @@ vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i - 
vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64 - vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" - vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 --vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" -+vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" - vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" - vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 --vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 -+vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 - vfio_disconnect_container(int fd) "close container->fd=%d" - vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d" - vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d" --- -2.27.0 - diff --git a/vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch b/vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch deleted file mode 100644 index 274a0c08a92bfa268f3fab8a5d7842cdbab9d273..0000000000000000000000000000000000000000 --- a/vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch +++ /dev/null @@ -1,64 +0,0 @@ -From ff9c1f7e3e17cc2afe1b2dfa545065e91941db8b Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Tue, 11 May 2021 10:08:13 +0800 -Subject: [PATCH] vfio: Introduce helpers to mark dirty pages of a RAM section - -Extract part of the code from vfio_sync_dirty_bitmap to form a -new helper, which allows to mark dirty pages of a RAM 
section. -This helper will be called for nested stage. - -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 22 ++++++++++++++-------- - 1 file changed, 14 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 21a866e545..5176fd3a3d 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1304,6 +1304,19 @@ err_out: - return ret; - } - -+static int vfio_dma_sync_ram_section_dirty_bitmap(VFIOContainer *container, -+ MemoryRegionSection *section) -+{ -+ ram_addr_t ram_addr; -+ -+ ram_addr = memory_region_get_ram_addr(section->mr) + -+ section->offset_within_region; -+ -+ return vfio_get_dirty_bitmap(container, -+ REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), -+ int128_get64(section->size), ram_addr); -+} -+ - typedef struct { - IOMMUNotifier n; - VFIOGuestIOMMU *giommu; -@@ -1345,8 +1358,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - static int vfio_sync_dirty_bitmap(VFIOContainer *container, - MemoryRegionSection *section) - { -- ram_addr_t ram_addr; -- - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - -@@ -1375,12 +1386,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - return 0; - } - -- ram_addr = memory_region_get_ram_addr(section->mr) + -- section->offset_within_region; -- -- return vfio_get_dirty_bitmap(container, -- REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), -- int128_get64(section->size), ram_addr); -+ return vfio_dma_sync_ram_section_dirty_bitmap(container, section); - } - - static void vfio_listener_log_sync(MemoryListener *listener, --- -2.27.0 - diff --git a/vfio-Introduce-hostwin_from_range-helper.patch b/vfio-Introduce-hostwin_from_range-helper.patch deleted file mode 100644 index b9a7099de2a6eeaa9265be66c8818ffc852e6583..0000000000000000000000000000000000000000 --- a/vfio-Introduce-hostwin_from_range-helper.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 25336cd596ff551293f1be6e108ad9277d80be0f Mon Sep 17 
00:00:00 2001 -From: Eric Auger -Date: Fri, 22 Mar 2019 18:05:23 +0100 -Subject: [PATCH] vfio: Introduce hostwin_from_range helper - -Let's introduce a hostwin_from_range() helper that returns the -hostwin encapsulating an IOVA range or NULL if none is found. - -This improves the readibility of callers and removes the usage -of hostwin_found. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 36 +++++++++++++++++------------------- - 1 file changed, 17 insertions(+), 19 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index c78b58d365..a8db784ac5 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -696,6 +696,19 @@ out: - rcu_read_unlock(); - } - -+static VFIOHostDMAWindow * -+hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end) -+{ -+ VFIOHostDMAWindow *hostwin; -+ -+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -+ if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { -+ return hostwin; -+ } -+ } -+ return NULL; -+} -+ - static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -@@ -705,7 +718,6 @@ static void vfio_listener_region_add(MemoryListener *listener, - void *vaddr; - int ret; - VFIOHostDMAWindow *hostwin; -- bool hostwin_found; - - if (vfio_listener_skipped_section(section)) { - trace_vfio_listener_region_add_skip( -@@ -783,15 +795,8 @@ static void vfio_listener_region_add(MemoryListener *listener, - #endif - } - -- hostwin_found = false; -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { -- hostwin_found = true; -- break; -- } -- } -- -- if (!hostwin_found) { -+ hostwin = hostwin_from_range(container, iova, end); -+ if (!hostwin) { - error_report("vfio: IOMMU container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, - container, iova, end); -@@ -956,16 +961,9 @@ static void 
vfio_listener_region_del(MemoryListener *listener, - - if (memory_region_is_ram_device(section->mr)) { - hwaddr pgmask; -- VFIOHostDMAWindow *hostwin; -- bool hostwin_found = false; -+ VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end); - -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { -- hostwin_found = true; -- break; -- } -- } -- assert(hostwin_found); /* or region_add() would have failed */ -+ assert(hostwin); /* or region_add() would have failed */ - - pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; - try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); --- -2.27.0 - diff --git a/vfio-Maintain-DMA-mapping-range-for-the-container.patch b/vfio-Maintain-DMA-mapping-range-for-the-container.patch index 901a5e38ea78a2c490875611f12658151da661b9..5898ae86434982c5e54d5d3eaf152cb96ba1ff98 100644 --- a/vfio-Maintain-DMA-mapping-range-for-the-container.patch +++ b/vfio-Maintain-DMA-mapping-range-for-the-container.patch @@ -1,4 +1,4 @@ -From 90a6a1ec65d55d27faf79341b2dd9418d99da187 Mon Sep 17 00:00:00 2001 +From bd2d81775edf285149346bf793d9b71236d7cf34 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 8 May 2021 17:31:04 +0800 Subject: [PATCH] vfio: Maintain DMA mapping range for the container @@ -31,19 +31,58 @@ memory hot-{un}plug, sparse DMA mappings, etc. Suggestions welcome. 
Signed-off-by: Zenghui Yu Signed-off-by: Kunkun Jiang --- - hw/vfio/common.c | 62 +++++++++++++++++++++++++++++++---- - include/hw/vfio/vfio-common.h | 9 +++++ - 2 files changed, 65 insertions(+), 6 deletions(-) + hw/vfio/common.c | 9 +++++-- + hw/vfio/container.c | 49 +++++++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 12 +++++++++ + 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 245e32df5b..c33c4c539d 100644 +index e70fdf5e0c..564e933135 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c -@@ -420,6 +420,29 @@ unmap_exit: +@@ -1156,6 +1156,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + vfio_devices_all_device_dirty_tracking(container); + uint64_t dirty_pages; + VFIOBitmap vbmap; ++ VFIODMARange *qrange; + int ret; + + if (!container->dirty_pages_supported && !all_device_dirty_tracking) { +@@ -1165,10 +1166,16 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + return 0; + } + ++ qrange = vfio_lookup_match_range(container, iova, size); ++ /* the same as vfio_dma_unmap() */ ++ assert(qrange); ++ + ret = vfio_bitmap_alloc(&vbmap, size); + if (ret) { + return ret; + } ++ g_free(vbmap.bitmap); ++ vbmap.bitmap = qrange->bitmap; + + if (all_device_dirty_tracking) { + ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); +@@ -1186,8 +1193,6 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, + ram_addr, dirty_pages); + out: +- g_free(vbmap.bitmap); +- + return ret; + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 242010036a..9a176a0d33 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -112,6 +112,29 @@ unmap_exit: return ret; } -+static VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, + hwaddr start_addr, hwaddr size) +{ + 
VFIODMARange *qrange; @@ -56,11 +95,11 @@ index 245e32df5b..c33c4c539d 100644 + return NULL; +} + -+static void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange) ++void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange) +{ + uint64_t pages, size; + -+ pages = REAL_HOST_PAGE_ALIGN(qrange->size) / qemu_real_host_page_size; ++ pages = REAL_HOST_PAGE_ALIGN(qrange->size) / qemu_real_host_page_size(); + size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE; + + qrange->bitmap = g_malloc0(size); @@ -69,15 +108,16 @@ index 245e32df5b..c33c4c539d 100644 /* * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 */ -@@ -433,12 +456,29 @@ static int vfio_dma_unmap(VFIOContainer *container, +@@ -124,6 +147,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, .iova = iova, .size = size, }; + VFIODMARange *qrange; + bool need_dirty_sync = false; + int ret; - if (iotlb && container->dirty_pages_supported && - vfio_devices_all_running_and_saving(container)) { - return vfio_dma_unmap_bitmap(container, iova, size, iotlb); +@@ -136,6 +160,22 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + need_dirty_sync = true; } + /* @@ -99,7 +139,7 @@ index 245e32df5b..c33c4c539d 100644 while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { /* * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c -@@ -475,6 +515,14 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, +@@ -180,6 +220,14 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, .iova = iova, .size = size, }; @@ -114,56 +154,19 @@ index 245e32df5b..c33c4c539d 100644 if (!readonly) { map.flags |= VFIO_DMA_MAP_FLAG_WRITE; -@@ -986,9 +1034,14 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - { - struct vfio_iommu_type1_dirty_bitmap *dbitmap; - struct vfio_iommu_type1_dirty_bitmap_get *range; -+ VFIODMARange *qrange; - uint64_t pages; - int ret; - -+ qrange = vfio_lookup_match_range(container, iova, size); 
-+ /* the same as vfio_dma_unmap() */ -+ assert(qrange); -+ - dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); - - dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); -@@ -1007,11 +1060,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size; - range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / - BITS_PER_BYTE; -- range->bitmap.data = g_try_malloc0(range->bitmap.size); -- if (!range->bitmap.data) { -- ret = -ENOMEM; -- goto err_out; -- } -+ -+ range->bitmap.data = (__u64 *)qrange->bitmap; - - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); - if (ret) { -@@ -1027,7 +1077,6 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size, - range->bitmap.size, ram_addr); - err_out: -- g_free(range->bitmap.data); - g_free(dbitmap); - - return ret; -@@ -1681,6 +1730,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->dirty_pages_supported = false; +@@ -552,6 +600,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->iova_ranges = NULL; QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->hostwin_list); + QLIST_INIT(&container->vrdl_list); + QLIST_INIT(&container->dma_list); ret = vfio_init_container(container, group->fd, errp); if (ret) { diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 475aa9fb40..2853dc861e 100644 +index a4a22accb9..b131d04c9c 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h -@@ -76,6 +76,14 @@ typedef struct VFIOAddressSpace { +@@ -80,6 +80,14 @@ typedef struct VFIOAddressSpace { struct VFIOGroup; @@ -178,14 +181,24 @@ index 475aa9fb40..2853dc861e 100644 typedef struct VFIOContainer { VFIOAddressSpace *space; int fd; /* /dev/vfio/vfio, empowered by the attached groups */ -@@ -91,6 +99,7 @@ typedef 
struct VFIOContainer { - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; +@@ -97,6 +105,7 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_HEAD(, VFIODMARange) dma_list; QLIST_ENTRY(VFIOContainer) next; - } VFIOContainer; + QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; +@@ -212,6 +221,9 @@ void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + /* container->fd */ ++VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++ hwaddr start_addr, hwaddr size); ++void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange); + int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + ram_addr_t size, IOMMUTLBEntry *iotlb); + int vfio_dma_map(VFIOContainer *container, hwaddr iova, -- 2.27.0 diff --git a/vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch b/vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..6afb13850cebd59cb2c2c839ad76eaab17b587b0 --- /dev/null +++ b/vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch @@ -0,0 +1,287 @@ +From f702d050b4309bb7e7ffc159a3c41c82fe34ba07 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:54 +0800 +Subject: [PATCH] vfio: Make VFIOContainerBase poiner parameter const in + VFIOIOMMUOps callbacks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some of the callbacks in VFIOIOMMUOps pass VFIOContainerBase poiner, +those callbacks only need read access to the sub object of VFIOContainerBase. +So make VFIOContainerBase, VFIOContainer and VFIOIOMMUFDContainer as const +in these callbacks. + +Local functions called by those callbacks also need same changes to avoid +build error. + +Modify vfio_lookup_match_range/vfio_legacy_dma_map during backporting. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 9 +++---- + hw/vfio/container-base.c | 2 +- + hw/vfio/container.c | 34 ++++++++++++++------------- + hw/vfio/iommufd.c | 8 +++---- + include/hw/vfio/vfio-common.h | 14 ++++++----- + include/hw/vfio/vfio-container-base.h | 12 ++++++---- + 6 files changed, 43 insertions(+), 36 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 0e900c6746..d572ec5880 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -204,7 +204,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) + return true; + } + +-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) ++bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer) + { + VFIODevice *vbasedev; + +@@ -221,7 +221,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. 
+ */ +-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) ++bool ++vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer) + { + VFIODevice *vbasedev; + +@@ -1139,7 +1140,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, + return 0; + } + +-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { +@@ -1162,7 +1163,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + return 0; + } + +-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, ++int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) + { + bool all_device_dirty_tracking = +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index eee2dcfe76..1ffd25bbfa 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -63,7 +63,7 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); + } + +-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size) + { +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index e32e1b51e0..67aeaa825b 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -63,11 +63,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) + } + } + +-static int vfio_dma_unmap_bitmap(VFIOContainer *container, ++static int vfio_dma_unmap_bitmap(const VFIOContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ const VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_type1_dma_unmap 
*unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; +@@ -116,7 +116,7 @@ unmap_exit: + return ret; + } + +-VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++VFIODMARange *vfio_lookup_match_range(const VFIOContainer *container, + hwaddr start_addr, hwaddr size) + { + VFIODMARange *qrange; +@@ -142,11 +142,12 @@ void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange) + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +-static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb) ++static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, +@@ -216,11 +217,11 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + return 0; + } + +-static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ++static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) + { + VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, +@@ -257,11 +258,12 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, + return -errno; + } + +-static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, +- bool start) ++static int ++vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, ++ bool start) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, 
VFIOContainer, ++ bcontainer); + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), +@@ -283,12 +285,12 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + return ret; + } + +-static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 5accd26484..87a561c545 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -26,10 +26,10 @@ + #include "qemu/chardev_open.h" + #include "pci.h" + +-static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, ++static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) + { +- VFIOIOMMUFDContainer *container = ++ const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_dma(container->be, +@@ -37,11 +37,11 @@ static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, + iova, size, vaddr, readonly); + } + +-static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, ++static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { +- VFIOIOMMUFDContainer *container = ++ const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 5f35f2900b..37f01410d5 100644 +--- 
a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -186,7 +186,7 @@ typedef struct VFIODisplay { + VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + +-VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++VFIODMARange *vfio_lookup_match_range(const VFIOContainer *container, + hwaddr start_addr, hwaddr size); + void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange); + +@@ -258,13 +258,15 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); +-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); +-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++bool ++vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer); ++bool ++vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer); ++int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, +- uint64_t size, ram_addr_t ram_addr); ++int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, ++ uint64_t size, ram_addr_t ram_addr); + + /* Returns 0 on success, or a negative errno. 
*/ + int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 45bb19c767..2ae297ccda 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -82,7 +82,7 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, + MemoryRegionSection *section); + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start); +-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + +@@ -93,18 +93,20 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + struct VFIOIOMMUOps { + /* basic feature */ +- int (*dma_map)(VFIOContainerBase *bcontainer, ++ int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +- int (*dma_unmap)(VFIOContainerBase *bcontainer, ++ int (*dma_unmap)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); + int (*attach_device)(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); + void (*detach_device)(VFIODevice *vbasedev); + /* migration feature */ +- int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); +- int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, ++ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, ++ bool start); ++ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + /* PCI specific */ + int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); +-- +2.41.0.windows.1 + diff --git a/vfio-Make-migration-support-experimental.patch b/vfio-Make-migration-support-experimental.patch deleted file mode 100644 index 
3bf32ecaf443b40929932743cd3d9f3b951011b2..0000000000000000000000000000000000000000 --- a/vfio-Make-migration-support-experimental.patch +++ /dev/null @@ -1,72 +0,0 @@ -From d0a8ba1957743c55547ec2ccd8cb09b84a3354d2 Mon Sep 17 00:00:00 2001 -From: Alex Williamson -Date: Mon, 9 Nov 2020 11:56:02 -0700 -Subject: [PATCH] vfio: Make migration support experimental - -Support for migration of vfio devices is still in flux. Developers -are attempting to add support for new devices and new architectures, -but none are yet readily available for validation. We have concerns -whether we're transferring device resources at the right point in the -migration, whether we're guaranteeing that updates during pre-copy are -migrated, and whether we can provide bit-stream compatibility should -any of this change. Even the question of whether devices should -participate in dirty page tracking during pre-copy seems contentious. -In short, migration support has not had enough soak time and it feels -premature to mark it as supported. - -Create an experimental option such that we can continue to develop. - -[Retaining previous acks/reviews for a previously identical code - change with different specifics in the commit log.] - -Reviewed-by: Dr. 
David Alan Gilbert -Acked-by: Cornelia Huck -Signed-off-by: Alex Williamson -Signed-off-by: Kunkun Jiang ---- - hw/vfio/migration.c | 2 +- - hw/vfio/pci.c | 2 ++ - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 8546075706..033cb2b0c9 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -888,7 +888,7 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) - Error *local_err = NULL; - int ret = -ENOTSUP; - -- if (!container->dirty_pages_supported) { -+ if (!vbasedev->enable_migration || !container->dirty_pages_supported) { - goto add_blocker; - } - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index da7c740bce..2795b8bd12 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3192,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = { - VFIO_FEATURE_ENABLE_REQ_BIT, true), - DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), -+ DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, -+ vbasedev.enable_migration, false), - DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), - DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, - vbasedev.balloon_allowed, false), -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 048731e81f..7398631d4c 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -123,6 +123,7 @@ typedef struct VFIODevice { - bool needs_reset; - bool no_mmap; - bool balloon_allowed; -+ bool enable_migration; - VFIODeviceOps *ops; - unsigned int num_irqs; - unsigned int num_regions; --- -2.27.0 - diff --git a/vfio-Make-vfio-pci-device-migration-capable.patch b/vfio-Make-vfio-pci-device-migration-capable.patch deleted file mode 100644 index 7e87ec7d7820b8ee4be0de5d55039336c47988fd..0000000000000000000000000000000000000000 --- a/vfio-Make-vfio-pci-device-migration-capable.patch +++ /dev/null @@ -1,73 +0,0 
@@ -From b20bf027d44809dd6c6376cf0b77e5c5b2057cba Mon Sep 17 00:00:00 2001 -From: Jens Freimann -Date: Tue, 29 Oct 2019 12:49:05 +0100 -Subject: [PATCH] vfio: Make vfio-pci device migration capable - -If the device is not a failover primary device, call -vfio_migration_probe() and vfio_migration_finalize() to enable -migration support for those devices that support it respectively to -tear it down again. -Removed migration blocker from VFIO PCI device specific structure and use -migration blocker from generic structure of VFIO device. - -Note: Since the current version don't add the failover feature for assigned -PCI devices, just remove the failover related code in the original patch for -simplicity. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Cornelia Huck -Signed-off-by: Alex Williamson -Signed-off-by: Shenming Lu ---- - hw/vfio/pci.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index b9fae3ad28..a637c35e7a 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3049,6 +3049,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - } - } - -+ ret = vfio_migration_probe(&vdev->vbasedev, errp); -+ if (ret) { -+ error_report("%s: Migration disabled", vdev->vbasedev.name); -+ } -+ - vfio_register_err_notifier(vdev); - vfio_register_req_notifier(vdev); - vfio_setup_resetfn_quirk(vdev); -@@ -3096,6 +3101,7 @@ static void vfio_exitfn(PCIDevice *pdev) - } - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); -+ vfio_migration_finalize(&vdev->vbasedev); - } - - static void vfio_pci_reset(DeviceState *dev) -@@ -3204,11 +3210,6 @@ static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_END_OF_LIST(), - }; - --static const VMStateDescription vfio_pci_vmstate = { -- .name = "vfio-pci", -- .unmigratable = 1, --}; -- - static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -@@ -3216,7 
+3217,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) - - dc->reset = vfio_pci_reset; - dc->props = vfio_pci_dev_properties; -- dc->vmsd = &vfio_pci_vmstate; - dc->desc = "VFIO-based PCI device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); - pdc->realize = vfio_realize; --- -2.27.0 - diff --git a/vfio-Move-the-saving-of-the-config-space-to-the-righ.patch b/vfio-Move-the-saving-of-the-config-space-to-the-righ.patch deleted file mode 100644 index 438c426803c4f77ea21220e6917ecf27ab566857..0000000000000000000000000000000000000000 --- a/vfio-Move-the-saving-of-the-config-space-to-the-righ.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 483baf4c668fbd2da76e6948576e13eded1c54ec Mon Sep 17 00:00:00 2001 -From: Shenming Lu -Date: Wed, 10 Mar 2021 11:02:31 +0800 -Subject: [PATCH] vfio: Move the saving of the config space to the right place - in VFIO migration - -On ARM64 the VFIO SET_IRQS ioctl is dependent on the VM interrupt -setup, if the restoring of the VFIO PCI device config space is -before the VGIC, an error might occur in the kernel. - -So we move the saving of the config space to the non-iterable -process, thus it will be called after the VGIC according to -their priorities. - -As for the possible dependence of the device specific migration -data on it's config space, we can let the vendor driver to -include any config info it needs in its own data stream. 
- -Signed-off-by: Shenming Lu -Reviewed-by: Kirti Wankhede -Message-Id: <20210310030233.1133-2-lushenming@huawei.com> -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 25 +++++++++++++++---------- - 1 file changed, 15 insertions(+), 10 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index b77c66557e..ea36ae5225 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -575,11 +575,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - return ret; - } - -- ret = vfio_save_device_config_state(f, opaque); -- if (ret) { -- return ret; -- } -- - ret = vfio_update_pending(vbasedev); - if (ret) { - return ret; -@@ -620,6 +615,19 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - return ret; - } - -+static void vfio_save_state(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ int ret; -+ -+ ret = vfio_save_device_config_state(f, opaque); -+ if (ret) { -+ error_report("%s: Failed to save device config space", -+ vbasedev->name); -+ qemu_file_set_error(f, ret); -+ } -+} -+ - static int vfio_load_setup(QEMUFile *f, void *opaque) - { - VFIODevice *vbasedev = opaque; -@@ -670,11 +678,7 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - switch (data) { - case VFIO_MIG_FLAG_DEV_CONFIG_STATE: - { -- ret = vfio_load_device_config_state(f, opaque); -- if (ret) { -- return ret; -- } -- break; -+ return vfio_load_device_config_state(f, opaque); - } - case VFIO_MIG_FLAG_DEV_SETUP_STATE: - { -@@ -720,6 +724,7 @@ static SaveVMHandlers savevm_vfio_handlers = { - .save_live_pending = vfio_save_pending, - .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, -+ .save_state = vfio_save_state, - .load_setup = vfio_load_setup, - .load_cleanup = vfio_load_cleanup, - .load_state = vfio_load_state, --- -2.27.0 - diff --git a/vfio-Only-map-shared-region-for-CSV3-virtual-machine.patch 
b/vfio-Only-map-shared-region-for-CSV3-virtual-machine.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c0d37715fd29001bbf093f41fb764147e640630 --- /dev/null +++ b/vfio-Only-map-shared-region-for-CSV3-virtual-machine.patch @@ -0,0 +1,397 @@ +From 5631d7e167d87c4e2f9283cfac39f2f4107203cc Mon Sep 17 00:00:00 2001 +From: liuyafei +Date: Mon, 22 May 2023 20:37:40 +0800 +Subject: [PATCH] vfio: Only map shared region for CSV3 virtual machine + +qemu vfio listener map/unmap all of the virtual machine's memory. +It does not work for CSV3 virtual machine, as only shared memory +should be accessed by device. + +Signed-off-by: liuyafei +Signed-off-by: hanliyang +--- + hw/vfio/container.c | 46 +++++++++++- + include/exec/memory.h | 11 +++ + system/memory.c | 18 +++++ + target/i386/csv-sysemu-stub.c | 10 +++ + target/i386/csv.c | 134 ++++++++++++++++++++++++++++++++++ + target/i386/csv.h | 12 +++ + target/i386/kvm/kvm.c | 2 + + 7 files changed, 230 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 422235a221..77e61cfedd 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -30,6 +30,7 @@ + #include "qemu/error-report.h" + #include "qemu/range.h" + #include "sysemu/reset.h" ++#include "sysemu/kvm.h" + #include "trace.h" + #include "qapi/error.h" + #include "migration/migration.h" +@@ -534,6 +535,32 @@ static void vfio_free_container(VFIOContainer *container) + g_free(container); + } + ++static SharedRegionListener *g_shl; ++ ++static void shared_memory_listener_register(MemoryListener *listener, ++ AddressSpace *as) ++{ ++ SharedRegionListener *shl; ++ ++ shl = g_new0(SharedRegionListener, 1); ++ ++ shl->listener = listener; ++ shl->as = as; ++ ++ shared_region_register_listener(shl); ++ g_shl = shl; ++} ++ ++static void shared_memory_listener_unregister(void) ++{ ++ SharedRegionListener *shl = g_shl; ++ ++ shared_region_unregister_listener(shl); ++ ++ g_free(shl); ++ g_shl = NULL; ++} ++ + static 
int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -681,7 +708,12 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + container->listener = vfio_memory_listener; + +- memory_listener_register(&container->listener, container->space->as); ++ if (kvm_csv3_enabled()) { ++ shared_memory_listener_register(&container->listener, ++ container->space->as); ++ } else { ++ memory_listener_register(&container->listener, container->space->as); ++ } + + if (container->error) { + ret = -1; +@@ -697,7 +729,11 @@ listener_release_exit: + QLIST_REMOVE(group, container_next); + QLIST_REMOVE(container, next); + vfio_kvm_device_del_group(group); +- memory_listener_unregister(&container->listener); ++ if (kvm_csv3_enabled()) { ++ shared_memory_listener_unregister(); ++ } else { ++ memory_listener_unregister(&container->listener); ++ } + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); +@@ -731,7 +767,11 @@ static void vfio_disconnect_container(VFIOGroup *group) + * group. 
+ */ + if (QLIST_EMPTY(&container->group_list)) { +- memory_listener_unregister(&container->listener); ++ if (kvm_csv3_enabled()) { ++ shared_memory_listener_unregister(); ++ } else { ++ memory_listener_unregister(&container->listener); ++ } + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 73d274d8f3..542c9da918 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -775,6 +775,17 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + ram_addr_t *ram_addr, bool *read_only, + bool *mr_has_discard_manager); + ++typedef struct SharedRegionListener SharedRegionListener; ++struct SharedRegionListener { ++ MemoryListener *listener; ++ AddressSpace *as; ++ QTAILQ_ENTRY(SharedRegionListener) next; ++}; ++ ++void shared_region_register_listener(SharedRegionListener *shl); ++void shared_region_unregister_listener(SharedRegionListener *shl); ++void *shared_region_listeners_get(void); ++ + typedef struct CoalescedMemoryRange CoalescedMemoryRange; + typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; + +diff --git a/system/memory.c b/system/memory.c +index 1ae03074f3..9db07fd832 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -48,6 +48,9 @@ static QTAILQ_HEAD(, MemoryListener) memory_listeners + static QTAILQ_HEAD(, AddressSpace) address_spaces + = QTAILQ_HEAD_INITIALIZER(address_spaces); + ++static QTAILQ_HEAD(, SharedRegionListener) shared_region_listeners ++ = QTAILQ_HEAD_INITIALIZER(shared_region_listeners); ++ + static GHashTable *flat_views; + + typedef struct AddrRange AddrRange; +@@ -2226,6 +2229,21 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + return true; + } + ++void shared_region_register_listener(SharedRegionListener *shl) ++{ ++ QTAILQ_INSERT_TAIL(&shared_region_listeners, shl, next); ++} ++ ++void 
shared_region_unregister_listener(SharedRegionListener *shl) ++{ ++ QTAILQ_REMOVE(&shared_region_listeners, shl, next); ++} ++ ++void *shared_region_listeners_get(void) ++{ ++ return &shared_region_listeners; ++} ++ + void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) + { + uint8_t mask = 1 << client; +diff --git a/target/i386/csv-sysemu-stub.c b/target/i386/csv-sysemu-stub.c +index 23d885f0f3..db22c299a6 100644 +--- a/target/i386/csv-sysemu-stub.c ++++ b/target/i386/csv-sysemu-stub.c +@@ -29,3 +29,13 @@ int csv3_launch_encrypt_vmcb(void) + { + g_assert_not_reached(); + } ++ ++int csv3_shared_region_dma_map(uint64_t start, uint64_t end) ++{ ++ return 0; ++} ++ ++void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end) ++{ ++ ++} +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 65d87de003..e4706efa27 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -15,6 +15,7 @@ + #include "qemu/error-report.h" + #include "qapi/error.h" + #include "sysemu/kvm.h" ++#include "exec/address-spaces.h" + + #include + +@@ -67,6 +68,8 @@ csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) + csv3_guest.state = state; + csv3_guest.sev_ioctl = ops->sev_ioctl; + csv3_guest.fw_error_to_str = ops->fw_error_to_str; ++ QTAILQ_INIT(&csv3_guest.dma_map_regions_list); ++ qemu_mutex_init(&csv3_guest.dma_map_regions_list_mutex); + } + return 0; + } +@@ -167,3 +170,134 @@ csv3_launch_encrypt_vmcb(void) + err: + return ret; + } ++ ++int csv3_shared_region_dma_map(uint64_t start, uint64_t end) ++{ ++ MemoryRegionSection section; ++ AddressSpace *as; ++ QTAILQ_HEAD(, SharedRegionListener) *shared_region_listeners; ++ SharedRegionListener *shl; ++ MemoryListener *listener; ++ uint64_t size; ++ Csv3GuestState *s = &csv3_guest; ++ struct dma_map_region *region, *pos; ++ int ret = 0; ++ ++ if (!csv3_enabled()) ++ return 0; ++ ++ if (end <= start) ++ return 0; ++ ++ shared_region_listeners = shared_region_listeners_get(); ++ if 
(QTAILQ_EMPTY(shared_region_listeners)) ++ return 0; ++ ++ size = end - start; ++ ++ qemu_mutex_lock(&s->dma_map_regions_list_mutex); ++ QTAILQ_FOREACH(pos, &s->dma_map_regions_list, list) { ++ if (start >= (pos->start + pos->size)) { ++ continue; ++ } else if ((start + size) <= pos->start) { ++ break; ++ } else { ++ goto end; ++ } ++ } ++ QTAILQ_FOREACH(shl, shared_region_listeners, next) { ++ listener = shl->listener; ++ as = shl->as; ++ section = memory_region_find(as->root, start, size); ++ if (!section.mr) { ++ goto end; ++ } ++ ++ if (!memory_region_is_ram(section.mr)) { ++ memory_region_unref(section.mr); ++ goto end; ++ } ++ ++ if (listener->region_add) { ++ listener->region_add(listener, &section); ++ } ++ memory_region_unref(section.mr); ++ } ++ ++ region = g_malloc0(sizeof(*region)); ++ if (!region) { ++ ret = -1; ++ goto end; ++ } ++ region->start = start; ++ region->size = size; ++ ++ if (pos) { ++ QTAILQ_INSERT_BEFORE(pos, region, list); ++ } else { ++ QTAILQ_INSERT_TAIL(&s->dma_map_regions_list, region, list); ++ } ++ ++end: ++ qemu_mutex_unlock(&s->dma_map_regions_list_mutex); ++ return ret; ++} ++ ++void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end) ++{ ++ MemoryRegionSection section; ++ AddressSpace *as; ++ QTAILQ_HEAD(, SharedRegionListener) *shared_region_listeners; ++ SharedRegionListener *shl; ++ MemoryListener *listener; ++ uint64_t size; ++ Csv3GuestState *s = &csv3_guest; ++ struct dma_map_region *pos, *next_pos; ++ ++ if (!csv3_enabled()) ++ return; ++ ++ if (end <= start) ++ return; ++ ++ shared_region_listeners = shared_region_listeners_get(); ++ if (QTAILQ_EMPTY(shared_region_listeners)) ++ return; ++ ++ size = end - start; ++ ++ qemu_mutex_lock(&s->dma_map_regions_list_mutex); ++ QTAILQ_FOREACH_SAFE(pos, &s->dma_map_regions_list, list, next_pos) { ++ uint64_t l, r; ++ uint64_t curr_end = pos->start + pos->size; ++ ++ l = MAX(start, pos->start); ++ r = MIN(start + size, pos->start + pos->size); ++ if (l < r) { ++ if ((start <=
pos->start) && (start + size >= pos->start + pos->size)) { ++ QTAILQ_FOREACH(shl, shared_region_listeners, next) { ++ listener = shl->listener; ++ as = shl->as; ++ section = memory_region_find(as->root, pos->start, pos->size); ++ if (!section.mr) { ++ goto end; ++ } ++ if (listener->region_del) { ++ listener->region_del(listener, &section); ++ } ++ memory_region_unref(section.mr); ++ } ++ ++ QTAILQ_REMOVE(&s->dma_map_regions_list, pos, list); ++ g_free(pos); ++ } ++ break; ++ } ++ if ((start + size) <= curr_end) { ++ break; ++ } ++ } ++end: ++ qemu_mutex_unlock(&s->dma_map_regions_list_mutex); ++ return; ++} +diff --git a/target/i386/csv.h b/target/i386/csv.h +index 3caf216743..12733341b3 100644 +--- a/target/i386/csv.h ++++ b/target/i386/csv.h +@@ -15,6 +15,8 @@ + #define I386_CSV_H + + #include "qapi/qapi-commands-misc-target.h" ++#include "qemu/thread.h" ++#include "qemu/queue.h" + #include "sev.h" + + #define GUEST_POLICY_CSV3_BIT (1 << 6) +@@ -74,12 +76,19 @@ int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent); + int csv_load_incoming_cpu_state(QEMUFile *f); + + /* CSV3 */ ++struct dma_map_region { ++ uint64_t start, size; ++ QTAILQ_ENTRY(dma_map_region) list; ++}; ++ + struct Csv3GuestState { + uint32_t policy; + int sev_fd; + void *state; + int (*sev_ioctl)(int fd, int cmd, void *data, int *error); + const char *(*fw_error_to_str)(int code); ++ QTAILQ_HEAD(, dma_map_region) dma_map_regions_list; ++ QemuMutex dma_map_regions_list_mutex; + }; + + typedef struct Csv3GuestState Csv3GuestState; +@@ -90,4 +99,7 @@ extern int csv3_launch_encrypt_vmcb(void); + + int csv3_load_data(uint64_t gpa, uint8_t *ptr, uint64_t len, Error **errp); + ++int csv3_shared_region_dma_map(uint64_t start, uint64_t end); ++void csv3_shared_region_dma_unmap(uint64_t start, uint64_t end); ++ + #endif +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 2866a6d0ec..925f4f8040 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5026,8 +5026,10 @@
static int kvm_handle_exit_hypercall(X86CPU *cpu, struct kvm_run *run) + + if (enc) { + sev_remove_shared_regions_list(gfn_start, gfn_end); ++ csv3_shared_region_dma_unmap(gpa, gfn_end << TARGET_PAGE_BITS); + } else { + sev_add_shared_regions_list(gfn_start, gfn_end); ++ csv3_shared_region_dma_map(gpa, gfn_end << TARGET_PAGE_BITS); + } + } + return 0; +-- +2.41.0.windows.1 + diff --git a/vfio-Pass-stage-1-MSI-bindings-to-the-host.patch b/vfio-Pass-stage-1-MSI-bindings-to-the-host.patch deleted file mode 100644 index 1ad94b06ad73ccf44d049daa2b8ff35b3624d539..0000000000000000000000000000000000000000 --- a/vfio-Pass-stage-1-MSI-bindings-to-the-host.patch +++ /dev/null @@ -1,262 +0,0 @@ -From 1729ae16dc557c0ad54cab3096b5cb6649d181ae Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 14 Aug 2018 08:08:11 -0400 -Subject: [PATCH] vfio: Pass stage 1 MSI bindings to the host - -We register the stage1 MSI bindings when enabling the vectors -and we unregister them on msi disable. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 59 +++++++++++++++++++++++++++ - hw/vfio/pci.c | 76 ++++++++++++++++++++++++++++++++++- - hw/vfio/trace-events | 2 + - include/hw/vfio/vfio-common.h | 12 ++++++ - 4 files changed, 147 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index cc50efdbc1..db9af3b0e5 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -709,6 +709,65 @@ static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - } - } - -+int vfio_iommu_set_msi_binding(VFIOContainer *container, int n, -+ IOMMUTLBEntry *iotlb) -+{ -+ struct vfio_iommu_type1_set_msi_binding ustruct; -+ VFIOMSIBinding *binding; -+ int ret; -+ -+ QLIST_FOREACH(binding, &container->msibinding_list, next) { -+ if (binding->index == n) { -+ return 0; -+ } -+ } -+ -+ ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding); -+ ustruct.iova = iotlb->iova; -+ ustruct.flags = VFIO_IOMMU_BIND_MSI; -+ ustruct.gpa = 
iotlb->translated_addr; -+ ustruct.size = iotlb->addr_mask + 1; -+ ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct); -+ if (ret) { -+ error_report("%s: failed to register the stage1 MSI binding (%m)", -+ __func__); -+ return ret; -+ } -+ binding = g_new0(VFIOMSIBinding, 1); -+ binding->iova = ustruct.iova; -+ binding->gpa = ustruct.gpa; -+ binding->size = ustruct.size; -+ binding->index = n; -+ -+ QLIST_INSERT_HEAD(&container->msibinding_list, binding, next); -+ return 0; -+} -+ -+int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n) -+{ -+ struct vfio_iommu_type1_set_msi_binding ustruct; -+ VFIOMSIBinding *binding, *tmp; -+ int ret; -+ -+ ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding); -+ QLIST_FOREACH_SAFE(binding, &container->msibinding_list, next, tmp) { -+ if (binding->index != n) { -+ continue; -+ } -+ ustruct.flags = VFIO_IOMMU_UNBIND_MSI; -+ ustruct.iova = binding->iova; -+ ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct); -+ if (ret) { -+ error_report("Failed to unregister the stage1 MSI binding " -+ "for iova=0x%"PRIx64" (%m)", binding->iova); -+ } -+ QLIST_REMOVE(binding, next); -+ g_free(binding); -+ return ret; -+ } -+ return 0; -+} -+ - static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - { - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6c90ec9278..bbcba3fd16 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -360,6 +360,65 @@ static void vfio_msi_interrupt(void *opaque) - notify(&vdev->pdev, nr); - } - -+static bool vfio_iommu_require_msi_binding(IOMMUMemoryRegion *iommu_mr) -+{ -+ bool msi_translate = false, nested = false; -+ -+ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_MSI_TRANSLATE, -+ (void *)&msi_translate); -+ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED, -+ (void *)&nested); -+ if (!nested || !msi_translate) { -+ return false; -+ } -+ return true; -+} -+ -+static 
int vfio_register_msi_binding(VFIOPCIDevice *vdev, -+ int vector_n, bool set) -+{ -+ VFIOContainer *container = vdev->vbasedev.group->container; -+ PCIDevice *dev = &vdev->pdev; -+ AddressSpace *as = pci_device_iommu_address_space(dev); -+ IOMMUMemoryRegionClass *imrc; -+ IOMMUMemoryRegion *iommu_mr; -+ IOMMUTLBEntry entry; -+ MSIMessage msg; -+ -+ if (as == &address_space_memory) { -+ return 0; -+ } -+ -+ iommu_mr = IOMMU_MEMORY_REGION(as->root); -+ if (!vfio_iommu_require_msi_binding(iommu_mr)) { -+ return 0; -+ } -+ -+ /* MSI doorbell address is translated by an IOMMU */ -+ -+ if (!set) { /* unregister */ -+ trace_vfio_unregister_msi_binding(vdev->vbasedev.name, vector_n); -+ -+ return vfio_iommu_unset_msi_binding(container, vector_n); -+ } -+ -+ msg = pci_get_msi_message(dev, vector_n); -+ imrc = memory_region_get_iommu_class_nocheck(iommu_mr); -+ -+ rcu_read_lock(); -+ entry = imrc->translate(iommu_mr, msg.address, IOMMU_WO, 0); -+ rcu_read_unlock(); -+ -+ if (entry.perm == IOMMU_NONE) { -+ return -ENOENT; -+ } -+ -+ trace_vfio_register_msi_binding(vdev->vbasedev.name, vector_n, -+ msg.address, entry.translated_addr); -+ -+ return vfio_iommu_set_msi_binding(container, vector_n, &entry); -+} -+ - static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) - { - struct vfio_irq_set *irq_set; -@@ -377,7 +436,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) - fds = (int32_t *)&irq_set->data; - - for (i = 0; i < vdev->nr_vectors; i++) { -- int fd = -1; -+ int ret, fd = -1; - - /* - * MSI vs MSI-X - The guest has direct access to MSI mask and pending -@@ -386,6 +445,12 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) - * KVM signaling path only when configured and unmasked. 
- */ - if (vdev->msi_vectors[i].use) { -+ ret = vfio_register_msi_binding(vdev, i, true); -+ if (ret) { -+ error_report("%s failed to register S1 MSI binding " -+ "for vector %d(%d)", vdev->vbasedev.name, i, ret); -+ goto out; -+ } - if (vdev->msi_vectors[i].virq < 0 || - (msix && msix_is_masked(&vdev->pdev, i))) { - fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt); -@@ -399,6 +464,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); - -+out: - g_free(irq_set); - - return ret; -@@ -712,7 +778,8 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) - - static void vfio_msix_disable(VFIOPCIDevice *vdev) - { -- int i; -+ int ret, i; -+ - - msix_unset_vector_notifiers(&vdev->pdev); - -@@ -724,6 +791,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) - if (vdev->msi_vectors[i].use) { - vfio_msix_vector_release(&vdev->pdev, i); - msix_vector_unuse(&vdev->pdev, i); -+ ret = vfio_register_msi_binding(vdev, i, false); -+ if (ret) { -+ error_report("%s: failed to unregister S1 MSI binding " -+ "for vector %d(%d)", vdev->vbasedev.name, i, ret); -+ } - } - } - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index ee9a67d3ef..247b72c1eb 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -120,6 +120,8 @@ vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype - vfio_dma_unmap_overflow_workaround(void) "" - vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d" - vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d" -+vfio_register_msi_binding(const char *name, int vector, uint64_t giova, uint64_t gdb) "%s: register vector %d gIOVA=0x%"PRIx64 "-> gDB=0x%"PRIx64" stage 1 mapping" -+vfio_unregister_msi_binding(const char *name, int vector) "%s: unregister vector 
%d stage 1 mapping" - - # platform.c - vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 1277914ca8..b175158138 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -74,6 +74,14 @@ typedef struct VFIOAddressSpace { - QLIST_ENTRY(VFIOAddressSpace) list; - } VFIOAddressSpace; - -+typedef struct VFIOMSIBinding { -+ int index; -+ hwaddr iova; -+ hwaddr gpa; -+ hwaddr size; -+ QLIST_ENTRY(VFIOMSIBinding) next; -+} VFIOMSIBinding; -+ - struct VFIOGroup; - - typedef struct VFIODMARange { -@@ -101,6 +109,7 @@ typedef struct VFIOContainer { - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIODMARange) dma_list; -+ QLIST_HEAD(, VFIOMSIBinding) msibinding_list; - QLIST_ENTRY(VFIOContainer) next; - } VFIOContainer; - -@@ -210,6 +219,9 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); - void vfio_put_group(VFIOGroup *group); - int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp); -+int vfio_iommu_set_msi_binding(VFIOContainer *container, int n, -+ IOMMUTLBEntry *entry); -+int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n); - - extern const MemoryRegionOps vfio_region_ops; - typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; --- -2.27.0 - diff --git a/vfio-Register-SaveVMHandlers-for-VFIO-device.patch b/vfio-Register-SaveVMHandlers-for-VFIO-device.patch deleted file mode 100644 index 8e12cd2bb2bbfede3be871fb31a3a96562fedc15..0000000000000000000000000000000000000000 --- a/vfio-Register-SaveVMHandlers-for-VFIO-device.patch +++ /dev/null @@ -1,183 +0,0 @@ -From cd5b58f2ba20e59f2c29d955b8bbd7f5016030b7 Mon Sep 17 00:00:00 2001 -From: Kirti Wankhede -Date: Mon, 26 Oct 2020 15:06:17 +0530 -Subject: [PATCH] vfio: Register SaveVMHandlers for VFIO device - -Define flags to be used as delimiter in 
migration stream for VFIO devices. -Added .save_setup and .save_cleanup functions. Map & unmap migration -region from these functions at source during saving or pre-copy phase. - -Set VFIO device state depending on VM's state. During live migration, VM is -running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO -device. During save-restore, VM is paused, _SAVING state is set for VFIO device. - -Signed-off-by: Kirti Wankhede -Reviewed-by: Neo Jia -Reviewed-by: Cornelia Huck -Reviewed-by: Yan Zhao -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 102 +++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 2 + - 2 files changed, 104 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 0c6c9b655f..405228fc5a 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -8,12 +8,15 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/main-loop.h" -+#include "qemu/cutils.h" - #include - - #include "sysemu/sysemu.h" - #include "hw/vfio/vfio-common.h" - #include "cpu.h" - #include "migration/migration.h" -+#include "migration/vmstate.h" - #include "migration/qemu-file.h" - #include "migration/register.h" - #include "migration/blocker.h" -@@ -25,6 +28,22 @@ - #include "trace.h" - #include "hw/hw.h" - -+/* -+ * Flags to be used as unique delimiters for VFIO devices in the migration -+ * stream. These flags are composed as: -+ * 0xffffffff => MSB 32-bit all 1s -+ * 0xef10 => Magic ID, represents emulated (virtual) function IO -+ * 0x0000 => 16-bits reserved for flags -+ * -+ * The beginning of state information is marked by _DEV_CONFIG_STATE, -+ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a -+ * certain state information is marked by _END_OF_STATE. 
-+ */ -+#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) -+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) -+#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) -+#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) -+ - static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, - off_t off, bool iswrite) - { -@@ -129,6 +148,75 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, - return 0; - } - -+static void vfio_migration_cleanup(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ if (migration->region.mmaps) { -+ vfio_region_unmap(&migration->region); -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int vfio_save_setup(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ int ret; -+ -+ trace_vfio_save_setup(vbasedev->name); -+ -+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); -+ -+ if (migration->region.mmaps) { -+ /* -+ * Calling vfio_region_mmap() from migration thread. Memory API called -+ * from this function require locking the iothread when called from -+ * outside the main loop thread. 
-+ */ -+ qemu_mutex_lock_iothread(); -+ ret = vfio_region_mmap(&migration->region); -+ qemu_mutex_unlock_iothread(); -+ if (ret) { -+ error_report("%s: Failed to mmap VFIO migration region: %s", -+ vbasedev->name, strerror(-ret)); -+ error_report("%s: Falling back to slow path", vbasedev->name); -+ } -+ } -+ -+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, -+ VFIO_DEVICE_STATE_SAVING); -+ if (ret) { -+ error_report("%s: Failed to set state SAVING", vbasedev->name); -+ return ret; -+ } -+ -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ -+ ret = qemu_file_get_error(f); -+ if (ret) { -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void vfio_save_cleanup(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ -+ vfio_migration_cleanup(vbasedev); -+ trace_vfio_save_cleanup(vbasedev->name); -+} -+ -+static SaveVMHandlers savevm_vfio_handlers = { -+ .save_setup = vfio_save_setup, -+ .save_cleanup = vfio_save_cleanup, -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ - static void vfio_vmstate_change(void *opaque, int running, RunState state) - { - VFIODevice *vbasedev = opaque; -@@ -215,6 +303,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, - int ret; - Object *obj; - VFIOMigration *migration; -+ char id[256] = ""; -+ g_autofree char *path = NULL, *oid = NULL; - - if (!vbasedev->ops->vfio_get_object) { - return -EINVAL; -@@ -244,6 +334,18 @@ static int vfio_migration_init(VFIODevice *vbasedev, - - migration = vbasedev->migration; - migration->vbasedev = vbasedev; -+ -+ oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); -+ if (oid) { -+ path = g_strdup_printf("%s/vfio", oid); -+ } else { -+ path = g_strdup("vfio"); -+ } -+ strpadcpy(id, sizeof(id), path, '\0'); -+ -+ register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, -+ vbasedev); -+ - migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, - vbasedev); - migration->migration_state.notify = 
vfio_migration_state_notifier; -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index bd3d47b005..86c18def01 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -149,3 +149,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" - vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" - vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" - vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" -+vfio_save_setup(const char *name) " (%s)" -+vfio_save_cleanup(const char *name) " (%s)" --- -2.27.0 - diff --git a/vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch b/vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch deleted file mode 100644 index d2138b57e71e494198068253be7d7229b510e598..0000000000000000000000000000000000000000 --- a/vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch +++ /dev/null @@ -1,41 +0,0 @@ -From b9d74bcf6aefe8ab607439ad1c518a453053ccee Mon Sep 17 00:00:00 2001 -From: Shenming Lu -Date: Wed, 10 Mar 2021 11:02:32 +0800 -Subject: [PATCH] vfio: Set the priority of the VFIO VM state change handler - explicitly - -In the VFIO VM state change handler when stopping the VM, the _RUNNING -bit in device_state is cleared which makes the VFIO device stop, including -no longer generating interrupts. Then we can save the pending states of -all interrupts in the GIC VM state change handler (on ARM). - -So we have to set the priority of the VFIO VM state change handler -explicitly (like virtio devices) to ensure it is called before the -GIC's in saving. 
- -Signed-off-by: Shenming Lu -Reviewed-by: Kirti Wankhede -Reviewed-by: Cornelia Huck -Message-Id: <20210310030233.1133-3-lushenming@huawei.com> -Signed-off-by: Alex Williamson ---- - hw/vfio/migration.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index ea36ae5225..1a97784486 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -862,7 +862,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, - register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, - vbasedev); - -- migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, -+ migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, -+ vfio_vmstate_change, - vbasedev); - migration->migration_state.notify = vfio_migration_state_notifier; - add_migration_state_change_notifier(&migration->migration_state); --- -2.27.0 - diff --git a/vfio-Set-up-nested-stage-mappings.patch b/vfio-Set-up-nested-stage-mappings.patch deleted file mode 100644 index 66659e81a8dacc392f4f6a56ab988c36a52af8a2..0000000000000000000000000000000000000000 --- a/vfio-Set-up-nested-stage-mappings.patch +++ /dev/null @@ -1,277 +0,0 @@ -From a65c40f9d1025a9843dec38070d9f26792b00892 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 29 Aug 2018 18:10:12 +0200 -Subject: [PATCH] vfio: Set up nested stage mappings - -In nested mode, legacy vfio_iommu_map_notify cannot be used as -there is no "caching" mode and we do not trap on map. - -On Intel, vfio_iommu_map_notify was used to DMA map the RAM -through the host single stage. - -With nested mode, we need to setup the stage 2 and the stage 1 -separately. This patch introduces a prereg_listener to setup -the stage 2 mapping. - -The stage 1 mapping, owned by the guest, is passed to the host -when the guest invalidates the stage 1 configuration, through -a dedicated PCIPASIDOps callback. 
Guest IOTLB invalidations -are cascaded downto the host through another IOMMU MR UNMAP -notifier. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 136 +++++++++++++++++++++++++++++++++++++++++-- - hw/vfio/pci.c | 21 +++++++ - hw/vfio/trace-events | 2 + - 3 files changed, 154 insertions(+), 5 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 8837d33c57..cc50efdbc1 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -642,6 +642,73 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - return true; - } - -+/* Propagate a guest IOTLB invalidation to the host (nested mode) */ -+static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) -+{ -+ VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); -+ struct vfio_iommu_type1_cache_invalidate ustruct = {}; -+ VFIOContainer *container = giommu->container; -+ int ret; -+ -+ assert(iotlb->perm == IOMMU_NONE); -+ -+ ustruct.argsz = sizeof(ustruct); -+ ustruct.flags = 0; -+ ustruct.info.argsz = sizeof(struct iommu_cache_invalidate_info); -+ ustruct.info.version = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1; -+ ustruct.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB; -+ -+ switch (iotlb->granularity) { -+ case IOMMU_INV_GRAN_DOMAIN: -+ ustruct.info.granularity = IOMMU_INV_GRANU_DOMAIN; -+ break; -+ case IOMMU_INV_GRAN_PASID: -+ { -+ struct iommu_inv_pasid_info *pasid_info; -+ int archid = -1; -+ -+ pasid_info = &ustruct.info.granu.pasid_info; -+ ustruct.info.granularity = IOMMU_INV_GRANU_PASID; -+ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) { -+ pasid_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID; -+ archid = iotlb->arch_id; -+ } -+ pasid_info->archid = archid; -+ trace_vfio_iommu_asid_inv_iotlb(archid); -+ break; -+ } -+ case IOMMU_INV_GRAN_ADDR: -+ { -+ hwaddr start = iotlb->iova + giommu->iommu_offset; -+ struct iommu_inv_addr_info *addr_info; -+ size_t size = iotlb->addr_mask + 1; -+ int archid = -1; -+ -+ addr_info = 
&ustruct.info.granu.addr_info; -+ ustruct.info.granularity = IOMMU_INV_GRANU_ADDR; -+ if (iotlb->leaf) { -+ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_LEAF; -+ } -+ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) { -+ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID; -+ archid = iotlb->arch_id; -+ } -+ addr_info->archid = archid; -+ addr_info->addr = start; -+ addr_info->granule_size = size; -+ addr_info->nb_granules = 1; -+ trace_vfio_iommu_addr_inv_iotlb(archid, start, size, -+ 1, iotlb->leaf); -+ break; -+ } -+ } -+ -+ ret = ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, &ustruct); -+ if (ret) { -+ error_report("%p: failed to invalidate CACHE (%d)", container, ret); -+ } -+} -+ - static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - { - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); -@@ -823,6 +890,32 @@ static void vfio_dma_unmap_ram_section(VFIOContainer *container, - } - } - -+static void vfio_prereg_listener_region_add(MemoryListener *listener, -+ MemoryRegionSection *section) -+{ -+ VFIOContainer *container = -+ container_of(listener, VFIOContainer, prereg_listener); -+ -+ if (!memory_region_is_ram(section->mr)) { -+ return; -+ } -+ -+ vfio_dma_map_ram_section(container, section); -+} -+ -+static void vfio_prereg_listener_region_del(MemoryListener *listener, -+ MemoryRegionSection *section) -+{ -+ VFIOContainer *container = -+ container_of(listener, VFIOContainer, prereg_listener); -+ -+ if (!memory_region_is_ram(section->mr)) { -+ return; -+ } -+ -+ vfio_dma_unmap_ram_section(container, section); -+} -+ - static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -@@ -920,9 +1013,10 @@ static void vfio_listener_region_add(MemoryListener *listener, - memory_region_ref(section->mr); - - if (memory_region_is_iommu(section->mr)) { -+ IOMMUNotify notify; - VFIOGuestIOMMU *giommu; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); -- int iommu_idx; -+ int iommu_idx, flags; - - 
trace_vfio_listener_region_add_iommu(iova, end); - /* -@@ -941,15 +1035,27 @@ static void vfio_listener_region_add(MemoryListener *listener, - llend = int128_sub(llend, int128_one()); - iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, - MEMTXATTRS_UNSPECIFIED); -- iommu_notifier_init(&giommu->n, vfio_iommu_map_notify, -- IOMMU_NOTIFIER_ALL, -+ -+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { -+ /* IOTLB unmap notifier to propagate guest IOTLB invalidations */ -+ flags = IOMMU_NOTIFIER_UNMAP; -+ notify = vfio_iommu_unmap_notify; -+ } else { -+ /* MAP/UNMAP IOTLB notifier */ -+ flags = IOMMU_NOTIFIER_ALL; -+ notify = vfio_iommu_map_notify; -+ } -+ -+ iommu_notifier_init(&giommu->n, notify, flags, - section->offset_within_region, - int128_get64(llend), - iommu_idx); - QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); - - memory_region_register_iommu_notifier(section->mr, &giommu->n); -- memory_region_iommu_replay(giommu->iommu, &giommu->n); -+ if (flags & IOMMU_NOTIFIER_MAP) { -+ memory_region_iommu_replay(giommu->iommu, &giommu->n); -+ } - - return; - } -@@ -1367,10 +1473,16 @@ static const MemoryListener vfio_memory_listener = { - .log_clear = vfio_listener_log_clear, - }; - -+static MemoryListener vfio_memory_prereg_listener = { -+ .region_add = vfio_prereg_listener_region_add, -+ .region_del = vfio_prereg_listener_region_del, -+}; -+ - static void vfio_listener_release(VFIOContainer *container) - { - memory_listener_unregister(&container->listener); -- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || -+ container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { - memory_listener_unregister(&container->prereg_listener); - } - } -@@ -1976,6 +2088,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - vfio_get_iommu_info_migration(container, info); - } - g_free(info); -+ -+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { -+ 
container->prereg_listener = vfio_memory_prereg_listener; -+ memory_listener_register(&container->prereg_listener, -+ &address_space_memory); -+ if (container->error) { -+ memory_listener_unregister(&container->prereg_listener); -+ ret = container->error; -+ error_setg(errp, -+ "RAM memory listener initialization failed " -+ "for container"); -+ goto free_container_exit; -+ } -+ } - break; - } - case VFIO_SPAPR_TCE_v2_IOMMU: -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 3641ad0c5c..6c90ec9278 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2766,6 +2766,25 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) - vdev->req_enabled = false; - } - -+static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn, -+ IOMMUConfig *config) -+{ -+ PCIDevice *pdev = bus->devices[devfn]; -+ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); -+ VFIOContainer *container = vdev->vbasedev.group->container; -+ struct vfio_iommu_type1_set_pasid_table info; -+ -+ info.argsz = sizeof(info); -+ info.flags = VFIO_PASID_TABLE_FLAG_SET; -+ memcpy(&info.config, &config->pasid_cfg, sizeof(config->pasid_cfg)); -+ -+ return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info); -+} -+ -+static PCIPASIDOps vfio_pci_pasid_ops = { -+ .set_pasid_table = vfio_iommu_set_pasid_table, -+}; -+ - static void vfio_realize(PCIDevice *pdev, Error **errp) - { - VFIOPCIDevice *vdev = PCI_VFIO(pdev); -@@ -3072,6 +3091,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - vfio_register_req_notifier(vdev); - vfio_setup_resetfn_quirk(vdev); - -+ pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops); -+ - return; - - out_teardown: -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 9b6c7ca61b..ee9a67d3ef 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -118,6 +118,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic - vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d 
[0x%lx - 0x%lx]" - vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" - vfio_dma_unmap_overflow_workaround(void) "" -+vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d" -+vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d" - - # platform.c - vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" --- -2.27.0 - diff --git a/vfio-Support-host-translation-granule-size.patch b/vfio-Support-host-translation-granule-size.patch deleted file mode 100644 index d5eab65155770160c38615d038ea66264e284acb..0000000000000000000000000000000000000000 --- a/vfio-Support-host-translation-granule-size.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 594cba5943b3e8bf1bd5720b1fa20d4662920ae0 Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Thu, 4 Mar 2021 21:34:46 +0800 -Subject: [PATCH] vfio: Support host translation granule size - -The cpu_physical_memory_set_dirty_lebitmap() can quickly deal with -the dirty pages of memory by bitmap-traveling, regardless of whether -the bitmap is aligned correctly or not. - -cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of -host page size. So it'd better to set bitmap_pgsize to host page size -to support more translation granule sizes. - -[aw: The Fixes commit below introduced code to restrict migration -support to configurations where the target page size intersects the -host dirty page support. For example, a 4K guest on a 4K host. -Due to the above flexibility in bitmap handling, this restriction -unnecessarily prevents mixed target/host pages size that could -otherwise be supported. Use host page size for dirty bitmap.] 
- -Fixes: fc49c9cbf2 ("vfio: Get migration capability flags for container") -Signed-off-by: Kunkun Jiang -Message-Id: <20210304133446.1521-1-jiangkunkun@huawei.com> -Signed-off-by: Alex Williamson ---- - hw/vfio/common.c | 48 +++++++++++++++++++++++++----------------------- - 1 file changed, 25 insertions(+), 23 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index ebd701faa0..a7817c90cc 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -377,7 +377,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, - { - struct vfio_iommu_type1_dma_unmap *unmap; - struct vfio_bitmap *bitmap; -- uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS; -+ uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size; - int ret; - - unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); -@@ -389,12 +389,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, - bitmap = (struct vfio_bitmap *)&unmap->data; - - /* -- * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of -- * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to -- * TARGET_PAGE_SIZE. -+ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of -+ * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize -+ * to qemu_real_host_page_size. 
- */ - -- bitmap->pgsize = TARGET_PAGE_SIZE; -+ bitmap->pgsize = qemu_real_host_page_size; - bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / - BITS_PER_BYTE; - -@@ -672,16 +672,17 @@ static void vfio_listener_region_add(MemoryListener *listener, - return; - } - -- if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != -- (section->offset_within_region & ~TARGET_PAGE_MASK))) { -+ if (unlikely((section->offset_within_address_space & -+ ~qemu_real_host_page_mask) != -+ (section->offset_within_region & ~qemu_real_host_page_mask))) { - error_report("%s received unaligned region", __func__); - return; - } - -- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); -+ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); - llend = int128_make64(section->offset_within_address_space); - llend = int128_add(llend, section->size); -- llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); -+ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); - - if (int128_ge(int128_make64(iova), llend)) { - return; -@@ -866,8 +867,9 @@ static void vfio_listener_region_del(MemoryListener *listener, - return; - } - -- if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != -- (section->offset_within_region & ~TARGET_PAGE_MASK))) { -+ if (unlikely((section->offset_within_address_space & -+ ~qemu_real_host_page_mask) != -+ (section->offset_within_region & ~qemu_real_host_page_mask))) { - error_report("%s received unaligned region", __func__); - return; - } -@@ -895,10 +897,10 @@ static void vfio_listener_region_del(MemoryListener *listener, - */ - } - -- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); -+ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); - llend = int128_make64(section->offset_within_address_space); - llend = int128_add(llend, section->size); -- llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); -+ llend = int128_and(llend, 
int128_exts64(qemu_real_host_page_mask)); - - if (int128_ge(int128_make64(iova), llend)) { - return; -@@ -967,13 +969,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - range->size = size; - - /* -- * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of -- * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to -- * TARGET_PAGE_SIZE. -+ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of -+ * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize -+ * to qemu_real_host_page_size. - */ -- range->bitmap.pgsize = TARGET_PAGE_SIZE; -+ range->bitmap.pgsize = qemu_real_host_page_size; - -- pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS; -+ pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size; - range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / - BITS_PER_BYTE; - range->bitmap.data = g_try_malloc0(range->bitmap.size); -@@ -1077,8 +1079,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - section->offset_within_region; - - return vfio_get_dirty_bitmap(container, -- TARGET_PAGE_ALIGN(section->offset_within_address_space), -- int128_get64(section->size), ram_addr); -+ REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), -+ int128_get64(section->size), ram_addr); - } - - static void vfio_listener_log_sync(MemoryListener *listener, -@@ -1572,10 +1574,10 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - header); - - /* -- * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of -- * TARGET_PAGE_SIZE to mark those dirty. -+ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of -+ * qemu_real_host_page_size to mark those dirty. 
- */ -- if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) { -+ if (cap_mig->pgsize_bitmap & qemu_real_host_page_size) { - container->dirty_pages_supported = true; - container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; - container->dirty_pgsizes = cap_mig->pgsize_bitmap; --- -2.27.0 - diff --git a/vfio-Synthesize-vPASID-capability-to-VM.patch b/vfio-Synthesize-vPASID-capability-to-VM.patch new file mode 100644 index 0000000000000000000000000000000000000000..48637f50c8520b4aea8899100413e6f32ed5bb9f --- /dev/null +++ b/vfio-Synthesize-vPASID-capability-to-VM.patch @@ -0,0 +1,114 @@ +From da7cdc41aa3813f6bb1c87ced178f60185dac692 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Thu, 12 Sep 2024 01:38:46 -0700 +Subject: [PATCH] vfio: Synthesize vPASID capability to VM + +If user wants to expose PASID capability in vIOMMU, then VFIO would also +report the PASID cap for this device if the underlying hardware supports +it as well. + +As a start, this chooses to put the vPASID cap in the last 8 bytes of the +vconfig space. This is a choice in the good hope of no conflict with any +existing cap or hidden registers. For the devices that has hidden registers, +user should figure out a proper offset for the vPASID cap. This may require +an option for user to config it. Here we leave it as a future extension. +There are more discussions on the mechanism of finding the proper offset. 
+ +https://lore.kernel.org/kvm/BN9PR11MB5276318969A212AD0649C7BE8CBE2@BN9PR11MB5276.namprd11.prod.outlook.com/ + +Signed-off-by: Yi Liu +--- + hw/pci/pcie.c | 12 ++++++++++++ + hw/vfio/pci.c | 28 ++++++++++++++++++++++++++++ + include/hw/pci/pcie.h | 4 ++++ + 3 files changed, 44 insertions(+) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index 04fbd794a8..a5b4e54bd7 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -1123,3 +1123,15 @@ void pcie_acs_reset(PCIDevice *dev) + pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0); + } + } ++ ++void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint16_t caps) ++{ ++ pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, 1, ++ offset, PCI_EXT_CAP_PASID_SIZEOF); ++ ++ dev->exp.pasid_cap = offset; ++ ++ pci_set_word(dev->config + offset + PCI_PASID_CAP, caps); ++ ++ pci_set_word(dev->wmask + dev->exp.pasid_cap + PCI_PASID_CTRL, 0x7); ++} +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index f585f285f4..293deb8737 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -21,6 +21,7 @@ + #include "qemu/osdep.h" + #include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include ++#include + #include + + #include "hw/hw.h" +@@ -2348,6 +2349,33 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) + + } + ++ { ++ HostIOMMUDeviceCaps *caps = &vdev->vbasedev.hiod->caps; ++ ++ /* ++ * TODO: Add option for enabling pasid at a safe offset, this adds the ++ * pasid capability in the end of the PCIE config space. 
++ */ ++ if (caps->max_pasid_log2 && pci_device_get_pasid_cap(&vdev->pdev)) { ++ uint16_t pasid_caps = (caps->max_pasid_log2 << 8) & PCI_PASID_CAP_WIDTH; ++ ++ if (caps->hw_caps & IOMMU_HW_CAP_PCI_PASID_EXEC) { ++ pasid_caps |= PCI_PASID_CAP_EXEC; ++ } ++ ++ if (caps->hw_caps & IOMMU_HW_CAP_PCI_PASID_PRIV) { ++ pasid_caps |= PCI_PASID_CAP_PRIV; ++ } ++ ++ pcie_pasid_init(pdev, ++ PCIE_CONFIG_SPACE_SIZE - PCI_EXT_CAP_PASID_SIZEOF, ++ pasid_caps); ++ ++ /* PASID capability is fully emulated by QEMU */ ++ memset(vdev->emulated_config_bits + pdev->exp.pasid_cap, 0xff, 8); ++ } ++ } ++ + /* Cleanup chain head ID if necessary */ + if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) { + pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0); +diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h +index 11f5a91bbb..41ee27f023 100644 +--- a/include/hw/pci/pcie.h ++++ b/include/hw/pci/pcie.h +@@ -79,6 +79,9 @@ struct PCIExpressDevice { + uint16_t sriov_cap; + PCIESriovPF sriov_pf; + PCIESriovVF sriov_vf; ++ ++ /* Offset of PASID capability in config space */ ++ uint16_t pasid_cap; + }; + + #define COMPAT_PROP_PCP "power_controller_present" +@@ -147,4 +150,5 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); ++void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint16_t caps); + #endif /* QEMU_PCIE_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-add-quirk-device-write-method.patch b/vfio-add-quirk-device-write-method.patch deleted file mode 100644 index d7e2c99dc212605291627dac4dee0512e1b34f86..0000000000000000000000000000000000000000 --- a/vfio-add-quirk-device-write-method.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 95ee5273e25ed606aa86f8a154c06887efc20494 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Mar 2021 17:12:57 +0800 -Subject: [PATCH] vfio: add quirk device write method - ---- - 
hw/vfio/pci-quirks.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index b35a640030..9ce790bdd2 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -12,6 +12,7 @@ - - #include "qemu/osdep.h" - #include "qemu/units.h" -+#include "qemu/log.h" - #include "qemu/error-report.h" - #include "qemu/main-loop.h" - #include "qemu/module.h" -@@ -275,8 +276,15 @@ static uint64_t vfio_ati_3c3_quirk_read(void *opaque, - return data; - } - -+static void vfio_ati_3c3_quirk_write(void *opaque, hwaddr addr, -+ uint64_t data, unsigned size) -+{ -+ qemu_log_mask(LOG_GUEST_ERROR, "%s not implemented\n", __func__); -+} -+ - static const MemoryRegionOps vfio_ati_3c3_quirk = { - .read = vfio_ati_3c3_quirk_read, -+ .write = vfio_ati_3c3_quirk_write, - .endianness = DEVICE_LITTLE_ENDIAN, - }; - --- -2.27.0 - diff --git a/vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch b/vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch new file mode 100644 index 0000000000000000000000000000000000000000..140933a793509a0211e3594f284d33af3673b13e --- /dev/null +++ b/vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch @@ -0,0 +1,67 @@ +From 6b9f02dbde780118d33abb998bc72ed246f50b6a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:50 +0800 +Subject: [PATCH] vfio/ap: Allow the selection of a given iommu backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-ap device: + +if the user wants to use the legacy backend, it shall not +link the vfio-ap device with any iommufd object: + + -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX + +This is called the legacy mode/backend. 
+ +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-ap device options: + + -object iommufd,id=iommufd0 + -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/ap.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index bbf69ff55a..80629609ae 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -11,10 +11,12 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + #include "qapi/error.h" + #include "hw/vfio/vfio-common.h" ++#include "sysemu/iommufd.h" + #include "hw/s390x/ap-device.h" + #include "qemu/error-report.h" + #include "qemu/event_notifier.h" +@@ -204,6 +206,10 @@ static void vfio_ap_unrealize(DeviceState *dev) + + static Property vfio_ap_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOAPDevice, vdev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.41.0.windows.1 + diff --git a/vfio-ap-Don-t-initialize-HOST_IOMMU_DEVICE-with-mdev.patch b/vfio-ap-Don-t-initialize-HOST_IOMMU_DEVICE-with-mdev.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4dcfd1b75526ab733750a4c6acbaab00171bb70 --- /dev/null +++ b/vfio-ap-Don-t-initialize-HOST_IOMMU_DEVICE-with-mdev.patch @@ -0,0 +1,35 @@ +From 44d573b10c45746e81d0d1786fe61d45160f2181 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 22 Jul 2024 15:07:12 +0800 +Subject: [PATCH] vfio/ap: Don't initialize HOST_IOMMU_DEVICE with mdev + +mdevs aren't "physical" devices and when asking for backing IOMMU info, +it fails the entire provisioning of the guest. 
Fix that by setting +vbasedev->mdev true so skipping HostIOMMUDevice initialization in the +presence of mdevs. + +Fixes: 930589520128 ("vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler") +Signed-off-by: Zhenzhong Duan +Reviewed-by: Joao Martins +Reviewed-by: Eric Auger +--- + hw/vfio/ap.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index e157aa1ff7..6b2bc32549 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -234,6 +234,9 @@ static void vfio_ap_instance_init(Object *obj) + */ + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, + DEVICE(vapdev), true); ++ ++ /* AP device is mdev type device */ ++ vbasedev->mdev = true; + } + + #ifdef CONFIG_IOMMUFD +-- +2.41.0.windows.1 + diff --git a/vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch b/vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch new file mode 100644 index 0000000000000000000000000000000000000000..f799c6f9ae969a364e40a3c18c97bad9ce10e707 --- /dev/null +++ b/vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch @@ -0,0 +1,78 @@ +From e4e2a6414eabe80d0d9f57446626c91c55b40afa Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:51 +0800 +Subject: [PATCH] vfio/ap: Make vfio cdev pre-openable by passing a file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/ap.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index 80629609ae..f180e4a32a 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -160,7 +160,10 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); + VFIODevice *vbasedev = &vapdev->vdev; + +- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); ++ if (vfio_device_get_name(vbasedev, errp) < 0) { ++ return; ++ } ++ + vbasedev->ops = &vfio_ap_ops; + vbasedev->type = VFIO_DEVICE_TYPE_AP; + vbasedev->dev = dev; +@@ -230,11 +233,28 @@ static const VMStateDescription vfio_ap_vmstate = { + .unmigratable = 1, + }; + ++static void vfio_ap_instance_init(Object *obj) ++{ ++ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); ++ ++ vapdev->vdev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_ap_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_AP_DEVICE(obj)->vdev, str, errp); ++} ++#endif ++ + static void vfio_ap_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, vfio_ap_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_ap_set_fd); ++#endif + dc->vmsd = &vfio_ap_vmstate; + dc->desc = "VFIO-based AP device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +@@ -249,6 +269,7 @@ static const TypeInfo vfio_ap_info = { + .name = TYPE_VFIO_AP_DEVICE, + .parent = TYPE_AP_DEVICE, + .instance_size = sizeof(VFIOAPDevice), ++ .instance_init = vfio_ap_instance_init, + .class_init = vfio_ap_class_init, + }; + +-- +2.41.0.windows.1 + diff --git a/vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch 
b/vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch new file mode 100644 index 0000000000000000000000000000000000000000..0ae9791865ad777e509f51bdb390699adc1b04e5 --- /dev/null +++ b/vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch @@ -0,0 +1,73 @@ +From 69da3907dc07bdb3cab4519922842820388bac4c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:53:00 +0800 +Subject: [PATCH] vfio/ap: Move VFIODevice initializations in + vfio_ap_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some of the VFIODevice initializations is in vfio_ap_realize, +move all of them in vfio_ap_instance_init. + +No functional change intended. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/ap.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index f180e4a32a..95fe7cd98b 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -164,18 +164,6 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + return; + } + +- vbasedev->ops = &vfio_ap_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_AP; +- vbasedev->dev = dev; +- +- /* +- * vfio-ap devices operate in a way compatible with discarding of +- * memory in RAM blocks, as no pages are pinned in the host. +- * This needs to be set before vfio_get_device() for vfio common to +- * handle ram_block_discard_disable(). 
+- */ +- vapdev->vdev.ram_block_discard_allowed = true; +- + ret = vfio_attach_device(vbasedev->name, vbasedev, + &address_space_memory, errp); + if (ret) { +@@ -236,8 +224,20 @@ static const VMStateDescription vfio_ap_vmstate = { + static void vfio_ap_instance_init(Object *obj) + { + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); ++ VFIODevice *vbasedev = &vapdev->vdev; + +- vapdev->vdev.fd = -1; ++ vbasedev->type = VFIO_DEVICE_TYPE_AP; ++ vbasedev->ops = &vfio_ap_ops; ++ vbasedev->dev = DEVICE(vapdev); ++ vbasedev->fd = -1; ++ ++ /* ++ * vfio-ap devices operate in a way compatible with discarding of ++ * memory in RAM blocks, as no pages are pinned in the host. ++ * This needs to be set before vfio_get_device() for vfio common to ++ * handle ram_block_discard_disable(). ++ */ ++ vbasedev->ram_block_discard_allowed = true; + } + + #ifdef CONFIG_IOMMUFD +-- +2.41.0.windows.1 + diff --git a/vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch b/vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch new file mode 100644 index 0000000000000000000000000000000000000000..beb4a137d1581c197ba2aa98960824c897248dc1 --- /dev/null +++ b/vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch @@ -0,0 +1,70 @@ +From 5e743a2f7791f4fb3eea40806ca69f6cce1258c2 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:52 +0800 +Subject: [PATCH] vfio/ccw: Allow the selection of a given iommu backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-ccw device: + +If the user wants to use the legacy backend, it shall not +link the vfio-ccw device with any iommufd object: + + -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX + +This is called the legacy mode/backend. 
+ +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-ccw device options: + + -object iommufd,id=iommufd0 + -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/ccw.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index d857bb8d0f..d2d58bb677 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -15,12 +15,14 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + #include + + #include "qapi/error.h" + #include "hw/vfio/vfio-common.h" ++#include "sysemu/iommufd.h" + #include "hw/s390x/s390-ccw.h" + #include "hw/s390x/vfio-ccw.h" + #include "hw/qdev-properties.h" +@@ -677,6 +679,10 @@ static void vfio_ccw_unrealize(DeviceState *dev) + static Property vfio_ccw_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), + DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOCCWDevice, vdev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.41.0.windows.1 + diff --git a/vfio-ccw-Don-t-initialize-HOST_IOMMU_DEVICE-with-mde.patch b/vfio-ccw-Don-t-initialize-HOST_IOMMU_DEVICE-with-mde.patch new file mode 100644 index 0000000000000000000000000000000000000000..ab58961a272e330760c83b6f9a8e231ab361db63 --- /dev/null +++ b/vfio-ccw-Don-t-initialize-HOST_IOMMU_DEVICE-with-mde.patch @@ -0,0 +1,36 @@ +From ffcda8cc141e14528fd73aea750be822575eedcc Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 22 Jul 2024 15:07:13 +0800 +Subject: [PATCH] vfio/ccw: Don't initialize HOST_IOMMU_DEVICE with mdev + +mdevs aren't 
"physical" devices and when asking for backing IOMMU info, +it fails the entire provisioning of the guest. Fix that by setting +vbasedev->mdev true so skipping HostIOMMUDevice initialization in the +presence of mdevs. + +Fixes: 930589520128 ("vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler") +Signed-off-by: Zhenzhong Duan +Reviewed-by: Joao Martins +Acked-by: Eric Farman +Reviewed-by: Eric Auger +--- + hw/vfio/ccw.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 90e4a53437..257e9723cf 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -683,6 +683,9 @@ static void vfio_ccw_instance_init(Object *obj) + VFIOCCWDevice *vcdev = VFIO_CCW(obj); + VFIODevice *vbasedev = &vcdev->vdev; + ++ /* CCW device is mdev type device */ ++ vbasedev->mdev = true; ++ + /* + * All vfio-ccw devices are believed to operate in a way compatible with + * discarding of memory in RAM blocks, ie. pages pinned in the host are +-- +2.41.0.windows.1 + diff --git a/vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch new file mode 100644 index 0000000000000000000000000000000000000000..23fd6aab97666125369f42e6b7ef578efd8b20aa --- /dev/null +++ b/vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch @@ -0,0 +1,85 @@ +From 0f9545907220680ee7e85a823a0e19b216a8b7d9 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:53 +0800 +Subject: [PATCH] vfio/ccw: Make vfio cdev pre-openable by passing a file + handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/ccw.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index d2d58bb677..2afdf17dbe 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -590,11 +590,12 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + } + } + ++ if (vfio_device_get_name(vbasedev, errp) < 0) { ++ return; ++ } ++ + vbasedev->ops = &vfio_ccw_ops; + vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, +- vcdev->cdev.hostid.ssid, +- vcdev->cdev.hostid.devid); + vbasedev->dev = dev; + + /* +@@ -691,12 +692,29 @@ static const VMStateDescription vfio_ccw_vmstate = { + .unmigratable = 1, + }; + ++static void vfio_ccw_instance_init(Object *obj) ++{ ++ VFIOCCWDevice *vcdev = VFIO_CCW(obj); ++ ++ vcdev->vdev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_ccw_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_CCW(obj)->vdev, str, errp); ++} ++#endif ++ + static void vfio_ccw_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + device_class_set_props(dc, vfio_ccw_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_ccw_set_fd); ++#endif + dc->vmsd = &vfio_ccw_vmstate; + dc->desc = "VFIO-based subchannel assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +@@ -714,6 +732,7 @@ static const TypeInfo vfio_ccw_info = { + .name = TYPE_VFIO_CCW, + .parent = TYPE_S390_CCW, + .instance_size = sizeof(VFIOCCWDevice), ++ .instance_init = vfio_ccw_instance_init, + .class_init = vfio_ccw_class_init, + }; + +-- +2.41.0.windows.1 + diff --git 
a/vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch b/vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch new file mode 100644 index 0000000000000000000000000000000000000000..2630a2fcaafb0f5cf9dc81dde900e054c9a6d405 --- /dev/null +++ b/vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch @@ -0,0 +1,77 @@ +From 4d12d39e824a35014f753a25e5aa8ec0e275a38c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:53:01 +0800 +Subject: [PATCH] vfio/ccw: Move VFIODevice initializations in + vfio_ccw_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some of the VFIODevice initializations is in vfio_ccw_realize, +move all of them in vfio_ccw_instance_init. + +No functional change intended. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/ccw.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 2afdf17dbe..6305a4c1b8 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -594,20 +594,6 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + return; + } + +- vbasedev->ops = &vfio_ccw_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->dev = dev; +- +- /* +- * All vfio-ccw devices are believed to operate in a way compatible with +- * discarding of memory in RAM blocks, ie. pages pinned in the host are +- * in the current working set of the guest driver and therefore never +- * overlap e.g., with pages available to the guest balloon driver. This +- * needs to be set before vfio_get_device() for vfio common to handle +- * ram_block_discard_disable(). 
+- */ +- vbasedev->ram_block_discard_allowed = true; +- + ret = vfio_attach_device(cdev->mdevid, vbasedev, + &address_space_memory, errp); + if (ret) { +@@ -695,8 +681,22 @@ static const VMStateDescription vfio_ccw_vmstate = { + static void vfio_ccw_instance_init(Object *obj) + { + VFIOCCWDevice *vcdev = VFIO_CCW(obj); ++ VFIODevice *vbasedev = &vcdev->vdev; ++ ++ vbasedev->type = VFIO_DEVICE_TYPE_CCW; ++ vbasedev->ops = &vfio_ccw_ops; ++ vbasedev->dev = DEVICE(vcdev); ++ vbasedev->fd = -1; + +- vcdev->vdev.fd = -1; ++ /* ++ * All vfio-ccw devices are believed to operate in a way compatible with ++ * discarding of memory in RAM blocks, ie. pages pinned in the host are ++ * in the current working set of the guest driver and therefore never ++ * overlap e.g., with pages available to the guest balloon driver. This ++ * needs to be set before vfio_get_device() for vfio common to handle ++ * ram_block_discard_disable(). ++ */ ++ vbasedev->ram_block_discard_allowed = true; + } + + #ifdef CONFIG_IOMMUFD +-- +2.41.0.windows.1 + diff --git a/vfio-common-Allow-disabling-device-dirty-page-tracki.patch b/vfio-common-Allow-disabling-device-dirty-page-tracki.patch new file mode 100644 index 0000000000000000000000000000000000000000..f019d6e34973bf183ecbc160455d77899a8443d6 --- /dev/null +++ b/vfio-common-Allow-disabling-device-dirty-page-tracki.patch @@ -0,0 +1,81 @@ +From b0fe5a6794c5403f4ab9859ec2ced338246690bd Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 22:13:26 +0100 +Subject: [PATCH] vfio/common: Allow disabling device dirty page tracking + +The property 'x-pre-copy-dirty-page-tracking' allows disabling the whole +tracking of VF pre-copy phase of dirty page tracking, though it means +that it will only be used at the start of the switchover phase. + +Add an option that disables the VF dirty page tracking, and fall +back into container-based dirty page tracking. 
This also allows to +use IOMMU dirty tracking even on VFs with their own dirty +tracker scheme. + +Signed-off-by: Joao Martins +Reviewed-by: Zhenzhong Duan +--- + hw/vfio/common.c | 3 +++ + hw/vfio/migration.c | 4 +++- + hw/vfio/pci.c | 3 +++ + include/hw/vfio/vfio-common.h | 1 + + 4 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 65e1c9f810..a8bc1c6055 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -208,6 +208,9 @@ bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer) + VFIODevice *vbasedev; + + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { ++ if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) { ++ return false; ++ } + if (!vbasedev->dirty_pages_supported) { + return false; + } +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index db128204af..3924beb289 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -945,7 +945,9 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + return !vfio_block_migration(vbasedev, err, errp); + } + +- if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) { ++ if ((!vbasedev->dirty_pages_supported || ++ vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) && ++ !vbasedev->iommu_dirty_tracking) { + if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { + error_setg(&err, + "%s: VFIO device doesn't support device and " +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 19211f4368..f585f285f4 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3350,6 +3350,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, + vbasedev.pre_copy_dirty_page_tracking, + ON_OFF_AUTO_ON), ++ DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice, ++ vbasedev.device_dirty_page_tracking, ++ ON_OFF_AUTO_ON), + DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice, + display, ON_OFF_AUTO_OFF), + 
DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0), +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 22a7386591..abae8655c4 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -147,6 +147,7 @@ typedef struct VFIODevice { + VFIOMigration *migration; + Error *migration_blocker; + OnOffAuto pre_copy_dirty_page_tracking; ++ OnOffAuto device_dirty_page_tracking; + bool dirty_pages_supported; + bool dirty_tracking; + bool iommu_dirty_tracking; +-- +2.41.0.windows.1 + diff --git a/vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch b/vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch deleted file mode 100644 index efcbd1fd03162efd34a1c11bc169e39da757da6b..0000000000000000000000000000000000000000 --- a/vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 55f3bdd0866be2b1a6223bacf9e00a032daf957c Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Sat, 31 Jul 2021 10:02:18 +0800 -Subject: [PATCH] vfio/common: Avoid unmap ram section at - vfio_listener_region_del() in nested mode - -The ram section will be unmapped at vfio_prereg_listener_region_del() -in nested mode. So let's avoid unmap ram section at -vfio_listener_region_dev(). - -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 98dc9e6f84..21a866e545 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1179,6 +1179,16 @@ static void vfio_listener_region_del(MemoryListener *listener, - } - } - -+ /* -+ * In nested mode, stage 2 (gpa->hpa) and the stage 1 -+ * (giova->gpa) are set separately. The ram section -+ * will be unmapped in vfio_prereg_listener_region_del(). -+ * Hence it doesn't need to unmap ram section here. 
-+ */ -+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { -+ return; -+ } -+ - /* - * FIXME: We assume the one big unmap below is adequate to - * remove any individual page mappings in the IOMMU which --- -2.27.0 - diff --git a/vfio-common-Introduce-vfio_container_init-destroy-he.patch b/vfio-common-Introduce-vfio_container_init-destroy-he.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ae516a32a649db7647d3feadc2833dfd3a1fd1b --- /dev/null +++ b/vfio-common-Introduce-vfio_container_init-destroy-he.patch @@ -0,0 +1,89 @@ +From ff4e67fa5ceb31f1dc686a661cbf37c1a81cd644 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:21 +0800 +Subject: [PATCH] vfio/common: Introduce vfio_container_init/destroy helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This adds two helper functions vfio_container_init/destroy which will be +used by both legacy and iommufd containers to do base container specific +initialization and release. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/container-base.c | 9 +++++++++ + hw/vfio/container.c | 4 +++- + include/hw/vfio/vfio-container-base.h | 4 ++++ + 3 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 55d3a35fa4..e929435751 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -30,3 +30,12 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + g_assert(bcontainer->ops->dma_unmap); + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } ++ ++void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) ++{ ++ bcontainer->ops = ops; ++} ++ ++void vfio_container_destroy(VFIOContainerBase *bcontainer) ++{ ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 40e378e888..5a8c55056b 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -653,7 +653,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + QLIST_INIT(&container->vrdl_list); + QLIST_INIT(&container->dma_list); + bcontainer = &container->bcontainer; +- bcontainer->ops = &vfio_legacy_ops; ++ vfio_container_init(bcontainer, &vfio_legacy_ops); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -765,6 +765,7 @@ put_space_exit: + static void vfio_disconnect_container(VFIOGroup *group) + { + VFIOContainer *container = group->container; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + + QLIST_REMOVE(group, container_next); + group->container = NULL; +@@ -803,6 +804,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } ++ vfio_container_destroy(bcontainer); + + trace_vfio_disconnect_container(container->fd); + close(container->fd); +diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h +index 56b033f59f..577f52ccbc 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -38,6 +38,10 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); + ++void vfio_container_init(VFIOContainerBase *bcontainer, ++ const VFIOIOMMUOps *ops); ++void vfio_container_destroy(VFIOContainerBase *bcontainer); ++ + struct VFIOIOMMUOps { + /* basic feature */ + int (*dma_map)(VFIOContainerBase *bcontainer, +-- +2.41.0.windows.1 + diff --git a/vfio-common-Move-giommu_list-in-base-container.patch b/vfio-common-Move-giommu_list-in-base-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..a517cab68a98f0b5e4a1714ee2997a9115c494e6 --- /dev/null +++ b/vfio-common-Move-giommu_list-in-base-container.patch @@ -0,0 +1,213 @@ +From 350f1a4d221849cc26a6d3950c128f951648c391 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:22 +0800 +Subject: [PATCH] vfio/common: Move giommu_list in base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Move the giommu_list field in the base container and store +the base container in the VFIOGuestIOMMU. + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 17 +++++++++++------ + hw/vfio/container-base.c | 9 +++++++++ + hw/vfio/container.c | 8 -------- + include/hw/vfio/vfio-common.h | 9 --------- + include/hw/vfio/vfio-container-base.h | 9 +++++++++ + 5 files changed, 29 insertions(+), 23 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index ea63271167..b8007b22c3 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +- VFIOContainerBase *bcontainer = &giommu->container->bcontainer; ++ VFIOContainerBase *bcontainer = giommu->bcontainer; + hwaddr iova = iotlb->iova + giommu->iommu_offset; + void *vaddr; + int ret; +@@ -569,6 +569,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -612,7 +613,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + giommu->iommu_mr = iommu_mr; + giommu->iommu_offset = section->offset_within_address_space - + section->offset_within_region; +- giommu->container = container; ++ giommu->bcontainer = bcontainer; + llend = int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); +@@ -647,7 +648,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + g_free(giommu); + goto fail; + } +- QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); ++ QLIST_INSERT_HEAD(&bcontainer->giommu_list, 
giommu, giommu_next); + memory_region_iommu_replay(giommu->iommu_mr, &giommu->n); + + return; +@@ -732,6 +733,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -744,7 +746,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + +- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + memory_region_unregister_iommu_notifier(section->mr, +@@ -1211,7 +1213,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + vfio_giommu_dirty_notifier *gdn = container_of(n, + vfio_giommu_dirty_notifier, n); + VFIOGuestIOMMU *giommu = gdn->giommu; +- VFIOContainer *container = giommu->container; ++ VFIOContainerBase *bcontainer = giommu->bcontainer; ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova = iotlb->iova + giommu->iommu_offset; + ram_addr_t translated_addr; + int ret = -EINVAL; +@@ -1289,12 +1293,13 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, + static int vfio_sync_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + ram_addr_t ram_addr; + + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + +- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + Int128 llend; 
+diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index e929435751..20bcb9669a 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -34,8 +34,17 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; ++ QLIST_INIT(&bcontainer->giommu_list); + } + + void vfio_container_destroy(VFIOContainerBase *bcontainer) + { ++ VFIOGuestIOMMU *giommu, *tmp; ++ ++ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { ++ memory_region_unregister_iommu_notifier( ++ MEMORY_REGION(giommu->iommu_mr), &giommu->n); ++ QLIST_REMOVE(giommu, giommu_next); ++ g_free(giommu); ++ } + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 5a8c55056b..03791601d0 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -649,7 +649,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->dirty_pages_supported = false; + container->dma_max_mappings = 0; + container->iova_ranges = NULL; +- QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); + QLIST_INIT(&container->dma_list); + bcontainer = &container->bcontainer; +@@ -794,16 +793,9 @@ static void vfio_disconnect_container(VFIOGroup *group) + + if (QLIST_EMPTY(&container->group_list)) { + VFIOAddressSpace *space = container->space; +- VFIOGuestIOMMU *giommu, *tmp; + + QLIST_REMOVE(container, next); + +- QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { +- memory_region_unregister_iommu_notifier( +- MEMORY_REGION(giommu->iommu_mr), &giommu->n); +- QLIST_REMOVE(giommu, giommu_next); +- g_free(giommu); +- } + vfio_container_destroy(bcontainer); + + trace_vfio_disconnect_container(container->fd); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index f94baf72db..6f02952ff6 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ 
-104,7 +104,6 @@ typedef struct VFIOContainer { + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + unsigned int dma_max_mappings; +- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +@@ -114,14 +113,6 @@ typedef struct VFIOContainer { + GList *iova_ranges; + } VFIOContainer; + +-typedef struct VFIOGuestIOMMU { +- VFIOContainer *container; +- IOMMUMemoryRegion *iommu_mr; +- hwaddr iommu_offset; +- IOMMUNotifier n; +- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; +-} VFIOGuestIOMMU; +- + typedef struct VFIORamDiscardListener { + VFIOContainer *container; + MemoryRegion *mr; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 577f52ccbc..a11aec5755 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -29,8 +29,17 @@ typedef struct { + */ + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; ++ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + } VFIOContainerBase; + ++typedef struct VFIOGuestIOMMU { ++ VFIOContainerBase *bcontainer; ++ IOMMUMemoryRegion *iommu_mr; ++ hwaddr iommu_offset; ++ IOMMUNotifier n; ++ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; ++} VFIOGuestIOMMU; ++ + int vfio_container_dma_map(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.41.0.windows.1 + diff --git a/vfio-common-return-early-if-space-isn-t-empty.patch b/vfio-common-return-early-if-space-isn-t-empty.patch new file mode 100644 index 0000000000000000000000000000000000000000..e3b3009352f8230930a4ece1088b83332c356571 --- /dev/null +++ b/vfio-common-return-early-if-space-isn-t-empty.patch @@ -0,0 +1,47 @@ +From bf4c408cd5d3daadbfd11136655e5bcb40dcbba0 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:39 +0800 +Subject: [PATCH] vfio/common: return early if space isn't empty +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is a trivial optimization. If there is active container in space, +vfio_reset_handler will never be unregistered. So revert the check of +space->containers and return early. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 679fee4321..f6c2029aec 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1608,10 +1608,13 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) + + void vfio_put_address_space(VFIOAddressSpace *space) + { +- if (QLIST_EMPTY(&space->containers)) { +- QLIST_REMOVE(space, list); +- g_free(space); ++ if (!QLIST_EMPTY(&space->containers)) { ++ return; + } ++ ++ QLIST_REMOVE(space, list); ++ g_free(space); ++ + if (QLIST_EMPTY(&vfio_address_spaces)) { + qemu_unregister_reset(vfio_reset_handler, NULL); + } +-- +2.41.0.windows.1 + diff --git a/vfio-container-Convert-functions-to-base-container.patch b/vfio-container-Convert-functions-to-base-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..1aeb934d7ee566085d45f819d7ca13d1d9656cbe --- /dev/null +++ b/vfio-container-Convert-functions-to-base-container.patch @@ -0,0 +1,263 @@ +From 718cfbf181541fa4142aba10d5aee839e06b4d66 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:26 +0800 +Subject: [PATCH] vfio/container: Convert functions to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In the prospect to get rid of VFIOContainer refs +in common.c lets convert misc functions to use the base +container object instead: + +vfio_devices_all_dirty_tracking +vfio_devices_all_device_dirty_tracking 
+vfio_devices_all_running_and_mig_active +vfio_devices_query_dirty_bitmap +vfio_get_dirty_bitmap + +Modify vfio_get_dirty_bitmap/vfio_listener_log_clear during backporting. + +Signed-off-by: Eric Auger +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 46 ++++++++++++++++------------------- + hw/vfio/container.c | 6 ++--- + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 9 +++---- + 4 files changed, 29 insertions(+), 34 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b952d1c811..b663d0bcc0 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -177,9 +177,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) + migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P; + } + +-static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) ++static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +@@ -204,9 +203,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + return true; + } + +-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) ++bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { +@@ -222,9 +220,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. 
+ */ +-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) ++bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + if (!migration_is_active(migrate_get_current())) { +@@ -1082,7 +1079,7 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + int ret; + +- if (vfio_devices_all_device_dirty_tracking(container)) { ++ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { + ret = vfio_devices_dma_logging_start(container); + } else { + ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +@@ -1101,7 +1098,7 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + int ret = 0; + +- if (vfio_devices_all_device_dirty_tracking(container)) { ++ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { + vfio_devices_dma_logging_stop(container); + } else { + ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +@@ -1141,11 +1138,10 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, + return 0; + } + +-int vfio_devices_query_dirty_bitmap(VFIOContainer *container, ++int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret; + +@@ -1165,18 +1161,19 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + return 0; + } + +-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) + { + bool all_device_dirty_tracking = +- vfio_devices_all_device_dirty_tracking(container); ++ 
vfio_devices_all_device_dirty_tracking(bcontainer); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + uint64_t dirty_pages; + VFIOBitmap vbmap; + VFIODMARange *qrange; + int ret; + +- if (!container->bcontainer.dirty_pages_supported && +- !all_device_dirty_tracking) { ++ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); +@@ -1195,10 +1192,9 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + vbmap.bitmap = qrange->bitmap; + + if (all_device_dirty_tracking) { +- ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); ++ ret = vfio_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size); + } else { +- ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, +- iova, size); ++ ret = vfio_container_query_dirty_bitmap(bcontainer, &vbmap, iova, size); + } + + if (ret) { +@@ -1208,8 +1204,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, + vbmap.pages); + +- trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, +- ram_addr, dirty_pages); ++ trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, dirty_pages); + out: + return ret; + } +@@ -1241,8 +1236,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + + rcu_read_lock(); + if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { +- ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, +- translated_addr); ++ ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, ++ iotlb->addr_mask + 1, translated_addr); + if (ret) { + error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +@@ -1271,7 +1266,8 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + * Sync 
the whole mapped region (spanning multiple individual mappings) + * in one go. + */ +- return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr); ++ return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, ++ ram_addr); + } + + static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, +@@ -1340,7 +1336,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +- return vfio_get_dirty_bitmap(container, ++ return vfio_get_dirty_bitmap(&container->bcontainer, + REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), + int128_get64(section->size), ram_addr); + } +@@ -1355,7 +1351,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_dirty_tracking(container)) { ++ if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { + ret = vfio_sync_dirty_bitmap(container, section); + if (ret) { + error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, +@@ -1495,7 +1491,7 @@ static void vfio_listener_log_clear(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_dirty_tracking(container)) { ++ if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { + vfio_physical_log_clear(container, section); + } + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 74d236ddee..9a542368ab 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -155,8 +155,8 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + bool need_dirty_sync = false; + int ret; + +- if (iotlb && vfio_devices_all_running_and_mig_active(container)) { +- if (!vfio_devices_all_device_dirty_tracking(container) && ++ if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { ++ if (!vfio_devices_all_device_dirty_tracking(bcontainer) && + container->bcontainer.dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, 
iotlb); + } +@@ -204,7 +204,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + } + + if (need_dirty_sync) { +- ret = vfio_get_dirty_bitmap(container, iova, size, ++ ret = vfio_get_dirty_bitmap(bcontainer, iova, size, + iotlb->translated_addr); + if (ret) { + return ret; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 9f7fedee98..08a1f9dfa4 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -117,7 +117,7 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" + vfio_legacy_dma_unmap_overflow_workaround(void) "" +-vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 ++vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 + vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 + + # platform.c +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index e27854228c..0295ede7ba 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -196,7 +196,6 @@ typedef struct VFIODisplay { + + VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); +-bool vfio_devices_all_running_and_saving(VFIOContainer *container); + + VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, + hwaddr start_addr, hwaddr size); +@@ -274,11 +273,11 @@ bool 
vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container); +-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container); +-int vfio_devices_query_dirty_bitmap(VFIOContainer *container, ++bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); ++bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); ++int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-container-Implement-HostIOMMUDeviceClass-get_ca.patch b/vfio-container-Implement-HostIOMMUDeviceClass-get_ca.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6ec9243db7ea417cbea8a444de6c655698d63a1 --- /dev/null +++ b/vfio-container-Implement-HostIOMMUDeviceClass-get_ca.patch @@ -0,0 +1,51 @@ +From b6830d3caff821b2472e369042c169935c906ef2 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:35 +0800 +Subject: [PATCH] vfio/container: Implement HostIOMMUDeviceClass::get_cap() + handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + hw/vfio/container.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index fbe2bc50d4..ed54ce6d0c 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1261,11 +1261,26 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + return true; + } + ++static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, ++ Error **errp) ++{ ++ HostIOMMUDeviceCaps *caps = &hiod->caps; ++ ++ switch (cap) { ++ case HOST_IOMMU_DEVICE_CAP_AW_BITS: ++ return caps->aw_bits; ++ default: ++ error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); ++ return -EINVAL; ++ } ++} ++ + static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) + { + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->realize = hiod_legacy_vfio_realize; ++ hioc->get_cap = hiod_legacy_vfio_get_cap; + }; + + static const TypeInfo types[] = { +-- +2.41.0.windows.1 + diff --git a/vfio-container-Implement-HostIOMMUDeviceClass-realiz.patch b/vfio-container-Implement-HostIOMMUDeviceClass-realiz.patch new file mode 100644 index 0000000000000000000000000000000000000000..802f46c515958a2a7c8d3a13b555be04e5494c1b --- /dev/null +++ b/vfio-container-Implement-HostIOMMUDeviceClass-realiz.patch @@ -0,0 +1,100 @@ +From c66d22fa4ee9f6f38193256d7ce1494c32e10581 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:32 +0800 +Subject: [PATCH] vfio/container: Implement HostIOMMUDeviceClass::realize() + handler + +The realize function populates the capabilities. For now only the +aw_bits caps is computed for legacy backend. + +Introduce a helper function vfio_device_get_aw_bits() which calls +range_get_last_bit() to get host aw_bits and package it in +HostIOMMUDeviceCaps for query with .get_cap(). This helper will +also be used by iommufd backend. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + hw/vfio/container.c | 20 +++++++++++++++++++- + hw/vfio/helpers.c | 17 +++++++++++++++++ + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 37 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index dcf49af2d0..fbe2bc50d4 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1250,6 +1250,24 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) + vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; + }; + ++static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, ++ Error **errp) ++{ ++ VFIODevice *vdev = opaque; ++ ++ hiod->name = g_strdup(vdev->name); ++ hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); ++ ++ return true; ++} ++ ++static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) ++{ ++ HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); ++ ++ hioc->realize = hiod_legacy_vfio_realize; ++}; ++ + static const TypeInfo types[] = { + { + .name = TYPE_VFIO_IOMMU_LEGACY, +@@ -1258,8 +1276,8 @@ static const TypeInfo types[] = { + }, { + .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE, ++ .class_init = hiod_legacy_vfio_class_init, + } +- + }; + + DEFINE_TYPES(types) +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 6789870802..35b8e42304 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -663,3 +663,20 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, + + vbasedev->ram_block_discard_allowed = ram_discard; + } ++ ++int vfio_device_get_aw_bits(VFIODevice *vdev) ++{ ++ /* ++ * iova_ranges is a sorted list. For old kernels that support ++ * VFIO but not support query of iova ranges, iova_ranges is NULL, ++ * in this case HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX(64) is returned. 
++ */ ++ GList *l = g_list_last(vdev->bcontainer->iova_ranges); ++ ++ if (l) { ++ Range *range = l->data; ++ return range_get_last_bit(range) + 1; ++ } ++ ++ return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX; ++} +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 2cfc8521cd..376b8350b9 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -277,4 +277,5 @@ int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); + void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); + void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, + DeviceState *dev, bool ram_discard); ++int vfio_device_get_aw_bits(VFIODevice *vdev); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-container-Implement-attach-detach_device.patch b/vfio-container-Implement-attach-detach_device.patch new file mode 100644 index 0000000000000000000000000000000000000000..0ce52489a87db68bec4ecdc29d3e643c717aa9d7 --- /dev/null +++ b/vfio-container-Implement-attach-detach_device.patch @@ -0,0 +1,89 @@ +From 1ba796aff9476e5850df910304eb3720a09feef2 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:32 +0800 +Subject: [PATCH] vfio/container: Implement attach/detach_device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 16 ++++++++++++++++ + hw/vfio/container.c | 12 +++++------- + 2 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 9926454527..488aa43c9b 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1644,3 +1644,19 @@ retry: + + return info; + } ++ ++int vfio_attach_device(char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) ++{ ++ const VFIOIOMMUOps *ops = &vfio_legacy_ops; ++ ++ return ops->attach_device(name, vbasedev, as, errp); ++} ++ ++void vfio_detach_device(VFIODevice *vbasedev) ++{ ++ if (!vbasedev->bcontainer) { ++ return; ++ } ++ vbasedev->bcontainer->ops->detach_device(vbasedev); ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 13d42aad0d..62af0f2bdd 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -986,8 +986,8 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) + * @name and @vbasedev->name are likely to be different depending + * on the type of the device, hence the need for passing @name + */ +-int vfio_attach_device(char *name, VFIODevice *vbasedev, +- AddressSpace *as, Error **errp) ++static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) + { + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; +@@ -1027,14 +1027,10 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + return ret; + } + +-void vfio_detach_device(VFIODevice *vbasedev) ++static void vfio_legacy_detach_device(VFIODevice *vbasedev) + { + VFIOGroup *group = vbasedev->group; + +- if (!vbasedev->bcontainer) { +- return; +- } +- + QLIST_REMOVE(vbasedev, global_next); + QLIST_REMOVE(vbasedev, container_next); + vbasedev->bcontainer = NULL; +@@ -1046,6 +1042,8 
@@ void vfio_detach_device(VFIODevice *vbasedev) + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, ++ .attach_device = vfio_legacy_attach_device, ++ .detach_device = vfio_legacy_detach_device, + .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, + .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, + }; +-- +2.41.0.windows.1 + diff --git a/vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch b/vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7eb4bceb3f045431333b1bdba78d2018eeda8c9 --- /dev/null +++ b/vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch @@ -0,0 +1,55 @@ +From 7a81c3919dda48b4e12b83ceb661896523cce6ab Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:18 +0100 +Subject: [PATCH] vfio/container: Initialize VFIOIOMMUOps under + vfio_init_container() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +vfio_init_container() already defines the IOMMU type of the container. +Do the same for the VFIOIOMMUOps struct. This prepares ground for the +following patches that will deduce the associated VFIOIOMMUOps struct +from the IOMMU type. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/container.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 27ce31c883..dc805ceb12 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -430,7 +430,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, + } + + static int vfio_init_container(VFIOContainer *container, int group_fd, +- Error **errp) ++ VFIOAddressSpace *space, Error **errp) + { + int iommu_type, dirty_log_manual_clear, ret; + +@@ -467,7 +467,7 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + if (dirty_log_manual_clear) { + container->dirty_log_manual_clear = dirty_log_manual_clear; + } +- ++ vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); + return 0; + } + +@@ -679,7 +679,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); + +- ret = vfio_init_container(container, group->fd, errp); ++ ret = vfio_init_container(container, group->fd, space, errp); + if (ret) { + goto free_container_exit; + } +-- +2.41.0.windows.1 + diff --git a/vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch b/vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e9f9387f02530134e1f776b39e67fcadabd51fd --- /dev/null +++ b/vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch @@ -0,0 +1,45 @@ +From b8e67d06ec3036cd3fd6d625c550e0c542e49d60 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:21 +0100 +Subject: [PATCH] vfio/container: Intoduce a new VFIOIOMMUClass::setup handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This will help in converting the sPAPR IOMMU backend to a QOM 
interface. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/container.c | 1 + + include/hw/vfio/vfio-container-base.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 6b8de8f471..845239eff4 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1248,6 +1248,7 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) + { + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + ++ vioc->setup = vfio_legacy_setup; + vioc->dma_map = vfio_legacy_dma_map; + vioc->dma_unmap = vfio_legacy_dma_unmap; + vioc->attach_device = vfio_legacy_attach_device; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index dce801378b..614de90767 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -109,6 +109,7 @@ struct VFIOIOMMUClass { + InterfaceClass parent_class; + + /* basic feature */ ++ int (*setup)(VFIOContainerBase *bcontainer, Error **errp); + int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.41.0.windows.1 + diff --git a/vfio-container-Introduce-TYPE_HOST_IOMMU_DEVICE_LEGA.patch b/vfio-container-Introduce-TYPE_HOST_IOMMU_DEVICE_LEGA.patch new file mode 100644 index 0000000000000000000000000000000000000000..c5be84d61d0ad2bc3ec6f09a5cb16b35529e1dbe --- /dev/null +++ b/vfio-container-Introduce-TYPE_HOST_IOMMU_DEVICE_LEGA.patch @@ -0,0 +1,65 @@ +From c253a07d9fe1598c4dbbb1cefee457806c417885 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:29 +0800 +Subject: [PATCH] vfio/container: Introduce TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO + device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO represents a host IOMMU device under +VFIO legacy container backend. 
+ +It will have its own realize implementation. + +Suggested-by: Eric Auger +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +--- + hw/vfio/container.c | 6 +++++- + include/hw/vfio/vfio-common.h | 3 +++ + 2 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 4c62f088b1..dcf49af2d0 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1255,7 +1255,11 @@ static const TypeInfo types[] = { + .name = TYPE_VFIO_IOMMU_LEGACY, + .parent = TYPE_VFIO_IOMMU, + .class_init = vfio_iommu_legacy_class_init, +- }, ++ }, { ++ .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, ++ .parent = TYPE_HOST_IOMMU_DEVICE, ++ } ++ + }; + + DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index f3966410c1..0c807c2806 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -31,6 +31,7 @@ + #endif + #include "sysemu/sysemu.h" + #include "hw/vfio/vfio-container-base.h" ++#include "sysemu/host_iommu_device.h" + + #define VFIO_MSG_PREFIX "vfio %s: " + +@@ -75,6 +76,8 @@ typedef struct VFIOMigration { + + struct VFIOGroup; + ++#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" ++ + typedef struct VFIODMARange { + QLIST_ENTRY(VFIODMARange) next; + hwaddr iova; +-- +2.41.0.windows.1 + diff --git a/vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch b/vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch new file mode 100644 index 0000000000000000000000000000000000000000..0dd9efbca45c79d6d82c374115739086cf2659fd --- /dev/null +++ b/vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch @@ -0,0 +1,134 @@ +From 5f62836c64d5abdbdb0d8fb9f0d2fd0d87f47b0a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:19 +0100 +Subject: [PATCH] vfio/container: Introduce a VFIOIOMMU QOM interface +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +VFIOContainerBase was not introduced as an abstract QOM object because +it felt unnecessary to expose all the IOMMU backends to the QEMU +machine and human interface. However, we can still abstract the IOMMU +backend handlers using a QOM interface class. This provides more +flexibility when referencing the various implementations. + +Simply transform the VFIOIOMMUOps struct in an InterfaceClass and do +some initial name replacements. Next changes will start converting +VFIOIOMMUOps. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 2 +- + hw/vfio/container-base.c | 12 +++++++++++- + hw/vfio/pci.c | 2 +- + include/hw/vfio/vfio-container-base.h | 23 +++++++++++++++++++---- + 4 files changed, 32 insertions(+), 7 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index d572ec5880..abca6aa01a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1649,7 +1649,7 @@ retry: + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +- const VFIOIOMMUOps *ops = &vfio_legacy_ops; ++ const VFIOIOMMUClass *ops = &vfio_legacy_ops; + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 1ffd25bbfa..913ae49077 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -72,7 +72,7 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + } + + void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, +- const VFIOIOMMUOps *ops) ++ const VFIOIOMMUClass *ops) + { + bcontainer->ops = ops; + bcontainer->space = space; +@@ -99,3 +99,13 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + + g_list_free_full(bcontainer->iova_ranges, g_free); + } ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU, ++ .parent = TYPE_INTERFACE, ++ 
.class_size = sizeof(VFIOIOMMUClass), ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 1874ec1aba..d84a9e73a6 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2488,7 +2488,7 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + { + VFIODevice *vbasedev = &vdev->vbasedev; +- const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; ++ const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; + + return ops->pci_hot_reset(vbasedev, single); + } +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 2ae297ccda..ce8bf9e2e6 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -16,7 +16,8 @@ + #include "exec/memory.h" + + typedef struct VFIODevice VFIODevice; +-typedef struct VFIOIOMMUOps VFIOIOMMUOps; ++typedef struct VFIOIOMMUClass VFIOIOMMUClass; ++#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ + + typedef struct { + unsigned long *bitmap; +@@ -34,7 +35,7 @@ typedef struct VFIOAddressSpace { + * This is the base object for vfio container backends + */ + typedef struct VFIOContainerBase { +- const VFIOIOMMUOps *ops; ++ const VFIOIOMMUClass *ops; + VFIOAddressSpace *space; + MemoryListener listener; + Error *error; +@@ -88,10 +89,24 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + + void vfio_container_init(VFIOContainerBase *bcontainer, + VFIOAddressSpace *space, +- const VFIOIOMMUOps *ops); ++ const VFIOIOMMUClass *ops); + void vfio_container_destroy(VFIOContainerBase *bcontainer); + +-struct VFIOIOMMUOps { ++ ++#define TYPE_VFIO_IOMMU "vfio-iommu" ++ ++/* ++ * VFIOContainerBase is not an abstract QOM object because it felt ++ * unnecessary to expose all the IOMMU backends to the QEMU machine ++ * and human interface. However, we can still abstract the IOMMU ++ * backend handlers using a QOM interface class. 
This provides more ++ * flexibility when referencing the various implementations. ++ */ ++DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) ++ ++struct VFIOIOMMUClass { ++ InterfaceClass parent_class; ++ + /* basic feature */ + int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.41.0.windows.1 + diff --git a/vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch b/vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b2a1bb2a66afd16a80bafd08fa410fff270b22f --- /dev/null +++ b/vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch @@ -0,0 +1,166 @@ +From 9f04d045ef1b2d206b002d20b792111b3ce86909 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:20 +0100 +Subject: [PATCH] vfio/container: Introduce a VFIOIOMMU legacy QOM interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Convert the legacy VFIOIOMMUOps struct to the new VFIOIOMMU QOM +interface. The set of operations for this backend can be referenced +with a literal typename instead of a C struct. This will simplify +support of multiple backends. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 6 ++- + hw/vfio/container.c | 59 ++++++++++++++++++++++----- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 1 + + 4 files changed, 55 insertions(+), 12 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index abca6aa01a..d98c3b7422 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1649,13 +1649,17 @@ retry: + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +- const VFIOIOMMUClass *ops = &vfio_legacy_ops; ++ const VFIOIOMMUClass *ops = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { + ops = &vfio_iommufd_ops; + } + #endif ++ ++ assert(ops); ++ + return ops->attach_device(name, vbasedev, as, errp); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index dc805ceb12..6b8de8f471 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -429,10 +429,30 @@ static int vfio_get_iommu_type(VFIOContainer *container, + return -EINVAL; + } + ++/* ++ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type ++ */ ++static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) ++{ ++ ObjectClass *klass = NULL; ++ ++ switch (iommu_type) { ++ case VFIO_TYPE1v2_IOMMU: ++ case VFIO_TYPE1_IOMMU: ++ klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); ++ break; ++ default: ++ g_assert_not_reached(); ++ }; ++ ++ return VFIO_IOMMU_CLASS(klass); ++} ++ + static int vfio_init_container(VFIOContainer *container, int group_fd, + VFIOAddressSpace *space, Error **errp) + { + int iommu_type, dirty_log_manual_clear, ret; ++ const VFIOIOMMUClass *vioc; + + iommu_type = vfio_get_iommu_type(container, errp); + if (iommu_type < 0) { +@@ -467,7 +487,14 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + if (dirty_log_manual_clear) { + 
container->dirty_log_manual_clear = dirty_log_manual_clear; + } +- vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); ++ ++ vioc = vfio_get_iommu_class(iommu_type, errp); ++ if (!vioc) { ++ error_setg(errp, "No available IOMMU models"); ++ return -EINVAL; ++ } ++ ++ vfio_container_init(&container->bcontainer, space, vioc); + return 0; + } + +@@ -677,7 +704,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->fd = fd; + QLIST_INIT(&container->dma_list); + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, space, &vfio_legacy_ops); + + ret = vfio_init_container(container, group->fd, space, errp); + if (ret) { +@@ -1218,12 +1244,25 @@ out_single: + return ret; + } + +-const VFIOIOMMUOps vfio_legacy_ops = { +- .dma_map = vfio_legacy_dma_map, +- .dma_unmap = vfio_legacy_dma_unmap, +- .attach_device = vfio_legacy_attach_device, +- .detach_device = vfio_legacy_detach_device, +- .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, +- .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, +- .pci_hot_reset = vfio_legacy_pci_hot_reset, ++static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->dma_map = vfio_legacy_dma_map; ++ vioc->dma_unmap = vfio_legacy_dma_unmap; ++ vioc->attach_device = vfio_legacy_attach_device; ++ vioc->detach_device = vfio_legacy_detach_device; ++ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; ++ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; ++ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; + }; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_LEGACY, ++ .parent = TYPE_VFIO_IOMMU, ++ .class_init = vfio_iommu_legacy_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 151b2ab65f..f78a97006c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ 
b/include/hw/vfio/vfio-common.h +@@ -224,7 +224,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +-extern const VFIOIOMMUOps vfio_legacy_ops; + extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index ce8bf9e2e6..dce801378b 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -94,6 +94,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + + #define TYPE_VFIO_IOMMU "vfio-iommu" ++#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.41.0.windows.1 + diff --git a/vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch b/vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6d40421e33683b740c99658146ea4c5dbc0aa4a --- /dev/null +++ b/vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch @@ -0,0 +1,63 @@ +From bda13dc55ae5e16174a4a611353f4bb8a590d510 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:19 +0800 +Subject: [PATCH] vfio/container: Introduce a empty VFIOIOMMUOps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This empty VFIOIOMMUOps named vfio_legacy_ops will hold all general +IOMMU ops of legacy container. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/container.c | 5 +++++ + include/hw/vfio/vfio-common.h | 2 +- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 77e61cfedd..8d8ed13e93 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -565,6 +565,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { + VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + int ret, fd; + VFIOAddressSpace *space; + +@@ -646,6 +647,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); + QLIST_INIT(&container->dma_list); ++ bcontainer = &container->bcontainer; ++ bcontainer->ops = &vfio_legacy_ops; + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -1046,3 +1049,5 @@ void vfio_detach_device(VFIODevice *vbasedev) + vfio_put_base_device(vbasedev); + vfio_put_group(group); + } ++ ++const VFIOIOMMUOps vfio_legacy_ops; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index c89b5886f2..3a0a6ab6ee 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -268,7 +268,7 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +- ++extern const VFIOIOMMUOps vfio_legacy_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +-- +2.41.0.windows.1 + diff --git a/vfio-container-Introduce-vfio_legacy_setup-for-furth.patch b/vfio-container-Introduce-vfio_legacy_setup-for-furth.patch new file mode 100644 index 0000000000000000000000000000000000000000..630d44324ae41876c0a64e89f08730d22cd172c7 --- /dev/null +++ 
b/vfio-container-Introduce-vfio_legacy_setup-for-furth.patch @@ -0,0 +1,108 @@ +From 1bb64d6e69c385af5817dc6f0c3bbd204783c237 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:17 +0100 +Subject: [PATCH] vfio/container: Introduce vfio_legacy_setup() for further + cleanups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This will help subsequent patches to unify the initialization of type1 +and sPAPR IOMMU backends. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/container.c | 60 +++++++++++++++++++++++++++------------------ + 1 file changed, 36 insertions(+), 24 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 67aeaa825b..27ce31c883 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -567,6 +567,35 @@ static void shared_memory_listener_unregister(void) + g_shl = NULL; + } + ++static int vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) ++{ ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ g_autofree struct vfio_iommu_type1_info *info = NULL; ++ int ret; ++ ++ ret = vfio_get_iommu_info(container, &info); ++ if (ret) { ++ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); ++ return ret; ++ } ++ ++ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { ++ bcontainer->pgsizes = info->iova_pgsizes; ++ } else { ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ } ++ ++ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { ++ bcontainer->dma_max_mappings = 65535; ++ } ++ ++ vfio_get_info_iova_range(info, bcontainer); ++ ++ vfio_get_iommu_info_migration(container, info); ++ return 0; ++} ++ + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -665,31 +694,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + case VFIO_TYPE1v2_IOMMU: + case 
VFIO_TYPE1_IOMMU: + case VFIO_TYPE1v2_S_IOMMU: +- { +- struct vfio_iommu_type1_info *info; +- +- ret = vfio_get_iommu_info(container, &info); +- if (ret) { +- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); +- goto enable_discards_exit; +- } +- +- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { +- bcontainer->pgsizes = info->iova_pgsizes; +- } else { +- bcontainer->pgsizes = qemu_real_host_page_size(); +- } +- +- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { +- bcontainer->dma_max_mappings = 65535; +- } +- +- vfio_get_info_iova_range(info, bcontainer); +- +- vfio_get_iommu_info_migration(container, info); +- g_free(info); ++ ret = vfio_legacy_setup(bcontainer, errp); + break; +- } + case VFIO_SPAPR_TCE_v2_IOMMU: + case VFIO_SPAPR_TCE_IOMMU: + { +@@ -699,6 +705,12 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + } + break; + } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (ret) { ++ goto enable_discards_exit; + } + + vfio_kvm_device_add_group(group); +-- +2.41.0.windows.1 + diff --git a/vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch b/vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch new file mode 100644 index 0000000000000000000000000000000000000000..b0b9ee7fae24c2526a3e99fbb8419b4d8e07150e --- /dev/null +++ b/vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch @@ -0,0 +1,94 @@ +From a59131a461adf9b626735886a53825e2a03f3272 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:30 +0800 +Subject: [PATCH] vfio/container: Move dirty_pgsizes and max_dirty_bitmap_size + to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/container.c | 9 +++++---- + include/hw/vfio/vfio-common.h | 2 -- + include/hw/vfio/vfio-container-base.h | 2 ++ + 3 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 50da1300dd..191597167a 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -66,6 +66,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_type1_dma_unmap *unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; +@@ -93,7 +94,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + bitmap->size = vbmap.size; + bitmap->data = (__u64 *)vbmap.bitmap; + +- if (vbmap.size > container->max_dirty_bitmap_size) { ++ if (vbmap.size > bcontainer->max_dirty_bitmap_size) { + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); + ret = -E2BIG; + goto unmap_exit; +@@ -157,7 +158,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + + if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { + if (!vfio_devices_all_device_dirty_tracking(bcontainer) && +- container->bcontainer.dirty_pages_supported) { ++ bcontainer->dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } + +@@ -536,8 +537,8 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { + bcontainer->dirty_pages_supported = true; +- container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; +- container->dirty_pgsizes = cap_mig->pgsize_bitmap; ++ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; ++ bcontainer->dirty_pgsizes = 
cap_mig->pgsize_bitmap; + } + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index c23e7fb8ee..a8da41d27e 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -89,8 +89,6 @@ typedef struct VFIOContainer { + MemoryListener prereg_listener; + unsigned iommu_type; + bool dirty_log_manual_clear; +- uint64_t dirty_pgsizes; +- uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIODMARange) dma_list; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 95f8d319e0..80e4a993c5 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -39,6 +39,8 @@ typedef struct VFIOContainerBase { + MemoryListener listener; + Error *error; + bool initialized; ++ uint64_t dirty_pgsizes; ++ uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + unsigned int dma_max_mappings; + bool dirty_pages_supported; +-- +2.41.0.windows.1 + diff --git a/vfio-container-Move-iova_ranges-to-base-container.patch b/vfio-container-Move-iova_ranges-to-base-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..3580802248dbb230193ad48ce60ea584b9383ac1 --- /dev/null +++ b/vfio-container-Move-iova_ranges-to-base-container.patch @@ -0,0 +1,160 @@ +From 4aac9c99e4f90d400d511bb46809714eab1fbf5f Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:31 +0800 +Subject: [PATCH] vfio/container: Move iova_ranges to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Meanwhile remove the helper function vfio_free_container as it +only calls g_free now. + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 5 +++-- + hw/vfio/container-base.c | 3 +++ + hw/vfio/container.c | 19 ++++++------------- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 1 + + 5 files changed, 13 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4647f4447d..9926454527 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -637,9 +637,10 @@ static void vfio_listener_region_add(MemoryListener *listener, + goto fail; + } + +- if (container->iova_ranges) { ++ if (bcontainer->iova_ranges) { + ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, +- container->iova_ranges, &err); ++ bcontainer->iova_ranges, ++ &err); + if (ret) { + g_free(giommu); + goto fail; +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 7f508669f5..0177f43741 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; ++ bcontainer->iova_ranges = NULL; + QLIST_INIT(&bcontainer->giommu_list); + QLIST_INIT(&bcontainer->vrdl_list); + } +@@ -70,4 +71,6 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } ++ ++ g_list_free_full(bcontainer->iova_ranges, g_free); + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 191597167a..13d42aad0d 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -360,7 +360,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, + } + + static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, +- VFIOContainer *container) ++ VFIOContainerBase *bcontainer) + { + struct vfio_info_cap_header *hdr; + struct 
vfio_iommu_type1_info_cap_iova_range *cap; +@@ -378,8 +378,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); +- container->iova_ranges = +- range_list_insert(container->iova_ranges, range); ++ bcontainer->iova_ranges = ++ range_list_insert(bcontainer->iova_ranges, range); + } + + return true; +@@ -542,12 +542,6 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + } + } + +-static void vfio_free_container(VFIOContainer *container) +-{ +- g_list_free_full(container->iova_ranges, g_free); +- g_free(container); +-} +- + static SharedRegionListener *g_shl; + + static void shared_memory_listener_register(MemoryListener *listener, +@@ -653,7 +647,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + container = g_malloc0(sizeof(*container)); + container->fd = fd; +- container->iova_ranges = NULL; + QLIST_INIT(&container->dma_list); + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); +@@ -692,7 +685,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + bcontainer->dma_max_mappings = 65535; + } + +- vfio_get_info_iova_range(info, container); ++ vfio_get_info_iova_range(info, bcontainer); + + vfio_get_iommu_info_migration(container, info); + g_free(info); +@@ -753,7 +746,7 @@ enable_discards_exit: + vfio_ram_block_discard_disable(container, false); + + free_container_exit: +- vfio_free_container(container); ++ g_free(container); + + close_fd_exit: + close(fd); +@@ -801,7 +794,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + + trace_vfio_disconnect_container(container->fd); + close(container->fd); +- vfio_free_container(container); ++ g_free(container); + + vfio_put_address_space(space); + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index a8da41d27e..9a2e0ace72 100644 +--- a/include/hw/vfio/vfio-common.h 
++++ b/include/hw/vfio/vfio-common.h +@@ -92,7 +92,6 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIODMARange) dma_list; +- GList *iova_ranges; + } VFIOContainer; + + typedef struct VFIOHostDMAWindow { +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 80e4a993c5..9658ffb526 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -48,6 +48,7 @@ typedef struct VFIOContainerBase { + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_ENTRY(VFIOContainerBase) next; + QLIST_HEAD(, VFIODevice) device_list; ++ GList *iova_ranges; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +-- +2.41.0.windows.1 + diff --git a/vfio-container-Move-listener-to-base-container.patch b/vfio-container-Move-listener-to-base-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..bdba89ac111f4b8037975c8e5069619e9a326074 --- /dev/null +++ b/vfio-container-Move-listener-to-base-container.patch @@ -0,0 +1,546 @@ +From 4515b719fb7a335ce76dd9168a9e4db24fca28df Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:29 +0800 +Subject: [PATCH] vfio/container: Move listener to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Move listener to base container. Also error and initialized fields +are moved at the same time. + +No functional change intended. + +Modify vfio_physical_log_clear/vfio_connect_container during +backporting. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 119 +++++++++++++------------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 23 +++-- + hw/vfio/spapr.c | 11 +-- + include/hw/vfio/vfio-common.h | 3 - + include/hw/vfio/vfio-container-base.h | 3 + + 6 files changed, 82 insertions(+), 78 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e9a19209ab..4647f4447d 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -541,7 +541,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section, + return true; + } + +-static bool vfio_get_section_iova_range(VFIOContainer *container, ++static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, + hwaddr *out_iova, hwaddr *out_end, + Int128 *out_llend) +@@ -569,8 +569,10 @@ static bool vfio_get_section_iova_range(VFIOContainer *container, + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -581,7 +583,8 @@ static void vfio_listener_region_add(MemoryListener *listener, + return; + } + +- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { ++ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, ++ &llend)) { + if (memory_region_is_ram_device(section->mr)) { + trace_vfio_listener_region_add_no_dma_map( + memory_region_name(section->mr), +@@ -688,13 +691,12 @@ static void vfio_listener_region_add(MemoryListener 
*listener, + } + } + +- ret = vfio_container_dma_map(&container->bcontainer, +- iova, int128_get64(llsize), vaddr, +- section->readonly); ++ ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), ++ vaddr, section->readonly); + if (ret) { + error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", +- container, iova, int128_get64(llsize), vaddr, ret, ++ bcontainer, iova, int128_get64(llsize), vaddr, ret, + strerror(-ret)); + if (memory_region_is_ram_device(section->mr)) { + /* Allow unexpected mappings not to be fatal for RAM devices */ +@@ -716,9 +718,9 @@ fail: + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. + */ +- if (!container->initialized) { +- if (!container->error) { +- error_propagate_prepend(&container->error, err, ++ if (!bcontainer->initialized) { ++ if (!bcontainer->error) { ++ error_propagate_prepend(&bcontainer->error, err, + "Region %s: ", + memory_region_name(section->mr)); + } else { +@@ -733,8 +735,10 @@ fail: + static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -767,7 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, + */ + } + +- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { ++ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, ++ &llend)) { + return; + } + +@@ -790,22 +795,22 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (int128_eq(llsize, int128_2_64())) { + /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ + llsize = int128_rshift(llsize, 1); +- ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ ret = vfio_container_dma_unmap(bcontainer, iova, + int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, int128_get64(llsize), ret, ++ bcontainer, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + iova += int128_get64(llsize); + } +- ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ ret = vfio_container_dma_unmap(bcontainer, iova, + int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, int128_get64(llsize), ret, ++ bcontainer, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + } +@@ -825,16 +830,15 @@ typedef struct VFIODirtyRanges { + } VFIODirtyRanges; + + typedef struct VFIODirtyRangesListener { +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + VFIODirtyRanges ranges; + MemoryListener listener; + } VFIODirtyRangesListener; + + static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, +- VFIOContainer *container) ++ VFIOContainerBase *bcontainer) + { + VFIOPCIDevice *pcidev; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + Object *owner; + +@@ -863,7 +867,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, + hwaddr iova, end, *min, *max; + + if (!vfio_listener_valid_section(section, "tracking_update") || +- !vfio_get_section_iova_range(dirty->container, section, ++ !vfio_get_section_iova_range(dirty->bcontainer, section, + &iova, &end, NULL)) { + return; + } +@@ -887,7 +891,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, + * The alternative would be an IOVATree but that has a much bigger runtime + * overhead and unnecessary complexity. 
+ */ +- if (vfio_section_is_vfio_pci(section, dirty->container) && ++ if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && + iova >= UINT32_MAX) { + min = &range->minpci64; + max = &range->maxpci64; +@@ -911,7 +915,7 @@ static const MemoryListener vfio_dirty_tracking_listener = { + .region_add = vfio_dirty_tracking_update, + }; + +-static void vfio_dirty_tracking_init(VFIOContainer *container, ++static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer, + VFIODirtyRanges *ranges) + { + VFIODirtyRangesListener dirty; +@@ -921,10 +925,10 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + dirty.ranges.min64 = UINT64_MAX; + dirty.ranges.minpci64 = UINT64_MAX; + dirty.listener = vfio_dirty_tracking_listener; +- dirty.container = container; ++ dirty.bcontainer = bcontainer; + + memory_listener_register(&dirty.listener, +- container->bcontainer.space->as); ++ bcontainer->space->as); + + *ranges = dirty.ranges; + +@@ -936,12 +940,11 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + memory_listener_unregister(&dirty.listener); + } + +-static void vfio_devices_dma_logging_stop(VFIOContainer *container) ++static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) + { + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + feature->argsz = sizeof(buf); +@@ -962,7 +965,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) + } + + static struct vfio_device_feature * +-vfio_device_feature_dma_logging_start_create(VFIOContainer *container, ++vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer, + VFIODirtyRanges *tracking) + { + struct vfio_device_feature *feature; +@@ -1035,16 +1038,15 @@ static void vfio_device_feature_dma_logging_start_destroy( + g_free(feature); + } + +-static 
int vfio_devices_dma_logging_start(VFIOContainer *container) ++static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer) + { + struct vfio_device_feature *feature; + VFIODirtyRanges ranges; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret = 0; + +- vfio_dirty_tracking_init(container, &ranges); +- feature = vfio_device_feature_dma_logging_start_create(container, ++ vfio_dirty_tracking_init(bcontainer, &ranges); ++ feature = vfio_device_feature_dma_logging_start_create(bcontainer, + &ranges); + if (!feature) { + return -errno; +@@ -1067,7 +1069,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) + + out: + if (ret) { +- vfio_devices_dma_logging_stop(container); ++ vfio_devices_dma_logging_stop(bcontainer); + } + + vfio_device_feature_dma_logging_start_destroy(feature); +@@ -1077,14 +1079,14 @@ out: + + static void vfio_listener_log_global_start(MemoryListener *listener) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret; + +- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { +- ret = vfio_devices_dma_logging_start(container); ++ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ++ ret = vfio_devices_dma_logging_start(bcontainer); + } else { +- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +- true); ++ ret = vfio_container_set_dirty_page_tracking(bcontainer, true); + } + + if (ret) { +@@ -1096,14 +1098,14 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + + static void vfio_listener_log_global_stop(MemoryListener *listener) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret = 0; + +- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { +- 
vfio_devices_dma_logging_stop(container); ++ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ++ vfio_devices_dma_logging_stop(bcontainer); + } else { +- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +- false); ++ ret = vfio_container_set_dirty_page_tracking(bcontainer, false); + } + + if (ret) { +@@ -1221,8 +1223,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + vfio_giommu_dirty_notifier, n); + VFIOGuestIOMMU *giommu = gdn->giommu; + VFIOContainerBase *bcontainer = giommu->bcontainer; +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova = iotlb->iova + giommu->iommu_offset; + ram_addr_t translated_addr; + int ret = -EINVAL; +@@ -1237,12 +1237,12 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + + rcu_read_lock(); + if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { +- ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, +- iotlb->addr_mask + 1, translated_addr); ++ ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, ++ translated_addr); + if (ret) { + error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, iotlb->addr_mask + 1, ret, ++ bcontainer, iova, iotlb->addr_mask + 1, ret, + strerror(-ret)); + } + } +@@ -1298,10 +1298,9 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, + &vrdl); + } + +-static int vfio_sync_dirty_bitmap(VFIOContainer *container, ++static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + ram_addr_t ram_addr; + + if (memory_region_is_iommu(section->mr)) { +@@ -1337,7 +1336,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +- return 
vfio_get_dirty_bitmap(&container->bcontainer, ++ return vfio_get_dirty_bitmap(bcontainer, + REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), + int128_get64(section->size), ram_addr); + } +@@ -1345,15 +1344,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + static void vfio_listener_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret; + + if (vfio_listener_skipped_section(section)) { + return; + } + +- if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { +- ret = vfio_sync_dirty_bitmap(container, section); ++ if (vfio_devices_all_dirty_tracking(bcontainer)) { ++ ret = vfio_sync_dirty_bitmap(bcontainer, section); + if (ret) { + error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, + strerror(-ret)); +@@ -1485,14 +1485,17 @@ static int vfio_physical_log_clear(VFIOContainer *container, + static void vfio_listener_log_clear(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + + if (vfio_listener_skipped_section(section) || +- !container->bcontainer.dirty_pages_supported) { ++ !bcontainer->dirty_pages_supported) { + return; + } + +- if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { ++ if (vfio_devices_all_dirty_tracking(bcontainer)) { + vfio_physical_log_clear(container, section); + } + } +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 584eee4ba1..7f508669f5 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -51,6 +51,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace 
*space, + { + bcontainer->ops = ops; + bcontainer->space = space; ++ bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 023f220c93..50da1300dd 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -520,6 +520,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + { + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_migration *cap_mig; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); + if (!hdr) { +@@ -534,7 +535,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + * qemu_real_host_page_size to mark those dirty. + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { +- container->bcontainer.dirty_pages_supported = true; ++ bcontainer->dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +@@ -651,7 +652,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + container = g_malloc0(sizeof(*container)); + container->fd = fd; +- container->error = NULL; + container->iova_ranges = NULL; + QLIST_INIT(&container->dma_list); + bcontainer = &container->bcontainer; +@@ -716,23 +716,22 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + +- container->listener = vfio_memory_listener; +- + if (kvm_csv3_enabled()) { +- shared_memory_listener_register(&container->listener, ++ shared_memory_listener_register(&bcontainer->listener, + bcontainer->space->as); +- } else { +- memory_listener_register(&container->listener, bcontainer->space->as); + } + +- if (container->error) { ++ bcontainer->listener = 
vfio_memory_listener; ++ memory_listener_register(&bcontainer->listener, bcontainer->space->as); ++ ++ if (bcontainer->error) { + ret = -1; +- error_propagate_prepend(errp, container->error, ++ error_propagate_prepend(errp, bcontainer->error, + "memory listener initialization failed: "); + goto listener_release_exit; + } + +- container->initialized = true; ++ bcontainer->initialized = true; + + return 0; + listener_release_exit: +@@ -742,7 +741,7 @@ listener_release_exit: + if (kvm_csv3_enabled()) { + shared_memory_listener_unregister(); + } else { +- memory_listener_unregister(&container->listener); ++ memory_listener_unregister(&bcontainer->listener); + } + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +@@ -781,7 +780,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + if (kvm_csv3_enabled()) { + shared_memory_listener_unregister(); + } else { +- memory_listener_unregister(&container->listener); ++ memory_listener_unregister(&bcontainer->listener); + } + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 4f76bdd3ca..7a50975f25 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -46,6 +46,7 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + { + VFIOContainer *container = container_of(listener, VFIOContainer, + prereg_listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; + int ret; +@@ -88,9 +89,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. 
+ */ +- if (!container->initialized) { +- if (!container->error) { +- error_setg_errno(&container->error, -ret, ++ if (!bcontainer->initialized) { ++ if (!bcontainer->error) { ++ error_setg_errno(&bcontainer->error, -ret, + "Memory registering failed"); + } + } else { +@@ -445,9 +446,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + + memory_listener_register(&container->prereg_listener, + &address_space_memory); +- if (container->error) { ++ if (bcontainer->error) { + ret = -1; +- error_propagate_prepend(errp, container->error, ++ error_propagate_prepend(errp, bcontainer->error, + "RAM memory listener initialization failed: "); + goto listener_unregister_exit; + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 0174b767ca..c23e7fb8ee 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -86,11 +86,8 @@ typedef struct VFIODMARange { + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ +- MemoryListener listener; + MemoryListener prereg_listener; + unsigned iommu_type; +- Error *error; +- bool initialized; + bool dirty_log_manual_clear; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 8e05b5ac5a..95f8d319e0 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,9 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ MemoryListener listener; ++ Error *error; ++ bool initialized; + unsigned long pgsizes; + unsigned int dma_max_mappings; + bool dirty_pages_supported; +-- +2.41.0.windows.1 + diff --git a/vfio-container-Move-per-container-device-list-in-bas.patch b/vfio-container-Move-per-container-device-list-in-bas.patch new file mode 100644 index 
0000000000000000000000000000000000000000..bcbaff73749fa53839b3ca4e5ccf40791d2ec403 --- /dev/null +++ b/vfio-container-Move-per-container-device-list-in-bas.patch @@ -0,0 +1,222 @@ +From 22244582a5ff77c0d93008e603a343c1e47ca85d Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:25 +0800 +Subject: [PATCH] vfio/container: Move per container device list in base + container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +VFIO Device is also changed to point to base container instead of +legacy container. + +No functional change intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 23 +++++++++++++++-------- + hw/vfio/container.c | 12 ++++++------ + include/hw/vfio/vfio-common.h | 3 +-- + include/hw/vfio/vfio-container-base.h | 1 + + 4 files changed, 23 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 3be6cecc63..b952d1c811 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) + + bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- return vbasedev->container->bcontainer.space->as != &address_space_memory; ++ return vbasedev->bcontainer->space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +@@ -179,6 +179,7 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) + + static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +@@ -187,7 +188,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + return false; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + VFIOMigration *migration = 
vbasedev->migration; + + if (!migration) { +@@ -205,9 +206,10 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + + bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (!vbasedev->dirty_pages_supported) { + return false; + } +@@ -222,13 +224,14 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + */ + bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + if (!migration_is_active(migrate_get_current())) { + return false; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + VFIOMigration *migration = vbasedev->migration; + + if (!migration) { +@@ -833,12 +836,13 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, + VFIOContainer *container) + { + VFIOPCIDevice *pcidev; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + Object *owner; + + owner = memory_region_owner(section->mr); + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } +@@ -939,13 +943,14 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_SET | + 
VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (!vbasedev->dirty_tracking) { + continue; + } +@@ -1036,6 +1041,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) + { + struct vfio_device_feature *feature; + VFIODirtyRanges ranges; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret = 0; + +@@ -1046,7 +1052,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) + return -errno; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->dirty_tracking) { + continue; + } +@@ -1139,10 +1145,11 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + ret = vfio_device_dma_logging_report(vbasedev, iova, size, + vbmap->bitmap); + if (ret) { +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index cf373e42ef..74d236ddee 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1001,7 +1001,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; + VFIOGroup *group; +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + int ret; + + if (groupid < 0) { +@@ -1028,9 +1028,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + return ret; + } + +- container = group->container; +- vbasedev->container = container; +- QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next); ++ bcontainer = &group->container->bcontainer; ++ 
vbasedev->bcontainer = bcontainer; ++ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + return ret; +@@ -1040,13 +1040,13 @@ void vfio_detach_device(VFIODevice *vbasedev) + { + VFIOGroup *group = vbasedev->group; + +- if (!vbasedev->container) { ++ if (!vbasedev->bcontainer) { + return; + } + + QLIST_REMOVE(vbasedev, global_next); + QLIST_REMOVE(vbasedev, container_next); +- vbasedev->container = NULL; ++ vbasedev->bcontainer = NULL; + trace_vfio_detach_device(vbasedev->name, group->groupid); + vfio_put_base_device(vbasedev); + vfio_put_group(group); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index af0ef9042d..e27854228c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -100,7 +100,6 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_HEAD(, VFIODMARange) dma_list; +- QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; + } VFIOContainer; + +@@ -128,7 +127,7 @@ typedef struct VFIODevice { + QLIST_ENTRY(VFIODevice) container_next; + QLIST_ENTRY(VFIODevice) global_next; + struct VFIOGroup *group; +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + char *sysfsdev; + char *name; + DeviceState *dev; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index f244f003d0..7090962496 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -39,6 +39,7 @@ typedef struct VFIOContainerBase { + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; ++ QLIST_HEAD(, VFIODevice) device_list; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +-- +2.41.0.windows.1 + diff --git a/vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch 
b/vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch new file mode 100644 index 0000000000000000000000000000000000000000..86b8b1f36355f82f11dc7ac4df5ca77153adbb07 --- /dev/null +++ b/vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch @@ -0,0 +1,234 @@ +From 961614f6c997caf632ce37ead96b301ec47b1847 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:27 +0800 +Subject: [PATCH] vfio/container: Move pgsizes and dma_max_mappings to base + container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 17 +++++++++-------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 11 +++++------ + hw/vfio/spapr.c | 10 ++++++---- + include/hw/vfio/vfio-common.h | 2 -- + include/hw/vfio/vfio-container-base.h | 2 ++ + 6 files changed, 23 insertions(+), 20 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b663d0bcc0..fd6249c290 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -401,6 +401,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + static void vfio_register_ram_discard_listener(VFIOContainer *container, + MemoryRegionSection *section) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + +@@ -419,8 +420,8 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + section->mr); + + g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); +- g_assert(container->pgsizes && +- vrdl->granularity >= 1ULL << ctz64(container->pgsizes)); ++ g_assert(bcontainer->pgsizes && ++ vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes)); + + 
ram_discard_listener_init(&vrdl->listener, + vfio_ram_discard_notify_populate, +@@ -441,7 +442,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + * number of sections in the address space we could have over time, + * also consuming DMA mappings. + */ +- if (container->dma_max_mappings) { ++ if (bcontainer->dma_max_mappings) { + unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512; + + #ifdef CONFIG_KVM +@@ -462,11 +463,11 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + + if (vrdl_mappings + max_memslots - vrdl_count > +- container->dma_max_mappings) { ++ bcontainer->dma_max_mappings) { + warn_report("%s: possibly running out of DMA mappings. E.g., try" + " increasing the 'block-size' of virtio-mem devies." + " Maximum possible DMA mappings: %d, Maximum possible" +- " memslots: %d", __func__, container->dma_max_mappings, ++ " memslots: %d", __func__, bcontainer->dma_max_mappings, + max_memslots); + } + } +@@ -626,7 +627,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + iommu_idx); + + ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr, +- container->pgsizes, ++ bcontainer->pgsizes, + &err); + if (ret) { + g_free(giommu); +@@ -675,7 +676,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + llsize = int128_sub(llend, int128_make64(iova)); + + if (memory_region_is_ram_device(section->mr)) { +- hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; ++ hwaddr pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + + if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { + trace_vfio_listener_region_add_no_dma_map( +@@ -777,7 +778,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (memory_region_is_ram_device(section->mr)) { + hwaddr pgmask; + +- pgmask = (1ULL << ctz64(container->pgsizes)) - 1; ++ pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else 
if (memory_region_has_ram_discard_manager(section->mr)) { + vfio_unregister_ram_discard_listener(container, section); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 5d654ae172..dcce111349 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -52,6 +52,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->ops = ops; + bcontainer->space = space; + bcontainer->dirty_pages_supported = false; ++ bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 9a542368ab..116a9e1e73 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -196,7 +196,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { + trace_vfio_legacy_dma_unmap_overflow_workaround(); +- unmap.size -= 1ULL << ctz64(container->pgsizes); ++ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); + continue; + } + error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); +@@ -652,7 +652,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + container->error = NULL; +- container->dma_max_mappings = 0; + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); + QLIST_INIT(&container->dma_list); +@@ -684,13 +683,13 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + } + + if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { +- container->pgsizes = info->iova_pgsizes; ++ bcontainer->pgsizes = info->iova_pgsizes; + } else { +- container->pgsizes = qemu_real_host_page_size(); ++ bcontainer->pgsizes = qemu_real_host_page_size(); + } + +- if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { +- container->dma_max_mappings = 65535; ++ if (!vfio_get_info_dma_avail(info, 
&bcontainer->dma_max_mappings)) { ++ bcontainer->dma_max_mappings = 65535; + } + + vfio_get_info_iova_range(info, container); +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 83da2f7ec2..4f76bdd3ca 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -226,6 +226,7 @@ static int vfio_spapr_create_window(VFIOContainer *container, + hwaddr *pgsize) + { + int ret = 0; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask; + unsigned entries, bits_total, bits_per_level, max_levels; +@@ -239,13 +240,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, + if (pagesize > rampagesize) { + pagesize = rampagesize; + } +- pgmask = container->pgsizes & (pagesize | (pagesize - 1)); ++ pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1)); + pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0; + if (!pagesize) { + error_report("Host doesn't support page size 0x%"PRIx64 + ", the supported mask is 0x%lx", + memory_region_iommu_get_min_page_size(iommu_mr), +- container->pgsizes); ++ bcontainer->pgsizes); + return -EINVAL; + } + +@@ -421,6 +422,7 @@ void vfio_container_del_section_window(VFIOContainer *container, + + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; +@@ -461,7 +463,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } + + if (v2) { +- container->pgsizes = info.ddw.pgsizes; ++ bcontainer->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. 
+ * To make region_add/del simpler, we better remove this +@@ -476,7 +478,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } + } else { + /* The default table uses 4K pages */ +- container->pgsizes = 0x1000; ++ bcontainer->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 0295ede7ba..3046287070 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -94,8 +94,6 @@ typedef struct VFIOContainer { + bool dirty_log_manual_clear; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; +- unsigned long pgsizes; +- unsigned int dma_max_mappings; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 7090962496..85ec7e1a56 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,8 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ unsigned long pgsizes; ++ unsigned int dma_max_mappings; + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; +-- +2.41.0.windows.1 + diff --git a/vfio-container-Move-space-field-to-base-container.patch b/vfio-container-Move-space-field-to-base-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..94bd76425db5c667d60330cc6170de1ff899f5c7 --- /dev/null +++ b/vfio-container-Move-space-field-to-base-container.patch @@ -0,0 +1,268 @@ +From 97979ab4d92d0006ffefb586675b6110e5b7a746 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:23 +0800 +Subject: [PATCH] vfio/container: Move space field to base container 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Move the space field to the base object. Also the VFIOAddressSpace +now contains a list of base containers. + +No functional change intended. + +Modify hw/vfio/container.c: +vfio_connect_container->shared_memory_listener_register in kvm_csv3_enabled +during backporting. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/ppc/spapr_pci_vfio.c | 10 +++++----- + hw/vfio/common.c | 4 ++-- + hw/vfio/container-base.c | 6 +++++- + hw/vfio/container.c | 20 +++++++++----------- + include/hw/vfio/vfio-common.h | 8 -------- + include/hw/vfio/vfio-container-base.h | 9 +++++++++ + 6 files changed, 30 insertions(+), 27 deletions(-) + +diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c +index f283f7e38d..d1d07bec46 100644 +--- a/hw/ppc/spapr_pci_vfio.c ++++ b/hw/ppc/spapr_pci_vfio.c +@@ -84,27 +84,27 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) + static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) + { + VFIOAddressSpace *space = vfio_get_address_space(as); +- VFIOContainer *container = NULL; ++ VFIOContainerBase *bcontainer = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + +- container = QLIST_FIRST(&space->containers); ++ bcontainer = QLIST_FIRST(&space->containers); + +- if (QLIST_NEXT(container, next)) { ++ if (QLIST_NEXT(bcontainer, next)) { + /* + * We don't yet have logic to synchronize EEH state across + * multiple containers + */ +- container = NULL; ++ bcontainer = NULL; + goto out; + } + + out: + vfio_put_address_space(space); +- return container; ++ return container_of(bcontainer, VFIOContainer, bcontainer); + } + + static bool vfio_eeh_as_ok(AddressSpace *as) +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 
b8007b22c3..2f3f66991a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) + + bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- return vbasedev->container->space->as != &address_space_memory; ++ return vbasedev->container->bcontainer.space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +@@ -922,7 +922,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + dirty.container = container; + + memory_listener_register(&dirty.listener, +- container->space->as); ++ container->bcontainer.space->as); + + *ranges = dirty.ranges; + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 20bcb9669a..3933391e0d 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,9 +31,11 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + +-void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) ++void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, ++ const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; ++ bcontainer->space = space; + QLIST_INIT(&bcontainer->giommu_list); + } + +@@ -41,6 +43,8 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + { + VFIOGuestIOMMU *giommu, *tmp; + ++ QLIST_REMOVE(bcontainer, next); ++ + QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier( + MEMORY_REGION(giommu->iommu_mr), &giommu->n); +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 03791601d0..b7ab0d7323 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -607,7 +607,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + * details once we know which type of IOMMU we are using. 
+ */ + +- QLIST_FOREACH(container, &space->containers, next) { ++ QLIST_FOREACH(bcontainer, &space->containers, next) { ++ container = container_of(bcontainer, VFIOContainer, bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { +@@ -643,7 +644,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + } + + container = g_malloc0(sizeof(*container)); +- container->space = space; + container->fd = fd; + container->error = NULL; + container->dirty_pages_supported = false; +@@ -652,7 +652,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + QLIST_INIT(&container->vrdl_list); + QLIST_INIT(&container->dma_list); + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, &vfio_legacy_ops); ++ vfio_container_init(bcontainer, space, &vfio_legacy_ops); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -708,7 +708,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + vfio_kvm_device_add_group(group); + + QLIST_INIT(&container->group_list); +- QLIST_INSERT_HEAD(&space->containers, container, next); ++ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); +@@ -717,9 +717,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + if (kvm_csv3_enabled()) { + shared_memory_listener_register(&container->listener, +- container->space->as); ++ bcontainer->space->as); + } else { +- memory_listener_register(&container->listener, container->space->as); ++ memory_listener_register(&container->listener, bcontainer->space->as); + } + + if (container->error) { +@@ -734,7 +734,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + return 0; + listener_release_exit: + QLIST_REMOVE(group, container_next); +- QLIST_REMOVE(container, next); ++ 
QLIST_REMOVE(bcontainer, next); + vfio_kvm_device_del_group(group); + if (kvm_csv3_enabled()) { + shared_memory_listener_unregister(); +@@ -792,9 +792,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + } + + if (QLIST_EMPTY(&container->group_list)) { +- VFIOAddressSpace *space = container->space; +- +- QLIST_REMOVE(container, next); ++ VFIOAddressSpace *space = bcontainer->space; + + vfio_container_destroy(bcontainer); + +@@ -815,7 +813,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == groupid) { + /* Found it. Now is it already in the right context? */ +- if (group->container->space->as == as) { ++ if (group->container->bcontainer.space->as == as) { + return group; + } else { + error_setg(errp, "group %d used in multiple address spaces", +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6f02952ff6..31c9df4b03 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -73,12 +73,6 @@ typedef struct VFIOMigration { + bool initial_data_sent; + } VFIOMigration; + +-typedef struct VFIOAddressSpace { +- AddressSpace *as; +- QLIST_HEAD(, VFIOContainer) containers; +- QLIST_ENTRY(VFIOAddressSpace) list; +-} VFIOAddressSpace; +- + struct VFIOGroup; + + typedef struct VFIODMARange { +@@ -91,7 +85,6 @@ typedef struct VFIODMARange { + + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; +- VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener listener; + MemoryListener prereg_listener; +@@ -108,7 +101,6 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_HEAD(, VFIODMARange) dma_list; +- QLIST_ENTRY(VFIOContainer) next; + QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; + } VFIOContainer; +diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h +index a11aec5755..c7cc6ec9c5 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -24,12 +24,20 @@ typedef struct { + hwaddr pages; + } VFIOBitmap; + ++typedef struct VFIOAddressSpace { ++ AddressSpace *as; ++ QLIST_HEAD(, VFIOContainerBase) containers; ++ QLIST_ENTRY(VFIOAddressSpace) list; ++} VFIOAddressSpace; ++ + /* + * This is the base object for vfio container backends + */ + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; ++ VFIOAddressSpace *space; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; ++ QLIST_ENTRY(VFIOContainerBase) next; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +@@ -48,6 +56,7 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + IOMMUTLBEntry *iotlb); + + void vfio_container_init(VFIOContainerBase *bcontainer, ++ VFIOAddressSpace *space, + const VFIOIOMMUOps *ops); + void vfio_container_destroy(VFIOContainerBase *bcontainer); + +-- +2.41.0.windows.1 + diff --git a/vfio-container-Move-vrdl_list-to-base-container.patch b/vfio-container-Move-vrdl_list-to-base-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..8b265742929605476edb2038f9bc7bac0b49de22 --- /dev/null +++ b/vfio-container-Move-vrdl_list-to-base-container.patch @@ -0,0 +1,248 @@ +From d0234f18616cfe9a43287ba75e4788a10166a526 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:28 +0800 +Subject: [PATCH] vfio/container: Move vrdl_list to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 38 +++++++++++++-------------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 1 - + include/hw/vfio/vfio-common.h | 11 -------- + include/hw/vfio/vfio-container-base.h | 11 ++++++++ + 5 files changed, 31 insertions(+), 31 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index fd6249c290..e9a19209ab 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -351,13 +351,13 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + { + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); ++ VFIOContainerBase *bcontainer = vrdl->bcontainer; + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + int ret; + + /* Unmap with a single call. */ +- ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, +- iova, size , NULL); ++ ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); + if (ret) { + error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, + strerror(-ret)); +@@ -369,6 +369,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + { + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); ++ VFIOContainerBase *bcontainer = vrdl->bcontainer; + const hwaddr end = section->offset_within_region + + int128_get64(section->size); + hwaddr start, next, iova; +@@ -387,8 +388,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + +- ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, +- next - start, vaddr, section->readonly); ++ ret = vfio_container_dma_map(bcontainer, iova, next - start, ++ vaddr, section->readonly); + if (ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, 
section); +@@ -398,10 +399,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + return 0; + } + +-static void vfio_register_ram_discard_listener(VFIOContainer *container, ++static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + +@@ -412,7 +412,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); + + vrdl = g_new0(VFIORamDiscardListener, 1); +- vrdl->container = container; ++ vrdl->bcontainer = bcontainer; + vrdl->mr = section->mr; + vrdl->offset_within_address_space = section->offset_within_address_space; + vrdl->size = int128_get64(section->size); +@@ -427,7 +427,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + vfio_ram_discard_notify_populate, + vfio_ram_discard_notify_discard, true); + ram_discard_manager_register_listener(rdm, &vrdl->listener, section); +- QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next); ++ QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next); + + /* + * Sanity-check if we have a theoretically problematic setup where we could +@@ -451,7 +451,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + #endif + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + hwaddr start, end; + + start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space, +@@ -473,13 +473,13 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + } + +-static void vfio_unregister_ram_discard_listener(VFIOContainer *container, ++static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { + RamDiscardManager *rdm = 
memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { +@@ -663,7 +663,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + * about changes. + */ + if (memory_region_has_ram_discard_manager(section->mr)) { +- vfio_register_ram_discard_listener(container, section); ++ vfio_register_ram_discard_listener(bcontainer, section); + return; + } + +@@ -781,7 +781,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else if (memory_region_has_ram_discard_manager(section->mr)) { +- vfio_unregister_ram_discard_listener(container, section); ++ vfio_unregister_ram_discard_listener(bcontainer, section); + /* Unregistering will trigger an unmap. */ + try_unmap = false; + } +@@ -1267,17 +1267,17 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + * Sync the whole mapped region (spanning multiple individual mappings) + * in one go. 
+ */ +- return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, +- ram_addr); ++ return vfio_get_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr); + } + +-static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, +- MemoryRegionSection *section) ++static int ++vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { +@@ -1331,7 +1331,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + } + return 0; + } else if (memory_region_has_ram_discard_manager(section->mr)) { +- return vfio_sync_ram_discard_listener_dirty_bitmap(container, section); ++ return vfio_sync_ram_discard_listener_dirty_bitmap(bcontainer, section); + } + + ram_addr = memory_region_get_ram_addr(section->mr) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index dcce111349..584eee4ba1 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); ++ QLIST_INIT(&bcontainer->vrdl_list); + } + + void vfio_container_destroy(VFIOContainerBase *bcontainer) +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 116a9e1e73..023f220c93 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -653,7 +653,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->fd = fd; + container->error = NULL; + container->iova_ranges = NULL; +- QLIST_INIT(&container->vrdl_list); + 
QLIST_INIT(&container->dma_list); + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 3046287070..0174b767ca 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -96,21 +96,10 @@ typedef struct VFIOContainer { + uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; +- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_HEAD(, VFIODMARange) dma_list; + GList *iova_ranges; + } VFIOContainer; + +-typedef struct VFIORamDiscardListener { +- VFIOContainer *container; +- MemoryRegion *mr; +- hwaddr offset_within_address_space; +- hwaddr size; +- uint64_t granularity; +- RamDiscardListener listener; +- QLIST_ENTRY(VFIORamDiscardListener) next; +-} VFIORamDiscardListener; +- + typedef struct VFIOHostDMAWindow { + hwaddr min_iova; + hwaddr max_iova; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 85ec7e1a56..8e05b5ac5a 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -40,6 +40,7 @@ typedef struct VFIOContainerBase { + unsigned int dma_max_mappings; + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; ++ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_ENTRY(VFIOContainerBase) next; + QLIST_HEAD(, VFIODevice) device_list; + } VFIOContainerBase; +@@ -52,6 +53,16 @@ typedef struct VFIOGuestIOMMU { + QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; + } VFIOGuestIOMMU; + ++typedef struct VFIORamDiscardListener { ++ VFIOContainerBase *bcontainer; ++ MemoryRegion *mr; ++ hwaddr offset_within_address_space; ++ hwaddr size; ++ uint64_t granularity; ++ RamDiscardListener listener; ++ QLIST_ENTRY(VFIORamDiscardListener) next; ++} VFIORamDiscardListener; ++ + int vfio_container_dma_map(VFIOContainerBase *bcontainer, + hwaddr iova, 
ram_addr_t size, + void *vaddr, bool readonly); +-- +2.41.0.windows.1 + diff --git a/vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch b/vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch new file mode 100644 index 0000000000000000000000000000000000000000..db1c212bfcdd66e6c589a1402b13d3a5605adaba --- /dev/null +++ b/vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch @@ -0,0 +1,240 @@ +From c8c17aaddeee1e5002fc4bde7245719db75d4021 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:24 +0800 +Subject: [PATCH] vfio/container: Switch to IOMMU BE + set_dirty_page_tracking/query_dirty_bitmap API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +dirty_pages_supported field is also moved to the base container + +No functional change intended. + +Modify vfio_listener_log_clear during backporting. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 14 +++++++++----- + hw/vfio/container-base.c | 16 ++++++++++++++++ + hw/vfio/container.c | 21 ++++++++++++++------- + include/hw/vfio/vfio-common.h | 5 ----- + include/hw/vfio/vfio-container-base.h | 6 ++++++ + 5 files changed, 45 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 2f3f66991a..3be6cecc63 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1079,7 +1079,8 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + if (vfio_devices_all_device_dirty_tracking(container)) { + ret = vfio_devices_dma_logging_start(container); + } else { +- ret = vfio_set_dirty_page_tracking(container, true); ++ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, ++ true); + } + + if (ret) { +@@ -1097,7 +1098,8 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) + if 
(vfio_devices_all_device_dirty_tracking(container)) { + vfio_devices_dma_logging_stop(container); + } else { +- ret = vfio_set_dirty_page_tracking(container, false); ++ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, ++ false); + } + + if (ret) { +@@ -1166,7 +1168,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + VFIODMARange *qrange; + int ret; + +- if (!container->dirty_pages_supported && !all_device_dirty_tracking) { ++ if (!container->bcontainer.dirty_pages_supported && ++ !all_device_dirty_tracking) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); +@@ -1187,7 +1190,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + if (all_device_dirty_tracking) { + ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); + } else { +- ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size); ++ ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, ++ iova, size); + } + + if (ret) { +@@ -1480,7 +1484,7 @@ static void vfio_listener_log_clear(MemoryListener *listener, + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + + if (vfio_listener_skipped_section(section) || +- !container->dirty_pages_supported) { ++ !container->bcontainer.dirty_pages_supported) { + return; + } + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 3933391e0d..5d654ae172 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,11 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + ++int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start) ++{ ++ g_assert(bcontainer->ops->set_dirty_page_tracking); ++ return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); ++} ++ ++int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, 
++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size) ++{ ++ g_assert(bcontainer->ops->query_dirty_bitmap); ++ return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size); ++} ++ + void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; + bcontainer->space = space; ++ bcontainer->dirty_pages_supported = false; + QLIST_INIT(&bcontainer->giommu_list); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index b7ab0d7323..cf373e42ef 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -157,7 +157,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + + if (iotlb && vfio_devices_all_running_and_mig_active(container)) { + if (!vfio_devices_all_device_dirty_tracking(container) && +- container->dirty_pages_supported) { ++ container->bcontainer.dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } + +@@ -255,14 +255,17 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, + return -errno; + } + +-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) ++static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), + }; + +- if (!container->dirty_pages_supported) { ++ if (!bcontainer->dirty_pages_supported) { + return 0; + } + +@@ -282,9 +285,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) + return ret; + } + +-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, +- hwaddr iova, hwaddr size) ++static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + 
struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; +@@ -528,7 +534,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + * qemu_real_host_page_size to mark those dirty. + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { +- container->dirty_pages_supported = true; ++ container->bcontainer.dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +@@ -646,7 +652,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + container->error = NULL; +- container->dirty_pages_supported = false; + container->dma_max_mappings = 0; + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); +@@ -1050,4 +1055,6 @@ void vfio_detach_device(VFIODevice *vbasedev) + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, ++ .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, ++ .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, + }; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 31c9df4b03..af0ef9042d 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -91,7 +91,6 @@ typedef struct VFIOContainer { + unsigned iommu_type; + Error *error; + bool initialized; +- bool dirty_pages_supported; + bool dirty_log_manual_clear; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; +@@ -200,13 +199,9 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + +-/* container->fd */ + VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, + hwaddr start_addr, hwaddr size); + void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange); 
+-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); +-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, +- hwaddr iova, hwaddr size); + + /* SPAPR specific */ + int vfio_container_add_section_window(VFIOContainer *container, +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index c7cc6ec9c5..f244f003d0 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,7 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; + } VFIOContainerBase; +@@ -54,6 +55,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, + int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); ++int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start); ++int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size); + + void vfio_container_init(VFIOContainerBase *bcontainer, + VFIOAddressSpace *space, +-- +2.41.0.windows.1 + diff --git a/vfio-container-Switch-to-dma_map-unmap-API.patch b/vfio-container-Switch-to-dma_map-unmap-API.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c51add5d94a31cc2a5e42483f7f55818090ce97 --- /dev/null +++ b/vfio-container-Switch-to-dma_map-unmap-API.patch @@ -0,0 +1,295 @@ +From 775cf7c2a0dc34d7163eeea1aab6bfc6cb28be9b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:20 +0800 +Subject: [PATCH] vfio/container: Switch to dma_map|unmap API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 45 +++++++++++++++------------ + hw/vfio/container-base.c | 32 +++++++++++++++++++ + hw/vfio/container.c | 22 ++++++++----- + hw/vfio/meson.build | 1 + + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 4 --- + include/hw/vfio/vfio-container-base.h | 7 +++++ + 7 files changed, 81 insertions(+), 32 deletions(-) + create mode 100644 hw/vfio/container-base.c + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e08b147b3d..ea63271167 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +- VFIOContainer *container = giommu->container; ++ VFIOContainerBase *bcontainer = &giommu->container->bcontainer; + hwaddr iova = iotlb->iova + giommu->iommu_offset; + void *vaddr; + int ret; +@@ -322,21 +322,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + * of vaddr will always be there, even if the memory object is + * destroyed and its backing memory munmap-ed. 
+ */ +- ret = vfio_dma_map(container, iova, +- iotlb->addr_mask + 1, vaddr, +- read_only); ++ ret = vfio_container_dma_map(bcontainer, iova, ++ iotlb->addr_mask + 1, vaddr, ++ read_only); + if (ret) { +- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", +- container, iova, ++ bcontainer, iova, + iotlb->addr_mask + 1, vaddr, ret, strerror(-ret)); + } + } else { +- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); ++ ret = vfio_container_dma_unmap(bcontainer, iova, ++ iotlb->addr_mask + 1, iotlb); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, ++ bcontainer, iova, + iotlb->addr_mask + 1, ret, strerror(-ret)); + vfio_set_migration_error(ret); + } +@@ -355,9 +356,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + int ret; + + /* Unmap with a single call. 
*/ +- ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); ++ ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, ++ iova, size , NULL); + if (ret) { +- error_report("%s: vfio_dma_unmap() failed: %s", __func__, ++ error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, + strerror(-ret)); + } + } +@@ -385,8 +387,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + +- ret = vfio_dma_map(vrdl->container, iova, next - start, +- vaddr, section->readonly); ++ ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, ++ next - start, vaddr, section->readonly); + if (ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, section); +@@ -684,10 +686,11 @@ static void vfio_listener_region_add(MemoryListener *listener, + } + } + +- ret = vfio_dma_map(container, iova, int128_get64(llsize), +- vaddr, section->readonly); ++ ret = vfio_container_dma_map(&container->bcontainer, ++ iova, int128_get64(llsize), vaddr, ++ section->readonly); + if (ret) { +- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", " ++ error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", + container, iova, int128_get64(llsize), vaddr, ret, + strerror(-ret)); +@@ -784,18 +787,20 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (int128_eq(llsize, int128_2_64())) { + /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ + llsize = int128_rshift(llsize, 1); +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ int128_get64(llsize), NULL); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", + container, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + iova += int128_get64(llsize); + } +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ int128_get64(llsize), NULL); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", + container, iova, int128_get64(llsize), ret, + strerror(-ret)); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +new file mode 100644 +index 0000000000..55d3a35fa4 +--- /dev/null ++++ b/hw/vfio/container-base.c +@@ -0,0 +1,32 @@ ++/* ++ * VFIO BASE CONTAINER ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "qemu/error-report.h" ++#include "hw/vfio/vfio-container-base.h" ++ ++int vfio_container_dma_map(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly) ++{ ++ g_assert(bcontainer->ops->dma_map); ++ return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); ++} ++ ++int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ g_assert(bcontainer->ops->dma_unmap); ++ return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 8d8ed13e93..40e378e888 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -140,9 +140,11 @@ void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange) + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb) ++static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, IOMMUTLBEntry *iotlb) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, +@@ -193,7 +195,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + */ + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { +- trace_vfio_dma_unmap_overflow_workaround(); ++ trace_vfio_legacy_dma_unmap_overflow_workaround(); + unmap.size -= 1ULL << ctz64(container->pgsizes); + continue; + } +@@ -212,9 +214,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + return 0; + } + +-int vfio_dma_map(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, void 
*vaddr, bool readonly) ++static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, +@@ -241,7 +245,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || +- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && ++ (errno == EBUSY && ++ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } +@@ -1050,4 +1055,7 @@ void vfio_detach_device(VFIODevice *vbasedev) + vfio_put_group(group); + } + +-const VFIOIOMMUOps vfio_legacy_ops; ++const VFIOIOMMUOps vfio_legacy_ops = { ++ .dma_map = vfio_legacy_dma_map, ++ .dma_unmap = vfio_legacy_dma_unmap, ++}; +diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build +index b1db4c8605..32a6933280 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -2,6 +2,7 @@ vfio_ss = ss.source_set() + vfio_ss.add(files( + 'helpers.c', + 'common.c', ++ 'container-base.c', + 'container.c', + 'spapr.c', + 'migration.c', +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 0eb2387cf2..9f7fedee98 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -116,7 +116,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re + vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" +-vfio_dma_unmap_overflow_workaround(void) "" ++vfio_legacy_dma_unmap_overflow_workaround(void) 
"" + vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 + vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 3a0a6ab6ee..f94baf72db 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -221,10 +221,6 @@ bool vfio_devices_all_running_and_saving(VFIOContainer *container); + VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, + hwaddr start_addr, hwaddr size); + void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange); +-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb); +-int vfio_dma_map(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, void *vaddr, bool readonly); + int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); + int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 1d6daaea5d..56b033f59f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -31,6 +31,13 @@ typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + } VFIOContainerBase; + ++int vfio_container_dma_map(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly); ++int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb); ++ + struct VFIOIOMMUOps { + /* basic feature */ + int (*dma_map)(VFIOContainerBase *bcontainer, +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Add-hw_caps-field-to-HostIOMMUDeviceCap.patch 
b/vfio-iommufd-Add-hw_caps-field-to-HostIOMMUDeviceCap.patch new file mode 100644 index 0000000000000000000000000000000000000000..b94066382488cd009977619210247c540444f4db --- /dev/null +++ b/vfio-iommufd-Add-hw_caps-field-to-HostIOMMUDeviceCap.patch @@ -0,0 +1,57 @@ +From 72660b98e799248338588fe97f191c544c073806 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 22:13:20 +0100 +Subject: [PATCH] vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Store the value of @caps returned by iommufd_backend_get_device_info() +in a new field HostIOMMUDeviceCaps::hw_caps. Right now the only value is +whether device IOMMU supports dirty tracking (IOMMU_HW_CAP_DIRTY_TRACKING). + +This is in preparation for HostIOMMUDevice::realize() being called early +during attach_device(). + +Signed-off-by: Joao Martins +Reviewed-by: Cédric Le Goater +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +--- + hw/vfio/iommufd.c | 1 + + include/sysemu/host_iommu_device.h | 4 ++++ + 2 files changed, 5 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 06e6a400be..d9088705de 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -745,6 +745,7 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + + hiod->name = g_strdup(vdev->name); + caps->type = type; ++ caps->hw_caps = hw_caps; + + return true; + } +diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h +index f586908945..e4d8300350 100644 +--- a/include/sysemu/host_iommu_device.h ++++ b/include/sysemu/host_iommu_device.h +@@ -19,9 +19,13 @@ + * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. + * + * @type: host platform IOMMU type. ++ * ++ * @hw_caps: host platform IOMMU capabilities (e.g. 
on IOMMUFD this represents ++ * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl) + */ + typedef struct HostIOMMUDeviceCaps { + uint32_t type; ++ uint64_t hw_caps; + } HostIOMMUDeviceCaps; + + #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Add-properties-and-handlers-to-TYPE_HOS.patch b/vfio-iommufd-Add-properties-and-handlers-to-TYPE_HOS.patch new file mode 100644 index 0000000000000000000000000000000000000000..c4fc3050db6f72316874639ac5a99b2d7d3e2322 --- /dev/null +++ b/vfio-iommufd-Add-properties-and-handlers-to-TYPE_HOS.patch @@ -0,0 +1,130 @@ +From 0e0956cb785f868dfe48201fcdead71dbdd234b0 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 15 Jan 2024 15:05:19 +0800 +Subject: [PATCH] vfio/iommufd: Add properties and handlers to + TYPE_HOST_IOMMU_DEVICE_IOMMUFD + +New added properties include IOMMUFD handle and devid, ioas. +IOMMUFD handle and devid are used to allocate/free ioas, hwpt. +ioas is used to re-attach IOMMUFD backed device to its +default ioas id, i.e., when vIOMMU is disabled by guest. +These properties are initialized in .realize() handler. + +New added handlers include [at|de]tach_hwpt. They are used to +attaching/detaching hwpt. VFIO and VDPA have different way to +attach and detach, so implementation will be in sub-class +instead of HostIOMMUDeviceIOMMUFD. + +Add two wrappers host_iommu_device_iommufd_[at|de]tach_hwpt to +wrap the two handlers. + +This is a prerequisite patch for following ones. 
+ +Signed-off-by: Zhenzhong Duan +--- + backends/iommufd.c | 22 ++++++++++++++++++ + include/sysemu/iommufd.h | 50 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 72 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index cf24370385..c10aa9b011 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -360,6 +360,26 @@ int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id, + return ret; + } + ++bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, ++ uint32_t hwpt_id, Error **errp) ++{ ++ HostIOMMUDeviceIOMMUFDClass *idevc = ++ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); ++ ++ g_assert(idevc->attach_hwpt); ++ return idevc->attach_hwpt(idev, hwpt_id, errp); ++} ++ ++bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, ++ Error **errp) ++{ ++ HostIOMMUDeviceIOMMUFDClass *idevc = ++ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); ++ ++ g_assert(idevc->detach_hwpt); ++ return idevc->detach_hwpt(idev, errp); ++} ++ + static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) + { + HostIOMMUDeviceCaps *caps = &hiod->caps; +@@ -398,6 +418,8 @@ static const TypeInfo types[] = { + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .parent = TYPE_HOST_IOMMU_DEVICE, ++ .instance_size = sizeof(HostIOMMUDeviceIOMMUFD), ++ .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass), + .class_init = hiod_iommufd_class_init, + .abstract = true, + } +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index f6596f6338..3dc6934144 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -68,4 +68,54 @@ int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id, + uint32_t *entry_num, void *data_ptr); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" ++OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, ++ HOST_IOMMU_DEVICE_IOMMUFD) ++ ++/* Abstract of host IOMMU device with iommufd backend */ 
++struct HostIOMMUDeviceIOMMUFD { ++ HostIOMMUDevice parent_obj; ++ ++ IOMMUFDBackend *iommufd; ++ uint32_t devid; ++ uint32_t ioas_id; ++}; ++ ++struct HostIOMMUDeviceIOMMUFDClass { ++ HostIOMMUDeviceClass parent_class; ++ ++ /** ++ * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table. ++ * VFIO and VDPA device can have different implementation. ++ * ++ * Mandatory callback. ++ * ++ * @idev: host IOMMU device backed by IOMMUFD backend. ++ * ++ * @hwpt_id: ID of IOMMUFD hardware page table. ++ * ++ * @errp: pass an Error out when attachment fails. ++ * ++ * Returns: true on success, false on failure. ++ */ ++ bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id, ++ Error **errp); ++ /** ++ * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table. ++ * VFIO and VDPA device can have different implementation. ++ * ++ * Mandatory callback. ++ * ++ * @idev: host IOMMU device backed by IOMMUFD backend. ++ * ++ * @errp: pass an Error out when attachment fails. ++ * ++ * Returns: true on success, false on failure. 
++ */ ++ bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp); ++}; ++ ++bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, ++ uint32_t hwpt_id, Error **errp); ++bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, ++ Error **errp); + #endif +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch b/vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch new file mode 100644 index 0000000000000000000000000000000000000000..e37a26eb7ea511cc54e73eb4b3bddce536d3ec8b --- /dev/null +++ b/vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch @@ -0,0 +1,107 @@ +From d6f0612a8760959f25c148ab50a1e7c394d4279a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:42 +0800 +Subject: [PATCH] vfio/iommufd: Add support for iova_ranges and pgsizes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some vIOMMU such as virtio-iommu use IOVA ranges from host side to +setup reserved ranges for passthrough device, so that guest will not +use an IOVA range beyond host support. + +Use an uAPI of IOMMUFD to get IOVA ranges of host side and pass to +vIOMMU just like the legacy backend, if this fails, fallback to +64bit IOVA range. + +Also use out_iova_alignment returned from uAPI as pgsizes instead of +qemu_real_host_page_size() as a fallback. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/iommufd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 55 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 6d31aeac7b..01b448e840 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -261,6 +261,53 @@ static int iommufd_cdev_ram_block_discard_disable(bool state) + return ram_block_uncoordinated_discard_disable(state); + } + ++static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container, ++ uint32_t ioas_id, Error **errp) ++{ ++ VFIOContainerBase *bcontainer = &container->bcontainer; ++ struct iommu_ioas_iova_ranges *info; ++ struct iommu_iova_range *iova_ranges; ++ int ret, sz, fd = container->be->fd; ++ ++ info = g_malloc0(sizeof(*info)); ++ info->size = sizeof(*info); ++ info->ioas_id = ioas_id; ++ ++ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); ++ if (ret && errno != EMSGSIZE) { ++ goto error; ++ } ++ ++ sz = info->num_iovas * sizeof(struct iommu_iova_range); ++ info = g_realloc(info, sizeof(*info) + sz); ++ info->allowed_iovas = (uintptr_t)(info + 1); ++ ++ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); ++ if (ret) { ++ goto error; ++ } ++ ++ iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas; ++ ++ for (int i = 0; i < info->num_iovas; i++) { ++ Range *range = g_new(Range, 1); ++ ++ range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last); ++ bcontainer->iova_ranges = ++ range_list_insert(bcontainer->iova_ranges, range); ++ } ++ bcontainer->pgsizes = info->out_iova_alignment; ++ ++ g_free(info); ++ return 0; ++ ++error: ++ ret = -errno; ++ g_free(info); ++ error_setg_errno(errp, errno, "Cannot get IOVA ranges"); ++ return ret; ++} ++ + static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error 
**errp) + { +@@ -335,7 +382,14 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + goto err_discard_disable; + } + +- bcontainer->pgsizes = qemu_real_host_page_size(); ++ ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err); ++ if (ret) { ++ error_append_hint(&err, ++ "Fallback to default 64bit IOVA range and 4K page size\n"); ++ warn_report_err(err); ++ err = NULL; ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ } + + bcontainer->listener = vfio_memory_listener; + memory_listener_register(&bcontainer->listener, bcontainer->space->as); +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Don-t-initialize-nor-set-a-HOST_IOMMU_D.patch b/vfio-iommufd-Don-t-initialize-nor-set-a-HOST_IOMMU_D.patch new file mode 100644 index 0000000000000000000000000000000000000000..af353c02508b393376a12f659b3a494de2e9f16b --- /dev/null +++ b/vfio-iommufd-Don-t-initialize-nor-set-a-HOST_IOMMU_D.patch @@ -0,0 +1,64 @@ +From b2d58d5b474633514c3195d6948e1cd2a9c78d67 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Fri, 19 Jul 2024 13:04:50 +0100 +Subject: [PATCH] vfio/iommufd: Don't initialize nor set a HOST_IOMMU_DEVICE + with mdev + +mdevs aren't "physical" devices and when asking for backing IOMMU info, it +fails the entire provisioning of the guest. Fix that by skipping +HostIOMMUDevice initialization in the presence of mdevs, and skip setting +an iommu device when it is known to be an mdev. 
+ +Cc: Zhenzhong Duan +Fixes: 930589520128 ("vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler") +Signed-off-by: Joao Martins +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +--- + hw/vfio/common.c | 4 ++++ + hw/vfio/pci.c | 7 +++++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index d5ff65f90a..ceb1da0b94 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1664,6 +1664,10 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + return ret; + } + ++ if (vbasedev->mdev) { ++ return true; ++ } ++ + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); + if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) { + object_unref(hiod); +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index de040e73ca..19211f4368 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3101,7 +3101,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + + vfio_bars_register(vdev); + +- if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { ++ if (!vbasedev->mdev && ++ !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { + error_prepend(errp, "Failed to set iommu_device: "); + goto out_teardown; + } +@@ -3229,7 +3230,9 @@ out_deregister: + timer_free(vdev->intx.mmap_timer); + } + out_unset_idev: +- pci_device_unset_iommu_device(pdev); ++ if (!vbasedev->mdev) { ++ pci_device_unset_iommu_device(pdev); ++ } + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch b/vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch new file mode 100644 index 0000000000000000000000000000000000000000..3829379111bb459f6ac1adda4df927f7072bb061 --- /dev/null +++ b/vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch @@ -0,0 +1,207 @@ +From de17750e24d4e583e9f392bbe47e4bd1aa81d6bc Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:45 +0800 
+Subject: [PATCH] vfio/iommufd: Enable pci hot reset through iommufd cdev + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement the newly introduced pci_hot_reset callback named +iommufd_cdev_pci_hot_reset to do iommufd specific check and +reset operation. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/iommufd.c | 150 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + 2 files changed, 151 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 01b448e840..6e53e013ef 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "qemu/cutils.h" + #include "qemu/chardev_open.h" ++#include "pci.h" + + static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +@@ -468,9 +469,158 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) + close(vbasedev->fd); + } + ++static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) ++{ ++ VFIODevice *vbasedev_iter; ++ ++ QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { ++ if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { ++ continue; ++ } ++ if (devid == vbasedev_iter->devid) { ++ return vbasedev_iter; ++ } ++ } ++ return NULL; ++} ++ ++static VFIOPCIDevice * ++iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev, ++ VFIODevice *reset_dev) ++{ ++ VFIODevice *vbasedev_tmp; ++ ++ if (dep_dev->devid == reset_dev->devid || ++ dep_dev->devid == VFIO_PCI_DEVID_OWNED) { ++ return NULL; ++ } ++ ++ vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid); ++ if (!vbasedev_tmp || !vbasedev_tmp->dev->realized || ++ vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) { ++ return NULL; ++ } ++ ++ return container_of(vbasedev_tmp, VFIOPCIDevice, 
vbasedev); ++} ++ ++static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int ret, i; ++ bool multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { ++ goto out_single; ++ } ++ ++ assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); ++ ++ devices = &info->devices[0]; ++ ++ if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { ++ if (!vdev->has_pm_reset) { ++ for (i = 0; i < info->count; i++) { ++ if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { ++ error_report("vfio: Cannot reset device %s, " ++ "depends on device %04x:%02x:%02x.%x " ++ "which is not owned.", ++ vdev->vbasedev.name, devices[i].segment, ++ devices[i].bus, PCI_SLOT(devices[i].devfn), ++ PCI_FUNC(devices[i].devfn)); ++ } ++ } ++ } ++ ret = -EPERM; ++ goto out_single; ++ } ++ ++ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); ++ ++ for (i = 0; i < info->count; i++) { ++ VFIOPCIDevice *tmp; ++ ++ trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, ++ devices[i].bus, ++ PCI_SLOT(devices[i].devfn), ++ PCI_FUNC(devices[i].devfn), ++ devices[i].devid); ++ ++ /* ++ * If a VFIO cdev device is resettable, all the dependent devices ++ * are either bound to same iommufd or within same iommu_groups as ++ * one of the iommufd bound devices. 
++ */ ++ assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); ++ ++ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); ++ if (!tmp) { ++ continue; ++ } ++ ++ if (single) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ vfio_pci_pre_reset(tmp); ++ tmp->vbasedev.needs_reset = false; ++ multi = true; ++ } ++ ++ if (!single && !multi) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ ++ /* Use zero length array for hot reset with iommufd backend */ ++ reset = g_malloc0(sizeof(*reset)); ++ reset->argsz = sizeof(*reset); ++ ++ /* Bus reset! */ ++ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); ++ g_free(reset); ++ if (ret) { ++ ret = -errno; ++ } ++ ++ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, ++ ret ? strerror(errno) : "Success"); ++ ++ /* Re-enable INTx on affected devices */ ++ for (i = 0; i < info->count; i++) { ++ VFIOPCIDevice *tmp; ++ ++ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); ++ if (!tmp) { ++ continue; ++ } ++ vfio_pci_post_reset(tmp); ++ } ++out_single: ++ if (!single) { ++ vfio_pci_post_reset(vdev); ++ } ++ g_free(info); ++ ++ return ret; ++} ++ + const VFIOIOMMUOps vfio_iommufd_ops = { + .dma_map = iommufd_cdev_map, + .dma_unmap = iommufd_cdev_unmap, + .attach_device = iommufd_cdev_attach, + .detach_device = iommufd_cdev_detach, ++ .pci_hot_reset = iommufd_cdev_pci_hot_reset, + }; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 3340c93af0..8fdde54456 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Succ + iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" + iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" + iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" 
++iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Implement-HostIOMMUDeviceClass-realize-.patch b/vfio-iommufd-Implement-HostIOMMUDeviceClass-realize-.patch new file mode 100644 index 0000000000000000000000000000000000000000..02adf86047dc51d0c9e0954eb4ce77faec14abe1 --- /dev/null +++ b/vfio-iommufd-Implement-HostIOMMUDeviceClass-realize-.patch @@ -0,0 +1,72 @@ +From c9f1b73eb36a84347c3720ce2a93f72ea47f5daa Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:34 +0800 +Subject: [PATCH] vfio/iommufd: Implement HostIOMMUDeviceClass::realize() + handler + +It calls iommufd_backend_get_device_info() to get host IOMMU +related information and translate it into HostIOMMUDeviceCaps +for query with .get_cap(). + +For aw_bits, use the same way as legacy backend by calling +vfio_device_get_aw_bits() which is common for different vendor +IOMMU. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. 
Tsirkin +--- + hw/vfio/iommufd.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 7a4b818830..2efdba5565 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -636,6 +636,35 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) + vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; + }; + ++static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, ++ Error **errp) ++{ ++ VFIODevice *vdev = opaque; ++ HostIOMMUDeviceCaps *caps = &hiod->caps; ++ enum iommu_hw_info_type type; ++ union { ++ struct iommu_hw_info_vtd vtd; ++ } data; ++ ++ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, ++ &type, &data, sizeof(data), errp)) { ++ return false; ++ } ++ ++ hiod->name = g_strdup(vdev->name); ++ caps->type = type; ++ caps->aw_bits = vfio_device_get_aw_bits(vdev); ++ ++ return true; ++} ++ ++static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) ++{ ++ HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); ++ ++ hiodc->realize = hiod_iommufd_vfio_realize; ++}; ++ + static const TypeInfo types[] = { + { + .name = TYPE_VFIO_IOMMU_IOMMUFD, +@@ -644,6 +673,7 @@ static const TypeInfo types[] = { + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, ++ .class_init = hiod_iommufd_vfio_class_init, + } + }; + +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Implement-HostIOMMUDeviceClass-realize_.patch b/vfio-iommufd-Implement-HostIOMMUDeviceClass-realize_.patch new file mode 100644 index 0000000000000000000000000000000000000000..524834d3f990889c6a695038a1b0c7945a12e7d3 --- /dev/null +++ b/vfio-iommufd-Implement-HostIOMMUDeviceClass-realize_.patch @@ -0,0 +1,54 @@ +From b727a28ce2cf062473ca011dd69697e0b7826a25 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 5 Aug 2024 09:29:00 +0800 +Subject: [PATCH] vfio/iommufd: Implement HostIOMMUDeviceClass::realize_late() + handler + 
+There are three iommufd related elements iommufd handle, devid and +ioas_id. ioas_id is ready only after VFIO device attachment. Device +id and iommufd handle are ready before attachment, but they are all +iommufd related elements, initialize them together with ioas_id. + +Signed-off-by: Zhenzhong Duan +--- + hw/vfio/iommufd.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 3d4f902ae5..47a8823146 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -827,6 +827,22 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) + vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; + }; + ++static bool hiod_iommufd_vfio_realize_late(HostIOMMUDevice *hiod, void *opaque, ++ Error **errp) ++{ ++ VFIODevice *vdev = opaque; ++ VFIOIOMMUFDContainer *container = container_of(vdev->bcontainer, ++ VFIOIOMMUFDContainer, ++ bcontainer); ++ HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); ++ ++ idev->iommufd = vdev->iommufd; ++ idev->devid = vdev->devid; ++ idev->ioas_id = container->ioas_id; ++ ++ return true; ++} ++ + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) + { +@@ -858,6 +874,7 @@ static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + + hiodc->realize = hiod_iommufd_vfio_realize; ++ hiodc->realize_late = hiod_iommufd_vfio_realize_late; + }; + + static const TypeInfo types[] = { +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Implement-VFIOIOMMUClass-query_dirty_bi.patch b/vfio-iommufd-Implement-VFIOIOMMUClass-query_dirty_bi.patch new file mode 100644 index 0000000000000000000000000000000000000000..e6927585c8bf495d7eded15f145a6dfcb3c53ee9 --- /dev/null +++ b/vfio-iommufd-Implement-VFIOIOMMUClass-query_dirty_bi.patch @@ -0,0 +1,154 @@ +From d09cb3d1907e3afbae9b3ea345c9973e207614bf Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 
Jul 2024 22:13:24 +0100 +Subject: [PATCH] vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap + support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, arg) is the UAPI +that fetches the bitmap that tells what was dirty in an IOVA +range. + +A single bitmap is allocated and used across all the hwpts +sharing an IOAS which is then used in log_sync() to set Qemu +global bitmaps. + +Signed-off-by: Joao Martins +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +[Shameer: changed iommufd_query_dirty_bitmap() declaration] +Signed-off-by: Shameer Kolothum +--- + backends/iommufd.c | 29 +++++++++++++++++++++++++++++ + backends/trace-events | 1 + + hw/vfio/iommufd.c | 32 ++++++++++++++++++++++++++++++++ + include/sysemu/iommufd.h | 4 ++++ + 4 files changed, 66 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 785d3fbbad..c1260766f0 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -277,6 +277,35 @@ bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, + return true; + } + ++bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, ++ uint32_t hwpt_id, ++ uint64_t iova, ram_addr_t size, ++ uint64_t page_size, uint64_t *data, ++ Error **errp) ++{ ++ int ret; ++ struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = { ++ .size = sizeof(get_dirty_bitmap), ++ .hwpt_id = hwpt_id, ++ .iova = iova, ++ .length = size, ++ .page_size = page_size, ++ .data = (uintptr_t)data, ++ }; ++ ++ ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap); ++ trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size, ++ page_size, ret ? 
errno : 0); ++ if (ret) { ++ error_setg_errno(errp, errno, ++ "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx ++ " size: 0x"RAM_ADDR_FMT") failed", iova, size); ++ return false; ++ } ++ ++ return true; ++} ++ + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + uint64_t *caps, Error **errp) +diff --git a/backends/trace-events b/backends/trace-events +index fe3297ca15..b02433710a 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -17,3 +17,4 @@ iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioa + iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)" + iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" + iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" ++iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 11e1392527..3d4f902ae5 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -25,6 +25,7 @@ + #include "qemu/cutils.h" + #include "qemu/chardev_open.h" + #include "pci.h" ++#include "exec/ram_addr.h" + + static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +@@ -152,6 +153,36 @@ err: + return -EINVAL; + } + ++static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, hwaddr iova, ++ hwaddr size) ++{ ++ VFIOIOMMUFDContainer *container = container_of(bcontainer, ++ VFIOIOMMUFDContainer, ++ bcontainer); ++ unsigned 
long page_size = qemu_real_host_page_size(); ++ VFIOIOASHwpt *hwpt; ++ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ if (!iommufd_hwpt_dirty_tracking(hwpt)) { ++ continue; ++ } ++ ++ if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id, ++ iova, size, page_size, ++ (uint64_t *)vbmap->bitmap, ++ NULL)) { ++ error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 ++ " size: 0x%"PRIx64" err: %d", (uint64_t)iova, ++ (uint64_t)size, errno); ++ ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ + static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) + { + long int ret = -ENOTTY; +@@ -793,6 +824,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) + vioc->detach_device = iommufd_cdev_detach; + vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; + vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking; ++ vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; + }; + + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 4f1dbe827c..3b28c8a81c 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -59,6 +59,10 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + Error **errp); + bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id, + bool start, Error **errp); ++bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, ++ uint64_t iova, ram_addr_t size, ++ uint64_t page_size, uint64_t *data, ++ Error **errp); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + #endif +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Implement-VFIOIOMMUClass-set_dirty_trac.patch b/vfio-iommufd-Implement-VFIOIOMMUClass-set_dirty_trac.patch new file mode 100644 index 0000000000000000000000000000000000000000..536a35d93bc706fbbfffa47e2472cdd31556148f --- /dev/null +++ 
b/vfio-iommufd-Implement-VFIOIOMMUClass-set_dirty_trac.patch @@ -0,0 +1,134 @@ +From 73b24be504fcd9b453a51e1f2fc8af64b092c586 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 22:13:23 +0100 +Subject: [PATCH] vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking + support + +ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, arg) is the UAPI that +enables or disables dirty page tracking. The ioctl is used if the hwpt +has been created with dirty tracking supported domain (stored in +hwpt::flags) and it is called on the whole list of iommu domains. + +Signed-off-by: Joao Martins +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +[Shameer: changed iommufd_set_dirty_page_tracking() declaration] +Signed-off-by: Shameer Kolothum +--- + backends/iommufd.c | 23 +++++++++++++++++++++++ + backends/trace-events | 1 + + hw/vfio/iommufd.c | 34 ++++++++++++++++++++++++++++++++++ + include/sysemu/iommufd.h | 2 ++ + 4 files changed, 60 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 4aebf54765..785d3fbbad 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -254,6 +254,29 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + return true; + } + ++bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, ++ uint32_t hwpt_id, bool start, ++ Error **errp) ++{ ++ int ret; ++ struct iommu_hwpt_set_dirty_tracking set_dirty = { ++ .size = sizeof(set_dirty), ++ .hwpt_id = hwpt_id, ++ .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0, ++ }; ++ ++ ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty); ++ trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? 
errno : 0); ++ if (ret) { ++ error_setg_errno(errp, errno, ++ "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed", ++ hwpt_id); ++ return false; ++ } ++ ++ return true; ++} ++ + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + uint64_t *caps, Error **errp) +diff --git a/backends/trace-events b/backends/trace-events +index e248bf039e..fe3297ca15 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -16,3 +16,4 @@ iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t si + iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" + iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)" + iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" ++iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index a9400d8107..11e1392527 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -119,6 +119,39 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt) + return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + } + ++static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, ++ bool start) ++{ ++ const VFIOIOMMUFDContainer *container = ++ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ VFIOIOASHwpt *hwpt; ++ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ if (!iommufd_hwpt_dirty_tracking(hwpt)) { ++ continue; ++ } ++ ++ if (!iommufd_backend_set_dirty_tracking(container->be, ++ hwpt->hwpt_id, start, NULL)) { ++ error_report("Failed to set dirty tracking hwpt_id %u errno: %d", ++ hwpt->hwpt_id, errno); ++ goto 
err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ if (!iommufd_hwpt_dirty_tracking(hwpt)) { ++ continue; ++ } ++ iommufd_backend_set_dirty_tracking(container->be, ++ hwpt->hwpt_id, !start, NULL); ++ } ++ return -EINVAL; ++} ++ + static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) + { + long int ret = -ENOTTY; +@@ -759,6 +792,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) + vioc->attach_device = iommufd_cdev_attach; + vioc->detach_device = iommufd_cdev_detach; + vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; ++ vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking; + }; + + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index f6f01e4be8..4f1dbe827c 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -57,6 +57,8 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, + uint32_t data_type, uint32_t data_len, + void *data_ptr, uint32_t *out_hwpt, + Error **errp); ++bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id, ++ bool start, Error **errp); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + #endif +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Implement-at-de-tach_hwpt-handlers.patch b/vfio-iommufd-Implement-at-de-tach_hwpt-handlers.patch new file mode 100644 index 0000000000000000000000000000000000000000..1a7314b5c8bda7f17f15e9f4996fb3c93b9f1943 --- /dev/null +++ b/vfio-iommufd-Implement-at-de-tach_hwpt-handlers.patch @@ -0,0 +1,66 @@ +From aea706f6a71ddbcc9bd342ece14991f8f8261224 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 11 Jan 2024 17:26:50 +0800 +Subject: [PATCH] vfio/iommufd: Implement [at|de]tach_hwpt handlers + +Implement [at|de]tach_hwpt handlers in VFIO subsystem. vIOMMU +utilizes them to attach to or detach from hwpt on host side. 
+ +To achieve that, a new property vdev is add to +TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO which is initialized in +.realize() handler. + +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +[Shameer: Changed ret for host_iommu_device_iommufd_vfio_detach_hwpt()] +Signed-off-by: Shameer Kolothum +--- + hw/vfio/iommufd.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 47a8823146..528023b95b 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -827,6 +827,24 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) + vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; + }; + ++static bool ++host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, ++ uint32_t hwpt_id, Error **errp) ++{ ++ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; ++ ++ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); ++} ++ ++static bool ++host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, ++ Error **errp) ++{ ++ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; ++ ++ return !iommufd_cdev_detach_ioas_hwpt(vbasedev, errp); ++} ++ + static bool hiod_iommufd_vfio_realize_late(HostIOMMUDevice *hiod, void *opaque, + Error **errp) + { +@@ -872,9 +890,13 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) + { + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); ++ HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc); + + hiodc->realize = hiod_iommufd_vfio_realize; + hiodc->realize_late = hiod_iommufd_vfio_realize_late; ++ ++ idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt; ++ idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt; + }; + + static const TypeInfo types[] = { +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Implement-the-iommufd-backend.patch 
b/vfio-iommufd-Implement-the-iommufd-backend.patch new file mode 100644 index 0000000000000000000000000000000000000000..878f74cf21dbdefcebbb48edc9d05c26bab7841e --- /dev/null +++ b/vfio-iommufd-Implement-the-iommufd-backend.patch @@ -0,0 +1,553 @@ +From 5c034b7ec5ca255551956744a386288a74ab172e Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Sat, 11 Jan 2025 10:52:40 +0800 +Subject: [PATCH] vfio/iommufd: Implement the iommufd backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The iommufd backend is implemented based on the new /dev/iommu user API. +This backend obviously depends on CONFIG_IOMMUFD. + +So far, the iommufd backend doesn't support dirty page sync yet. + +Co-authored-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 6 + + hw/vfio/iommufd.c | 422 ++++++++++++++++++++++++++++++++++ + hw/vfio/meson.build | 3 + + hw/vfio/trace-events | 10 + + include/hw/vfio/vfio-common.h | 11 + + 5 files changed, 452 insertions(+) + create mode 100644 hw/vfio/iommufd.c + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index f6c2029aec..0e900c6746 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #ifdef CONFIG_KVM + #include +@@ -1649,6 +1650,11 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + { + const VFIOIOMMUOps *ops = &vfio_legacy_ops; + ++#ifdef CONFIG_IOMMUFD ++ if (vbasedev->iommufd) { ++ ops = &vfio_iommufd_ops; ++ } ++#endif + return ops->attach_device(name, vbasedev, as, errp); + } + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +new file mode 100644 +index 0000000000..6d31aeac7b +--- /dev/null ++++ b/hw/vfio/iommufd.c +@@ -0,0 +1,422 @@ ++/* ++ * iommufd container backend ++ * ++ * Copyright (C) 
2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include ++#include ++#include ++ ++#include "hw/vfio/vfio-common.h" ++#include "qemu/error-report.h" ++#include "trace.h" ++#include "qapi/error.h" ++#include "sysemu/iommufd.h" ++#include "hw/qdev-core.h" ++#include "sysemu/reset.h" ++#include "qemu/cutils.h" ++#include "qemu/chardev_open.h" ++ ++static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) ++{ ++ VFIOIOMMUFDContainer *container = ++ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ ++ return iommufd_backend_map_dma(container->be, ++ container->ioas_id, ++ iova, size, vaddr, readonly); ++} ++ ++static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ VFIOIOMMUFDContainer *container = ++ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ ++ /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ ++ return iommufd_backend_unmap_dma(container->be, ++ container->ioas_id, iova, size); ++} ++ ++static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp) ++{ ++ return vfio_kvm_device_add_fd(vbasedev->fd, errp); ++} ++ ++static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev) ++{ ++ Error *err = NULL; ++ ++ if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) { ++ error_report_err(err); ++ } ++} ++ ++static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) ++{ ++ IOMMUFDBackend *iommufd = vbasedev->iommufd; ++ struct vfio_device_bind_iommufd bind = { ++ .argsz = sizeof(bind), ++ .flags = 0, ++ }; ++ int ret; ++ ++ ret = iommufd_backend_connect(iommufd, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ /* ++ * Add device to kvm-vfio to be prepared for the tracking ++ * in KVM. 
Especially for some emulated devices, it requires ++ * to have kvm information in the device open. ++ */ ++ ret = iommufd_cdev_kvm_device_add(vbasedev, errp); ++ if (ret) { ++ goto err_kvm_device_add; ++ } ++ ++ /* Bind device to iommufd */ ++ bind.iommufd = iommufd->fd; ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); ++ if (ret) { ++ error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d", ++ vbasedev->fd, bind.iommufd); ++ goto err_bind; ++ } ++ ++ vbasedev->devid = bind.out_devid; ++ trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, ++ vbasedev->fd, vbasedev->devid); ++ return ret; ++err_bind: ++ iommufd_cdev_kvm_device_del(vbasedev); ++err_kvm_device_add: ++ iommufd_backend_disconnect(iommufd); ++ return ret; ++} ++ ++static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) ++{ ++ /* Unbind is automatically conducted when device fd is closed */ ++ iommufd_cdev_kvm_device_del(vbasedev); ++ iommufd_backend_disconnect(vbasedev->iommufd); ++} ++ ++static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) ++{ ++ long int ret = -ENOTTY; ++ char *path, *vfio_dev_path = NULL, *vfio_path = NULL; ++ DIR *dir = NULL; ++ struct dirent *dent; ++ gchar *contents; ++ struct stat st; ++ gsize length; ++ int major, minor; ++ dev_t vfio_devt; ++ ++ path = g_strdup_printf("%s/vfio-dev", sysfs_path); ++ if (stat(path, &st) < 0) { ++ error_setg_errno(errp, errno, "no such host device"); ++ goto out_free_path; ++ } ++ ++ dir = opendir(path); ++ if (!dir) { ++ error_setg_errno(errp, errno, "couldn't open directory %s", path); ++ goto out_free_path; ++ } ++ ++ while ((dent = readdir(dir))) { ++ if (!strncmp(dent->d_name, "vfio", 4)) { ++ vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name); ++ break; ++ } ++ } ++ ++ if (!vfio_dev_path) { ++ error_setg(errp, "failed to find vfio-dev/vfioX/dev"); ++ goto out_close_dir; ++ } ++ ++ if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) { ++ 
error_setg(errp, "failed to load \"%s\"", vfio_dev_path); ++ goto out_free_dev_path; ++ } ++ ++ if (sscanf(contents, "%d:%d", &major, &minor) != 2) { ++ error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path); ++ goto out_free_dev_path; ++ } ++ g_free(contents); ++ vfio_devt = makedev(major, minor); ++ ++ vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name); ++ ret = open_cdev(vfio_path, vfio_devt); ++ if (ret < 0) { ++ error_setg(errp, "Failed to open %s", vfio_path); ++ } ++ ++ trace_iommufd_cdev_getfd(vfio_path, ret); ++ g_free(vfio_path); ++ ++out_free_dev_path: ++ g_free(vfio_dev_path); ++out_close_dir: ++ closedir(dir); ++out_free_path: ++ if (*errp) { ++ error_prepend(errp, VFIO_MSG_PREFIX, path); ++ } ++ g_free(path); ++ ++ return ret; ++} ++ ++static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, ++ Error **errp) ++{ ++ int ret, iommufd = vbasedev->iommufd->fd; ++ struct vfio_device_attach_iommufd_pt attach_data = { ++ .argsz = sizeof(attach_data), ++ .flags = 0, ++ .pt_id = id, ++ }; ++ ++ /* Attach device to an IOAS or hwpt within iommufd */ ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data); ++ if (ret) { ++ error_setg_errno(errp, errno, ++ "[iommufd=%d] error attach %s (%d) to id=%d", ++ iommufd, vbasedev->name, vbasedev->fd, id); ++ } else { ++ trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, ++ vbasedev->fd, id); ++ } ++ return ret; ++} ++ ++static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) ++{ ++ int ret, iommufd = vbasedev->iommufd->fd; ++ struct vfio_device_detach_iommufd_pt detach_data = { ++ .argsz = sizeof(detach_data), ++ .flags = 0, ++ }; ++ ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data); ++ if (ret) { ++ error_setg_errno(errp, errno, "detach %s failed", vbasedev->name); ++ } else { ++ trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name); ++ } ++ return ret; ++} ++ ++static int 
iommufd_cdev_attach_container(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container, ++ Error **errp) ++{ ++ return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); ++} ++ ++static void iommufd_cdev_detach_container(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container) ++{ ++ Error *err = NULL; ++ ++ if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { ++ error_report_err(err); ++ } ++} ++ ++static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) ++{ ++ VFIOContainerBase *bcontainer = &container->bcontainer; ++ ++ if (!QLIST_EMPTY(&bcontainer->device_list)) { ++ return; ++ } ++ memory_listener_unregister(&bcontainer->listener); ++ vfio_container_destroy(bcontainer); ++ iommufd_backend_free_id(container->be, container->ioas_id); ++ g_free(container); ++} ++ ++static int iommufd_cdev_ram_block_discard_disable(bool state) ++{ ++ /* ++ * We support coordinated discarding of RAM via the RamDiscardManager. ++ */ ++ return ram_block_uncoordinated_discard_disable(state); ++} ++ ++static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) ++{ ++ VFIOContainerBase *bcontainer; ++ VFIOIOMMUFDContainer *container; ++ VFIOAddressSpace *space; ++ struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; ++ int ret, devfd; ++ uint32_t ioas_id; ++ Error *err = NULL; ++ ++ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); ++ if (devfd < 0) { ++ return devfd; ++ } ++ vbasedev->fd = devfd; ++ ++ ret = iommufd_cdev_connect_and_bind(vbasedev, errp); ++ if (ret) { ++ goto err_connect_bind; ++ } ++ ++ space = vfio_get_address_space(as); ++ ++ /* try to attach to an existing container in this space */ ++ QLIST_FOREACH(bcontainer, &space->containers, next) { ++ container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ if (bcontainer->ops != &vfio_iommufd_ops || ++ vbasedev->iommufd != container->be) { ++ continue; ++ } ++ if (iommufd_cdev_attach_container(vbasedev, 
container, &err)) { ++ const char *msg = error_get_pretty(err); ++ ++ trace_iommufd_cdev_fail_attach_existing_container(msg); ++ error_free(err); ++ err = NULL; ++ } else { ++ ret = iommufd_cdev_ram_block_discard_disable(true); ++ if (ret) { ++ error_setg(errp, ++ "Cannot set discarding of RAM broken (%d)", ret); ++ goto err_discard_disable; ++ } ++ goto found_container; ++ } ++ } ++ ++ /* Need to allocate a new dedicated container */ ++ ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp); ++ if (ret < 0) { ++ goto err_alloc_ioas; ++ } ++ ++ trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); ++ ++ container = g_malloc0(sizeof(*container)); ++ container->be = vbasedev->iommufd; ++ container->ioas_id = ioas_id; ++ ++ bcontainer = &container->bcontainer; ++ vfio_container_init(bcontainer, space, &vfio_iommufd_ops); ++ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); ++ ++ ret = iommufd_cdev_attach_container(vbasedev, container, errp); ++ if (ret) { ++ goto err_attach_container; ++ } ++ ++ ret = iommufd_cdev_ram_block_discard_disable(true); ++ if (ret) { ++ goto err_discard_disable; ++ } ++ ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ ++ bcontainer->listener = vfio_memory_listener; ++ memory_listener_register(&bcontainer->listener, bcontainer->space->as); ++ ++ if (bcontainer->error) { ++ ret = -1; ++ error_propagate_prepend(errp, bcontainer->error, ++ "memory listener initialization failed: "); ++ goto err_listener_register; ++ } ++ ++ bcontainer->initialized = true; ++ ++found_container: ++ ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info); ++ if (ret) { ++ error_setg_errno(errp, errno, "error getting device info"); ++ goto err_listener_register; ++ } ++ ++ /* ++ * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level ++ * for discarding incompatibility check as well? 
++ */ ++ if (vbasedev->ram_block_discard_allowed) { ++ iommufd_cdev_ram_block_discard_disable(false); ++ } ++ ++ vbasedev->group = 0; ++ vbasedev->num_irqs = dev_info.num_irqs; ++ vbasedev->num_regions = dev_info.num_regions; ++ vbasedev->flags = dev_info.flags; ++ vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); ++ vbasedev->bcontainer = bcontainer; ++ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); ++ QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); ++ ++ trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, ++ vbasedev->num_regions, vbasedev->flags); ++ return 0; ++ ++err_listener_register: ++ iommufd_cdev_ram_block_discard_disable(false); ++err_discard_disable: ++ iommufd_cdev_detach_container(vbasedev, container); ++err_attach_container: ++ iommufd_cdev_container_destroy(container); ++err_alloc_ioas: ++ vfio_put_address_space(space); ++ iommufd_cdev_unbind_and_disconnect(vbasedev); ++err_connect_bind: ++ close(vbasedev->fd); ++ return ret; ++} ++ ++static void iommufd_cdev_detach(VFIODevice *vbasedev) ++{ ++ VFIOContainerBase *bcontainer = vbasedev->bcontainer; ++ VFIOAddressSpace *space = bcontainer->space; ++ VFIOIOMMUFDContainer *container = container_of(bcontainer, ++ VFIOIOMMUFDContainer, ++ bcontainer); ++ QLIST_REMOVE(vbasedev, global_next); ++ QLIST_REMOVE(vbasedev, container_next); ++ vbasedev->bcontainer = NULL; ++ ++ if (!vbasedev->ram_block_discard_allowed) { ++ iommufd_cdev_ram_block_discard_disable(false); ++ } ++ ++ iommufd_cdev_detach_container(vbasedev, container); ++ iommufd_cdev_container_destroy(container); ++ vfio_put_address_space(space); ++ ++ iommufd_cdev_unbind_and_disconnect(vbasedev); ++ close(vbasedev->fd); ++} ++ ++const VFIOIOMMUOps vfio_iommufd_ops = { ++ .dma_map = iommufd_cdev_map, ++ .dma_unmap = iommufd_cdev_unmap, ++ .attach_device = iommufd_cdev_attach, ++ .detach_device = iommufd_cdev_detach, ++}; +diff --git a/hw/vfio/meson.build 
b/hw/vfio/meson.build +index 32a6933280..bd5cc4ca79 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -7,6 +7,9 @@ vfio_ss.add(files( + 'spapr.c', + 'migration.c', + )) ++vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( ++ 'iommufd.c', ++)) + vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( + 'display.c', + 'pci-quirks.c', +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 08a1f9dfa4..3340c93af0 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -164,3 +164,13 @@ vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcop + vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" + vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" ++ ++#iommufd.c ++ ++iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d" ++iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)" ++iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d" ++iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s" ++iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" ++iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" ++iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" +diff 
--git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 9e22acbfb6..9b9fd7b461 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -99,6 +99,14 @@ typedef struct VFIOHostDMAWindow { + QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; + } VFIOHostDMAWindow; + ++typedef struct IOMMUFDBackend IOMMUFDBackend; ++ ++typedef struct VFIOIOMMUFDContainer { ++ VFIOContainerBase bcontainer; ++ IOMMUFDBackend *be; ++ uint32_t ioas_id; ++} VFIOIOMMUFDContainer; ++ + typedef struct VFIODeviceOps VFIODeviceOps; + + typedef struct VFIODevice { +@@ -126,6 +134,8 @@ typedef struct VFIODevice { + OnOffAuto pre_copy_dirty_page_tracking; + bool dirty_pages_supported; + bool dirty_tracking; ++ int devid; ++ IOMMUFDBackend *iommufd; + } VFIODevice; + + struct VFIODeviceOps { +@@ -215,6 +225,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; + extern const VFIOIOMMUOps vfio_legacy_ops; ++extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch b/vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch new file mode 100644 index 0000000000000000000000000000000000000000..dddcb04f8e9f22bdf3a756b4ad6cbd3f6b53fa3d --- /dev/null +++ b/vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch @@ -0,0 +1,145 @@ +From 66f71e9acdaa0c1c31770f00a21ea32644ebaac9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:23 +0100 +Subject: [PATCH] vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +As previously done for the sPAPR and legacy IOMMU backends, convert +the VFIOIOMMUOps struct to a QOM interface. 
The set of of operations +for this backend can be referenced with a literal typename instead of +a C struct. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 2 +- + hw/vfio/iommufd.c | 35 ++++++++++++++++++++------- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 2 +- + 4 files changed, 28 insertions(+), 12 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index d98c3b7422..a8b7129fa5 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1654,7 +1654,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { +- ops = &vfio_iommufd_ops; ++ ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + } + #endif + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 87a561c545..d4c586e842 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -319,6 +319,8 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + int ret, devfd; + uint32_t ioas_id; + Error *err = NULL; ++ const VFIOIOMMUClass *iommufd_vioc = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + + if (vbasedev->fd < 0) { + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); +@@ -340,7 +342,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + /* try to attach to an existing container in this space */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); +- if (bcontainer->ops != &vfio_iommufd_ops || ++ if (bcontainer->ops != iommufd_vioc || + vbasedev->iommufd != container->be) { + continue; + } +@@ -374,7 +376,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + container->ioas_id = ioas_id; + + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, space, &vfio_iommufd_ops); ++ vfio_container_init(bcontainer, space, iommufd_vioc); 
+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + + ret = iommufd_cdev_attach_container(vbasedev, container, errp); +@@ -476,9 +478,11 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) + static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) + { + VFIODevice *vbasedev_iter; ++ const VFIOIOMMUClass *iommufd_vioc = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { +- if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { ++ if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { + continue; + } + if (devid == vbasedev_iter->devid) { +@@ -621,10 +625,23 @@ out_single: + return ret; + } + +-const VFIOIOMMUOps vfio_iommufd_ops = { +- .dma_map = iommufd_cdev_map, +- .dma_unmap = iommufd_cdev_unmap, +- .attach_device = iommufd_cdev_attach, +- .detach_device = iommufd_cdev_detach, +- .pci_hot_reset = iommufd_cdev_pci_hot_reset, ++static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->dma_map = iommufd_cdev_map; ++ vioc->dma_unmap = iommufd_cdev_unmap; ++ vioc->attach_device = iommufd_cdev_attach; ++ vioc->detach_device = iommufd_cdev_detach; ++ vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; + }; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_IOMMUFD, ++ .parent = TYPE_VFIO_IOMMU, ++ .class_init = vfio_iommu_iommufd_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index f78a97006c..f3966410c1 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -224,7 +224,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +-extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener 
vfio_memory_listener; + extern int vfio_kvm_device_fd; + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 1085109d0c..c12ce4dfcb 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -17,7 +17,6 @@ + + typedef struct VFIODevice VFIODevice; + typedef struct VFIOIOMMUClass VFIOIOMMUClass; +-#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ + + typedef struct { + unsigned long *bitmap; +@@ -96,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + #define TYPE_VFIO_IOMMU "vfio-iommu" + #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" + #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" ++#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Introduce-auto-domain-creation.patch b/vfio-iommufd-Introduce-auto-domain-creation.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4cd4c2baa62ffef4b4748c9722b2002732e5750 --- /dev/null +++ b/vfio-iommufd-Introduce-auto-domain-creation.patch @@ -0,0 +1,275 @@ +From 630efd6ca2f0c9383223f0ea092abda1c7528f21 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 22:13:18 +0100 +Subject: [PATCH] vfio/iommufd: Introduce auto domain creation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There's generally two modes of operation for IOMMUFD: + +1) The simple user API which intends to perform relatively simple things +with IOMMUs e.g. DPDK. The process generally creates an IOAS and attaches +to VFIO and mainly performs IOAS_MAP and UNMAP. + +2) The native IOMMUFD API where you have fine grained control of the +IOMMU domain and model it accordingly. This is where most new feature +are being steered to. 
+ +For dirty tracking 2) is required, as it needs to ensure that +the stage-2/parent IOMMU domain will only attach devices +that support dirty tracking (so far it is all homogeneous in x86, likely +not the case for smmuv3). Such invariant on dirty tracking provides a +useful guarantee to VMMs that will refuse incompatible device +attachments for IOMMU domains. + +Dirty tracking insurance is enforced via HWPT_ALLOC, which is +responsible for creating an IOMMU domain. This is contrast to the +'simple API' where the IOMMU domain is created by IOMMUFD automatically +when it attaches to VFIO (usually referred as autodomains) but it has +the needed handling for mdevs. + +To support dirty tracking with the advanced IOMMUFD API, it needs +similar logic, where IOMMU domains are created and devices attached to +compatible domains. Essentially mimicking kernel +iommufd_device_auto_get_domain(). With mdevs given there's no IOMMU domain +it falls back to IOAS attach. + +The auto domain logic allows different IOMMU domains to be created when +DMA dirty tracking is not desired (and VF can provide it), and others where +it is. Here it is not used in this way given how VFIODevice migration +state is initialized after the device attachment. But such mixed mode of +IOMMU dirty tracking + device dirty tracking is an improvement that can +be added on. Keep the 'all of nothing' of type1 approach that we have +been using so far between container vs device dirty tracking. 
+ +Signed-off-by: Joao Martins +Reviewed-by: Zhenzhong Duan +[ clg: Added ERRP_GUARD() in iommufd_cdev_autodomains_get() ] +Signed-off-by: Cédric Le Goater +Reviewed-by: Eric Auger +[Shameer: Changed ret for iommufd_cdev_autodomains_get() ] +Signed-off-by: Shameer Kolothum +--- + backends/iommufd.c | 30 +++++++++++++ + backends/trace-events | 1 + + hw/vfio/iommufd.c | 85 +++++++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 9 ++++ + include/sysemu/iommufd.h | 5 +++ + 5 files changed, 130 insertions(+) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 1ce2a24226..0d995d7563 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -223,6 +223,36 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + return ret; + } + ++bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, ++ uint32_t pt_id, uint32_t flags, ++ uint32_t data_type, uint32_t data_len, ++ void *data_ptr, uint32_t *out_hwpt, ++ Error **errp) ++{ ++ int ret, fd = be->fd; ++ struct iommu_hwpt_alloc alloc_hwpt = { ++ .size = sizeof(struct iommu_hwpt_alloc), ++ .flags = flags, ++ .dev_id = dev_id, ++ .pt_id = pt_id, ++ .data_type = data_type, ++ .data_len = data_len, ++ .data_uptr = (uintptr_t)data_ptr, ++ }; ++ ++ ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt); ++ trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type, ++ data_len, (uintptr_t)data_ptr, ++ alloc_hwpt.out_hwpt_id, ret); ++ if (ret) { ++ error_setg_errno(errp, errno, "Failed to allocate hwpt"); ++ return false; ++ } ++ ++ *out_hwpt = alloc_hwpt.out_hwpt_id; ++ return true; ++} ++ + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + uint64_t *caps, Error **errp) +diff --git a/backends/trace-events b/backends/trace-events +index d45c6e31a6..e248bf039e 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -14,4 +14,5 @@ iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, 
uint64_t size + iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" + iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" + iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" ++iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)" + iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 5e7788ed59..3b75cba26c 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -225,10 +225,89 @@ static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) + return ret; + } + ++static int iommufd_cdev_autodomains_get(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container, ++ Error **errp) ++{ ++ ERRP_GUARD(); ++ IOMMUFDBackend *iommufd = vbasedev->iommufd; ++ uint32_t flags = 0; ++ VFIOIOASHwpt *hwpt; ++ uint32_t hwpt_id; ++ int ret; ++ ++ /* Try to find a domain */ ++ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { ++ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); ++ if (ret) { ++ /* -EINVAL means the domain is incompatible with the device. */ ++ if (ret == -EINVAL) { ++ /* ++ * It is an expected failure and it just means we will try ++ * another domain, or create one if no existing compatible ++ * domain is found. Hence why the error is discarded below. 
++ */ ++ error_free(*errp); ++ *errp = NULL; ++ continue; ++ } ++ ++ return ret; ++ } else { ++ vbasedev->hwpt = hwpt; ++ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); ++ return 0; ++ } ++ } ++ ++ if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, ++ container->ioas_id, flags, ++ IOMMU_HWPT_DATA_NONE, 0, NULL, ++ &hwpt_id, errp)) { ++ return -EINVAL; ++ } ++ ++ hwpt = g_malloc0(sizeof(*hwpt)); ++ hwpt->hwpt_id = hwpt_id; ++ QLIST_INIT(&hwpt->device_list); ++ ++ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); ++ if (ret) { ++ iommufd_backend_free_id(container->be, hwpt->hwpt_id); ++ g_free(hwpt); ++ return ret; ++ } ++ ++ vbasedev->hwpt = hwpt; ++ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); ++ QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); ++ return 0; ++} ++ ++static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container) ++{ ++ VFIOIOASHwpt *hwpt = vbasedev->hwpt; ++ ++ QLIST_REMOVE(vbasedev, hwpt_next); ++ vbasedev->hwpt = NULL; ++ ++ if (QLIST_EMPTY(&hwpt->device_list)) { ++ QLIST_REMOVE(hwpt, next); ++ iommufd_backend_free_id(container->be, hwpt->hwpt_id); ++ g_free(hwpt); ++ } ++} ++ + static int iommufd_cdev_attach_container(VFIODevice *vbasedev, + VFIOIOMMUFDContainer *container, + Error **errp) + { ++ /* mdevs aren't physical devices and will fail with auto domains */ ++ if (!vbasedev->mdev) { ++ return iommufd_cdev_autodomains_get(vbasedev, container, errp); ++ } ++ + return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + } + +@@ -240,6 +319,11 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev, + if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { + error_report_err(err); + } ++ ++ if (vbasedev->hwpt) { ++ iommufd_cdev_autodomains_put(vbasedev, container); ++ } ++ + } + + static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) +@@ -375,6 +459,7 @@ static int iommufd_cdev_attach(const char *name, 
VFIODevice *vbasedev, + container = g_malloc0(sizeof(*container)); + container->be = vbasedev->iommufd; + container->ioas_id = ioas_id; ++ QLIST_INIT(&container->hwpt_list); + + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, iommufd_vioc); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index e49e5fabba..2093ed2e91 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -107,10 +107,17 @@ typedef struct VFIOHostDMAWindow { + + typedef struct IOMMUFDBackend IOMMUFDBackend; + ++typedef struct VFIOIOASHwpt { ++ uint32_t hwpt_id; ++ QLIST_HEAD(, VFIODevice) device_list; ++ QLIST_ENTRY(VFIOIOASHwpt) next; ++} VFIOIOASHwpt; ++ + typedef struct VFIOIOMMUFDContainer { + VFIOContainerBase bcontainer; + IOMMUFDBackend *be; + uint32_t ioas_id; ++ QLIST_HEAD(, VFIOIOASHwpt) hwpt_list; + } VFIOIOMMUFDContainer; + + typedef struct VFIODeviceOps VFIODeviceOps; +@@ -144,6 +151,8 @@ typedef struct VFIODevice { + HostIOMMUDevice *hiod; + int devid; + IOMMUFDBackend *iommufd; ++ VFIOIOASHwpt *hwpt; ++ QLIST_ENTRY(VFIODevice) hwpt_next; + } VFIODevice; + + struct VFIODeviceOps { +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index a0a0143856..f6f01e4be8 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ -52,6 +52,11 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + uint64_t *caps, Error **errp); ++bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id, ++ uint32_t pt_id, uint32_t flags, ++ uint32_t data_type, uint32_t data_len, ++ void *data_ptr, uint32_t *out_hwpt, ++ Error **errp); + + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" + #endif +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Probe-and-request-hwpt-dirty-tracking-c.patch 
b/vfio-iommufd-Probe-and-request-hwpt-dirty-tracking-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..425bf09c37e02dbc1e4e763a3b750175af02d7f0 --- /dev/null +++ b/vfio-iommufd-Probe-and-request-hwpt-dirty-tracking-c.patch @@ -0,0 +1,119 @@ +From db8ef4524568c2379c25986db6e30cb0f6c0ec05 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 22:13:22 +0100 +Subject: [PATCH] vfio/iommufd: Probe and request hwpt dirty tracking + capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In preparation to using the dirty tracking UAPI, probe whether the IOMMU +supports dirty tracking. This is done via the data stored in +hiod::caps::hw_caps initialized from GET_HW_INFO. + +Qemu doesn't know if VF dirty tracking is supported when allocating +hardware pagetable in iommufd_cdev_autodomains_get(). This is because +VFIODevice migration state hasn't been initialized *yet* hence it can't pick +between VF dirty tracking vs IOMMU dirty tracking. So, if IOMMU supports +dirty tracking it always creates HWPTs with IOMMU_HWPT_ALLOC_DIRTY_TRACKING +even if later on VFIOMigration decides to use VF dirty tracking instead. 
+ +Signed-off-by: Joao Martins +[ clg: - Fixed vbasedev->iommu_dirty_tracking assignment in + iommufd_cdev_autodomains_get() + - Added warning for heterogeneous dirty page tracking support + in iommufd_cdev_autodomains_get() ] +Signed-off-by: Cédric Le Goater +Reviewed-by: Zhenzhong Duan +--- + hw/vfio/iommufd.c | 26 ++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 2 ++ + 2 files changed, 28 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 8fd6826826..a9400d8107 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -114,6 +114,11 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) + iommufd_backend_disconnect(vbasedev->iommufd); + } + ++static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt) ++{ ++ return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; ++} ++ + static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) + { + long int ret = -ENOTTY; +@@ -256,10 +261,22 @@ static int iommufd_cdev_autodomains_get(VFIODevice *vbasedev, + } else { + vbasedev->hwpt = hwpt; + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); ++ vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); + return 0; + } + } + ++ /* ++ * This is quite early and VFIO Migration state isn't yet fully ++ * initialized, thus rely only on IOMMU hardware capabilities as to ++ * whether IOMMU dirty tracking is going to be requested. Later ++ * vfio_migration_realize() may decide to use VF dirty tracking ++ * instead. 
++ */ ++ if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) { ++ flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; ++ } ++ + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, + container->ioas_id, flags, + IOMMU_HWPT_DATA_NONE, 0, NULL, +@@ -269,6 +286,7 @@ static int iommufd_cdev_autodomains_get(VFIODevice *vbasedev, + + hwpt = g_malloc0(sizeof(*hwpt)); + hwpt->hwpt_id = hwpt_id; ++ hwpt->hwpt_flags = flags; + QLIST_INIT(&hwpt->device_list); + + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); +@@ -279,8 +297,16 @@ static int iommufd_cdev_autodomains_get(VFIODevice *vbasedev, + } + + vbasedev->hwpt = hwpt; ++ vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); + QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); ++ container->bcontainer.dirty_pages_supported |= ++ vbasedev->iommu_dirty_tracking; ++ if (container->bcontainer.dirty_pages_supported && ++ !vbasedev->iommu_dirty_tracking) { ++ warn_report("IOMMU instance for device %s doesn't support dirty tracking", ++ vbasedev->name); ++ } + return 0; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 63da291456..22a7386591 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -109,6 +109,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend; + + typedef struct VFIOIOASHwpt { + uint32_t hwpt_id; ++ uint32_t hwpt_flags; + QLIST_HEAD(, VFIODevice) device_list; + QLIST_ENTRY(VFIOIOASHwpt) next; + } VFIOIOASHwpt; +@@ -148,6 +149,7 @@ typedef struct VFIODevice { + OnOffAuto pre_copy_dirty_page_tracking; + bool dirty_pages_supported; + bool dirty_tracking; ++ bool iommu_dirty_tracking; + HostIOMMUDevice *hiod; + int devid; + IOMMUFDBackend *iommufd; +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch b/vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch new file mode 100644 index 
0000000000000000000000000000000000000000..a73228e5421aa128de80102c7564b5d058b3c3ca --- /dev/null +++ b/vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch @@ -0,0 +1,63 @@ +From cb2bd16a67cd45a0ad3318098120aee10a298f3b Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:41 +0800 +Subject: [PATCH] vfio/iommufd: Relax assert check for iommufd backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently iommufd doesn't support dirty page sync yet, +but it will not block us doing live migration if VFIO +migration is force enabled. + +So in this case we allow set_dirty_page_tracking to be NULL. +Note we don't need same change for query_dirty_bitmap because +when dirty page sync isn't supported, query_dirty_bitmap will +never be called. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/container-base.c | 4 ++++ + hw/vfio/container.c | 4 ---- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 71f7274973..eee2dcfe76 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -55,6 +55,10 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start) + { ++ if (!bcontainer->dirty_pages_supported) { ++ return 0; ++ } ++ + g_assert(bcontainer->ops->set_dirty_page_tracking); + return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 62af0f2bdd..4936b8f27f 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -266,10 +266,6 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + .argsz = sizeof(dirty), + 
}; + +- if (!bcontainer->dirty_pages_supported) { +- return 0; +- } +- + if (start) { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; + } else { +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch b/vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch new file mode 100644 index 0000000000000000000000000000000000000000..9e0aea77a1a307cdea2d1dfc87c6bf86f4fe3640 --- /dev/null +++ b/vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch @@ -0,0 +1,46 @@ +From 188948043652fbcdd4505fd9672e57bc61647159 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:25 +0100 +Subject: [PATCH] vfio/iommufd: Remove CONFIG_IOMMUFD usage +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Availability of the IOMMUFD backend can now be fully determined at +runtime and the ifdef check was a build time protection (for PPC not +supporting it mostly). + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a8b7129fa5..b5d02df0c2 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -19,7 +19,6 @@ + */ + + #include "qemu/osdep.h" +-#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #ifdef CONFIG_KVM + #include +@@ -1652,11 +1651,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + const VFIOIOMMUClass *ops = + VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + +-#ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { + ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + } +-#endif + + assert(ops); + +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-Return-errno-in-iommufd_cdev_attach_ioa.patch b/vfio-iommufd-Return-errno-in-iommufd_cdev_attach_ioa.patch new file mode 100644 index 0000000000000000000000000000000000000000..04cea10882383896e4ead1f6d29329ba377e1d33 --- /dev/null +++ 
b/vfio-iommufd-Return-errno-in-iommufd_cdev_attach_ioa.patch @@ -0,0 +1,46 @@ +From 56e5b9cf8e4041a023daca1ce439ca14619afa97 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Fri, 19 Jul 2024 13:04:52 +0100 +Subject: [PATCH] vfio/iommufd: Return errno in iommufd_cdev_attach_ioas_hwpt() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In preparation to implement auto domains have the attach function +return the errno it got during domain attach instead of a bool. + +-EINVAL is tracked to track domain incompatibilities, and decide whether +to create a new IOMMU domain. + +Signed-off-by: Joao Martins +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +--- + hw/vfio/iommufd.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index d5b923ca83..5e7788ed59 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -200,11 +200,12 @@ static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, + error_setg_errno(errp, errno, + "[iommufd=%d] error attach %s (%d) to id=%d", + iommufd, vbasedev->name, vbasedev->fd, id); +- } else { +- trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, +- vbasedev->fd, id); ++ return -errno; + } +- return ret; ++ ++ trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, ++ vbasedev->fd, id); ++ return 0; + } + + static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-container-Invoke-HostIOMMUDevice-realiz.patch b/vfio-iommufd-container-Invoke-HostIOMMUDevice-realiz.patch new file mode 100644 index 0000000000000000000000000000000000000000..25878555b0dccbf717ae3b24339f72beed3b474c --- /dev/null +++ b/vfio-iommufd-container-Invoke-HostIOMMUDevice-realiz.patch @@ -0,0 +1,141 @@ +From 2276a3a175576a63da6abd5ccb309dd1cdbc4021 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 
22:13:21 +0100 +Subject: [PATCH] vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() + during attach_device() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Move the HostIOMMUDevice::realize() to be invoked during the attach of the device +before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use +of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU +behind the device supports dirty tracking. + +Note: The HostIOMMUDevice data from legacy backend is static and doesn't +need any information from the (type1-iommu) backend to be initialized. +In contrast however, the IOMMUFD HostIOMMUDevice data requires the +iommufd FD to be connected and having a devid to be able to successfully +GET_HW_INFO. This means vfio_device_hiod_realize() is called in +different places within the backend .attach_device() implementation. + +Suggested-by: Cédric Le Goater +Signed-off-by: Joao Martins +Reviewed-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +[ clg: Fixed error handling in iommufd_cdev_attach() ] +Signed-off-by: Cédric Le Goater +Reviewed-by: Eric Auger +--- + hw/vfio/common.c | 19 +++++++------------ + hw/vfio/container.c | 4 ++++ + hw/vfio/helpers.c | 11 +++++++++++ + hw/vfio/iommufd.c | 11 +++++++++++ + include/hw/vfio/vfio-common.h | 1 + + 5 files changed, 34 insertions(+), 12 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index ceb1da0b94..65e1c9f810 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1659,22 +1659,17 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + + assert(ops); + +- ret = ops->attach_device(name, vbasedev, as, errp); +- if (ret) { +- return ret; +- } +- +- if (vbasedev->mdev) { +- return true; ++ if (!vbasedev->mdev) { ++ hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); ++ vbasedev->hiod = hiod; + } + +- hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); +- if 
(!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) { ++ ret = ops->attach_device(name, vbasedev, as, errp); ++ if (ret) { + object_unref(hiod); +- ops->detach_device(vbasedev); +- return -1; ++ vbasedev->hiod = NULL; ++ return ret; + } +- vbasedev->hiod = hiod; + + return 0; + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 30a62348d3..64eacfd912 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1030,6 +1030,10 @@ static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, + + trace_vfio_attach_device(vbasedev->name, groupid); + ++ if (!vfio_device_hiod_realize(vbasedev, errp)) { ++ return false; ++ } ++ + group = vfio_get_group(groupid, as, errp); + if (!group) { + return -ENOENT; +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 37bc383c69..1f3bfed917 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -694,3 +694,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev) + subsys = realpath(tmp, NULL); + return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0); + } ++ ++bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp) ++{ ++ HostIOMMUDevice *hiod = vbasedev->hiod; ++ ++ if (!hiod) { ++ return true; ++ } ++ ++ return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp); ++} +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index d9088705de..8fd6826826 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -424,6 +424,17 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + + space = vfio_get_address_space(as); + ++ /* ++ * The HostIOMMUDevice data from legacy backend is static and doesn't need ++ * any information from the (type1-iommu) backend to be initialized. In ++ * contrast however, the IOMMUFD HostIOMMUDevice data requires the iommufd ++ * FD to be connected and having a devid to be able to successfully call ++ * iommufd_backend_get_device_info(). 
++ */ ++ if (!vfio_device_hiod_realize(vbasedev, errp)) { ++ goto err_alloc_ioas; ++ } ++ + /* try to attach to an existing container in this space */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 2093ed2e91..63da291456 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -230,6 +230,7 @@ void vfio_region_finalize(VFIORegion *region); + void vfio_reset_handler(void *opaque); + struct vfio_device_info *vfio_get_device_info(int fd); + bool vfio_device_is_mdev(VFIODevice *vbasedev); ++bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp); + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); + void vfio_detach_device(VFIODevice *vbasedev); +-- +2.41.0.windows.1 + diff --git a/vfio-iommufd-container-Remove-caps-aw_bits.patch b/vfio-iommufd-container-Remove-caps-aw_bits.patch new file mode 100644 index 0000000000000000000000000000000000000000..0bdf25db7def106538f85ddc740465b90093ed1d --- /dev/null +++ b/vfio-iommufd-container-Remove-caps-aw_bits.patch @@ -0,0 +1,104 @@ +From 7d3634d73af1f53549eba4b3d50bb8f9f49a5243 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 22:13:19 +0100 +Subject: [PATCH] vfio/{iommufd,container}: Remove caps::aw_bits +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Remove caps::aw_bits which requires the bcontainer::iova_ranges being +initialized after device is actually attached. Instead defer that to +.get_cap() and call vfio_device_get_aw_bits() directly. + +This is in preparation for HostIOMMUDevice::realize() being called early +during attach_device(). 
+ +Suggested-by: Zhenzhong Duan +Signed-off-by: Joao Martins +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +--- + backends/iommufd.c | 3 ++- + hw/vfio/container.c | 5 +---- + hw/vfio/iommufd.c | 1 - + include/sysemu/host_iommu_device.h | 3 --- + 4 files changed, 3 insertions(+), 9 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 0d995d7563..4aebf54765 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -19,6 +19,7 @@ + #include "qemu/error-report.h" + #include "monitor/monitor.h" + #include "trace.h" ++#include "hw/vfio/vfio-common.h" + #include + #include + +@@ -285,7 +286,7 @@ static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) + case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE: + return caps->type; + case HOST_IOMMU_DEVICE_CAP_AW_BITS: +- return caps->aw_bits; ++ return vfio_device_get_aw_bits(hiod->agent); + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 8a5a112b6b..30a62348d3 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1258,7 +1258,6 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + VFIODevice *vdev = opaque; + + hiod->name = g_strdup(vdev->name); +- hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); + hiod->agent = opaque; + + return true; +@@ -1267,11 +1266,9 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, + Error **errp) + { +- HostIOMMUDeviceCaps *caps = &hiod->caps; +- + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_AW_BITS: +- return caps->aw_bits; ++ return vfio_device_get_aw_bits(hiod->agent); + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 7a069ca576..06e6a400be 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ 
-745,7 +745,6 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + + hiod->name = g_strdup(vdev->name); + caps->type = type; +- caps->aw_bits = vfio_device_get_aw_bits(vdev); + + return true; + } +diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h +index 3e5f058e7b..f586908945 100644 +--- a/include/sysemu/host_iommu_device.h ++++ b/include/sysemu/host_iommu_device.h +@@ -19,12 +19,9 @@ + * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. + * + * @type: host platform IOMMU type. +- * +- * @aw_bits: host IOMMU address width. 0xff if no limitation. + */ + typedef struct HostIOMMUDeviceCaps { + uint32_t type; +- uint8_t aw_bits; + } HostIOMMUDeviceCaps; + + #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +-- +2.41.0.windows.1 + diff --git a/vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch b/vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch deleted file mode 100644 index 5f543b40bdb7e93d671edbd834b4279dec69c8c9..0000000000000000000000000000000000000000 --- a/vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch +++ /dev/null @@ -1,196 +0,0 @@ -From 74b651428e6ed65177354d80bd888e842a4a5077 Mon Sep 17 00:00:00 2001 -From: Keqian Zhu -Date: Tue, 9 Mar 2021 11:19:13 +0800 -Subject: [PATCH] vfio/migrate: Move switch of dirty tracking into - vfio_memory_listener - -For now the switch of vfio dirty page tracking is integrated into -@vfio_save_handler. The reason is that some PCI vendor driver may -start to track dirty base on _SAVING state of device, so if dirty -tracking is started before setting device state, vfio will report -full-dirty to QEMU. - -However, the dirty bmap of all ramblocks are fully set when setup -ram saving, so it's not matter whether the device is in _SAVING -state when start vfio dirty tracking. - -Moreover, this logic causes some problems [1]. 
The object of dirty -tracking is guest memory, but the object of @vfio_save_handler is -device state, which produces unnecessary coupling and conflicts: - -1. Coupling: Their saving granule is different (perVM vs perDevice). - vfio will enable dirty_page_tracking for each devices, actually - once is enough. - -2. Conflicts: The ram_save_setup() traverses all memory_listeners - to execute their log_start() and log_sync() hooks to get the - first round dirty bitmap, which is used by the bulk stage of - ram saving. However, as vfio dirty tracking is not yet started, - it can't get dirty bitmap from vfio. Then we give up the chance - to handle vfio dirty page at bulk stage. - -Move the switch of vfio dirty_page_tracking into vfio_memory_listener -can solve above problems. Besides, Do not require devices in SAVING -state for vfio_sync_dirty_bitmap(). - -[1] https://www.spinics.net/lists/kvm/msg229967.html - -Reported-by: Zenghui Yu -Signed-off-by: Keqian Zhu -Suggested-by: Paolo Bonzini -Message-Id: <20210309031913.11508-1-zhukeqian1@huawei.com> -Signed-off-by: Alex Williamson -Signed-off-by: Kunkun Jiang ---- - hw/vfio/common.c | 49 ++++++++++++++++++++++++++++++++++++--------- - hw/vfio/migration.c | 35 -------------------------------- - 2 files changed, 40 insertions(+), 44 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index a7817c90cc..245e32df5b 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -310,7 +310,7 @@ bool vfio_mig_active(void) - return true; - } - --static bool vfio_devices_all_saving(VFIOContainer *container) -+static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - { - VFIOGroup *group; - VFIODevice *vbasedev; -@@ -328,13 +328,8 @@ static bool vfio_devices_all_saving(VFIOContainer *container) - return false; - } - -- if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { -- if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) -- && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { -- 
return false; -- } -- continue; -- } else { -+ if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) -+ && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { - return false; - } - } -@@ -952,6 +947,40 @@ static void vfio_listener_region_del(MemoryListener *listener, - } - } - -+static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) -+{ -+ int ret; -+ struct vfio_iommu_type1_dirty_bitmap dirty = { -+ .argsz = sizeof(dirty), -+ }; -+ -+ if (start) { -+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; -+ } else { -+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; -+ } -+ -+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); -+ if (ret) { -+ error_report("Failed to set dirty tracking flag 0x%x errno: %d", -+ dirty.flags, errno); -+ } -+} -+ -+static void vfio_listener_log_global_start(MemoryListener *listener) -+{ -+ VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ -+ vfio_set_dirty_page_tracking(container, true); -+} -+ -+static void vfio_listener_log_global_stop(MemoryListener *listener) -+{ -+ VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ -+ vfio_set_dirty_page_tracking(container, false); -+} -+ - static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - uint64_t size, ram_addr_t ram_addr) - { -@@ -1093,7 +1122,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, - return; - } - -- if (vfio_devices_all_saving(container)) { -+ if (vfio_devices_all_dirty_tracking(container)) { - vfio_sync_dirty_bitmap(container, section); - } - } -@@ -1101,6 +1130,8 @@ static void vfio_listener_log_sync(MemoryListener *listener, - static const MemoryListener vfio_memory_listener = { - .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, -+ .log_global_start = vfio_listener_log_global_start, -+ .log_global_stop = vfio_listener_log_global_stop, - .log_sync = vfio_listener_log_sync, - }; - -diff --git 
a/hw/vfio/migration.c b/hw/vfio/migration.c -index 033cb2b0c9..f1f006d584 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -395,40 +395,10 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) - return qemu_file_get_error(f); - } - --static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start) --{ -- int ret; -- VFIOMigration *migration = vbasedev->migration; -- VFIOContainer *container = vbasedev->group->container; -- struct vfio_iommu_type1_dirty_bitmap dirty = { -- .argsz = sizeof(dirty), -- }; -- -- if (start) { -- if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { -- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; -- } else { -- return -EINVAL; -- } -- } else { -- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; -- } -- -- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); -- if (ret) { -- error_report("Failed to set dirty tracking flag 0x%x errno: %d", -- dirty.flags, errno); -- return -errno; -- } -- return ret; --} -- - static void vfio_migration_cleanup(VFIODevice *vbasedev) - { - VFIOMigration *migration = vbasedev->migration; - -- vfio_set_dirty_page_tracking(vbasedev, false); -- - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } -@@ -469,11 +439,6 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) - return ret; - } - -- ret = vfio_set_dirty_page_tracking(vbasedev, true); -- if (ret) { -- return ret; -- } -- - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - ret = qemu_file_get_error(f); --- -2.27.0 - diff --git a/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch index c59bc4e1ff70f6993557329480505c4300ff6aa0..06e5781624f27a44b87089188713bc3a9b345261 100644 --- a/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch +++ b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch @@ -1,4 +1,4 @@ -From f9574b63bf5e940d794db2c3aaf928bde36d9521 Mon Sep 17 00:00:00 2001 +From 
24c3ff779f35b40967d195e4764d4cb605c1a304 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 8 May 2021 17:31:05 +0800 Subject: [PATCH] vfio/migration: Add support for manual clear vfio dirty log @@ -15,26 +15,16 @@ kernel supports it, deliever the clear message to kernel. Signed-off-by: Zenghui Yu Signed-off-by: Kunkun Jiang --- - hw/vfio/common.c | 149 +++++++++++++++++++++++++++++++++- + hw/vfio/common.c | 136 ++++++++++++++++++++++++++++++++++ + hw/vfio/container.c | 13 +++- include/hw/vfio/vfio-common.h | 1 + - 2 files changed, 148 insertions(+), 2 deletions(-) + 3 files changed, 148 insertions(+), 2 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index c33c4c539d..206fb83e28 100644 +index 564e933135..e08b147b3d 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c -@@ -1045,7 +1045,9 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); - - dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); -- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; -+ dbitmap->flags = container->dirty_log_manual_clear ? -+ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR : -+ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; - range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; - range->iova = iova; - range->size = size; -@@ -1176,12 +1178,148 @@ static void vfio_listener_log_sync(MemoryListener *listener, +@@ -1344,6 +1344,141 @@ static void vfio_listener_log_sync(MemoryListener *listener, } } @@ -45,11 +35,11 @@ index c33c4c539d..206fb83e28 100644 + * I think the code can be simplified a lot if no alignment requirement. 
+ */ +#define VFIO_CLEAR_LOG_SHIFT 6 -+#define VFIO_CLEAR_LOG_ALIGN (qemu_real_host_page_size << VFIO_CLEAR_LOG_SHIFT) ++#define VFIO_CLEAR_LOG_ALIGN (qemu_real_host_page_size() << VFIO_CLEAR_LOG_SHIFT) +#define VFIO_CLEAR_LOG_MASK (-VFIO_CLEAR_LOG_ALIGN) + -+static int vfio_log_clear_one_range(VFIOContainer *container, -+ VFIODMARange *qrange, uint64_t start, uint64_t size) ++static int vfio_log_clear_one_range(VFIOContainer *container,VFIODMARange *qrange, ++ uint64_t start, uint64_t size) +{ + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; @@ -65,7 +55,7 @@ index c33c4c539d..206fb83e28 100644 + * as the kvm side. + */ + uint64_t end, bmap_start, start_delta, bmap_npages; -+ unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size; ++ unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size(); + int ret; + + bmap_start = start & VFIO_CLEAR_LOG_MASK; @@ -94,7 +84,7 @@ index c33c4c539d..206fb83e28 100644 + range->size = bmap_npages * psize; + range->bitmap.size = ROUND_UP(bmap_npages, sizeof(__u64) * BITS_PER_BYTE) / + BITS_PER_BYTE; -+ range->bitmap.pgsize = qemu_real_host_page_size; ++ range->bitmap.pgsize = qemu_real_host_page_size(); + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + if (ret) { @@ -173,17 +163,33 @@ index c33c4c539d..206fb83e28 100644 + } +} + - static const MemoryListener vfio_memory_listener = { + const MemoryListener vfio_memory_listener = { + .name = "vfio", .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, +@@ -1351,6 +1486,7 @@ const MemoryListener vfio_memory_listener = { .log_global_start = vfio_listener_log_global_start, .log_global_stop = vfio_listener_log_global_stop, .log_sync = vfio_listener_log_sync, + .log_clear = vfio_listener_log_clear, }; - static void vfio_listener_release(VFIOContainer *container) -@@ -1563,7 +1701,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, + void vfio_reset_handler(void 
*opaque) +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 9a176a0d33..d8b9117f4f 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -285,7 +285,9 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); + + dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); +- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; ++ dbitmap->flags = container->dirty_log_manual_clear ? ++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR : ++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; + range->iova = iova; + range->size = size; +@@ -409,7 +411,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, static int vfio_init_container(VFIOContainer *container, int group_fd, Error **errp) { @@ -192,7 +198,7 @@ index c33c4c539d..206fb83e28 100644 iommu_type = vfio_get_iommu_type(container, errp); if (iommu_type < 0) { -@@ -1592,6 +1730,13 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, +@@ -438,6 +440,13 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, } container->iommu_type = iommu_type; @@ -207,11 +213,11 @@ index c33c4c539d..206fb83e28 100644 } diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 2853dc861e..1277914ca8 100644 +index b131d04c9c..fd9828d50b 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h -@@ -93,6 +93,7 @@ typedef struct VFIOContainer { - int error; +@@ -97,6 +97,7 @@ typedef struct VFIOContainer { + Error *error; bool initialized; bool dirty_pages_supported; + bool dirty_log_manual_clear; diff --git a/vfio-migration-Don-t-block-migration-device-dirty-tr.patch b/vfio-migration-Don-t-block-migration-device-dirty-tr.patch new file mode 100644 index 0000000000000000000000000000000000000000..7010f82afd0652521c7d16681d1e8887bd999f1f --- /dev/null +++ 
b/vfio-migration-Don-t-block-migration-device-dirty-tr.patch @@ -0,0 +1,62 @@ +From 6eab0b4a0c79d53250da601da25e2813177d44fe Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Mon, 22 Jul 2024 22:13:25 +0100 +Subject: [PATCH] vfio/migration: Don't block migration device dirty tracking + is unsupported +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +By default VFIO migration is set to auto, which will support live +migration if the migration capability is set *and* also dirty page +tracking is supported. + +For testing purposes one can force enable without dirty page tracking +via enable-migration=on, but that option is generally left for testing +purposes. + +So starting with IOMMU dirty tracking it can use to accommodate the lack of +VF dirty page tracking allowing us to minimize the VF requirements for +migration and thus enabling migration by default for those too. + +While at it change the error messages to mention IOMMU dirty tracking as +well. 
+ +Signed-off-by: Joao Martins +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +[ clg: - spelling in commit log ] +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 28d422b39f..db128204af 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -945,16 +945,16 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + return !vfio_block_migration(vbasedev, err, errp); + } + +- if (!vbasedev->dirty_pages_supported) { ++ if (!vbasedev->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) { + if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { + error_setg(&err, +- "%s: VFIO device doesn't support device dirty tracking", +- vbasedev->name); ++ "%s: VFIO device doesn't support device and " ++ "IOMMU dirty tracking", vbasedev->name); + goto add_blocker; + } + +- warn_report("%s: VFIO device doesn't support device dirty tracking", +- vbasedev->name); ++ warn_report("%s: VFIO device doesn't support device and " ++ "IOMMU dirty tracking", vbasedev->name); + } + + ret = vfio_block_multiple_devices_migration(vbasedev, errp); +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch b/vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch new file mode 100644 index 0000000000000000000000000000000000000000..c81907b10fa105e153040c1e6aaf08ae540479e9 --- /dev/null +++ b/vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch @@ -0,0 +1,72 @@ +From 6576af91f2621c24de4a8bbfa2c6681a16a5d043 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sat, 11 Jan 2025 10:52:46 +0800 +Subject: [PATCH] vfio/pci: Allow the selection of a given iommu backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Now we support two types of iommu backends, let's add the capability +to select one of them. 
This depends on whether an iommufd object has +been linked with the vfio-pci device: + +If the user wants to use the legacy backend, it shall not +link the vfio-pci device with any iommufd object: + + -device vfio-pci,host=0000:02:00.0 + +This is called the legacy mode/backend. + +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-pci device options: + + -object iommufd,id=iommufd0 + -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/pci.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d00c3472c7..c5984b0598 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + +@@ -42,6 +43,7 @@ + #include "qapi/error.h" + #include "migration/blocker.h" + #include "migration/qemu-file.h" ++#include "sysemu/iommufd.h" + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + +@@ -3386,6 +3388,10 @@ static Property vfio_pci_dev_properties[] = { + * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), + * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), + */ ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch b/vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch new file mode 100644 index 0000000000000000000000000000000000000000..6fb6caafbd47feb499b800928e889985a942f545 --- /dev/null +++ b/vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch @@ -0,0 +1,105 @@ +From 
9558ea5d0bded6c9189adf2ce317cca205604c15 Mon Sep 17 00:00:00 2001 +From: Binfeng Wu +Date: Tue, 8 Feb 2022 17:00:39 +0800 +Subject: [PATCH] vfio/pci: Ascend310 need 4Bytes quirk in bar4 + +--- + hw/vfio/pci-quirks.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 75 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 84b1a7b948..8fb190ce3c 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1209,6 +1209,80 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + return 0; + } + ++#define PCI_VENDOR_ID_HUAWEI 0x19e5 ++#define PCI_DEVICE_ID_ASCEND310 0xd100 ++#define ASCEND310_XLOADER_SIZE 4 ++#define ASCEND310_XLOADER_OFFSET 0x400 ++ ++typedef struct VFIOAscendBarQuirk { ++ struct VFIOPCIDevice *vdev; ++ pcibus_t offset; ++ uint8_t bar; ++ MemoryRegion *mem; ++} VFIOAscendBarQuirk; ++ ++static uint64_t vfio_ascend_quirk_read(void *opaque, ++ hwaddr addr, unsigned size) ++{ ++ VFIOAscendBarQuirk *quirk = opaque; ++ VFIOPCIDevice *vdev = quirk->vdev; ++ ++ qemu_log("read RO region! addr=0x%" HWADDR_PRIx ", size=%d\n", ++ addr + quirk->offset, size); ++ ++ return vfio_region_read(&vdev->bars[quirk->bar].region, ++ addr + quirk->offset, size); ++} ++ ++static void vfio_ascend_quirk_write(void *opaque, hwaddr addr, ++ uint64_t data, unsigned size) ++{ ++ VFIOAscendBarQuirk *quirk = opaque; ++ ++ qemu_log("modifying RO region is not allowed! 
addr=0x%" ++ HWADDR_PRIx ", data=0x%" PRIx64 ", size=%d\n", ++ addr + quirk->offset, data, size); ++} ++ ++static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = { ++ .read = vfio_ascend_quirk_read, ++ .write = vfio_ascend_quirk_write, ++ .endianness = DEVICE_LITTLE_ENDIAN, ++}; ++ ++static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr) ++{ ++ VFIOQuirk *quirk; ++ VFIOAscendBarQuirk *bar4_quirk; ++ ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 4 || ++ vdev->device_id != PCI_DEVICE_ID_ASCEND310) { ++ return; ++ } ++ ++ quirk = g_malloc0(sizeof(*quirk)); ++ quirk->nr_mem = 1; ++ quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); ++ bar4_quirk = quirk->data = g_new0(typeof(*bar4_quirk), quirk->nr_mem); ++ bar4_quirk[0].vdev = vdev; ++ bar4_quirk[0].offset = ASCEND310_XLOADER_OFFSET; ++ bar4_quirk[0].bar = nr; ++ ++ /* ++ * intercept w/r to the xloader-updating register, ++ * so the vm can't enable xloader-updating ++ */ ++ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar4_quirk[0], ++ "vfio-ascend310-bar4-intercept-regs-quirk", ++ ASCEND310_XLOADER_SIZE); ++ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar4_quirk[0].offset, ++ &quirk->mem[0], 1); ++ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); ++} ++ + /* + * Common quirk probe entry points. 
+ */ +@@ -1261,6 +1335,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + #ifdef CONFIG_VFIO_IGD + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif ++ vfio_probe_ascend310_bar4_quirk(vdev, nr); + } + + void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) +-- +2.27.0 + diff --git a/vfio-pci-Ascend710-change-to-bar2-quirk.patch b/vfio-pci-Ascend710-change-to-bar2-quirk.patch new file mode 100644 index 0000000000000000000000000000000000000000..954ced1b7f10a60b09f5b811cfefbc4d2af76485 --- /dev/null +++ b/vfio-pci-Ascend710-change-to-bar2-quirk.patch @@ -0,0 +1,125 @@ +From 782040a627d0c3a44a9259a9055610e25c1f44fe Mon Sep 17 00:00:00 2001 +From: Wu Binfeng +Date: Mon, 25 Apr 2022 15:17:48 +0800 +Subject: [PATCH] vfio/pci: Ascend710 change to bar2 quirk + +Change Ascend710's quirk regions to bar2 for internal causes. +And support Ascend710 2P format now. +--- + hw/vfio/pci-quirks.c | 64 +++++++++++++++++++++++++++++++++++--------- + 1 file changed, 51 insertions(+), 13 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index ba4d8f020c..a71ebe26b4 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1213,10 +1213,17 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + #define PCI_DEVICE_ID_ASCEND910 0xd801 + #define PCI_DEVICE_ID_ASCEND710 0xd500 + #define PCI_DEVICE_ID_ASCEND310 0xd100 ++#define PCI_SUB_DEVICE_ID_ASCEND710_1P_MIN 0x100 ++#define PCI_SUB_DEVICE_ID_ASCEND710_1P_MAX 0x10f ++#define PCI_SUB_DEVICE_ID_ASCEND710_2P_MIN 0x110 ++#define PCI_SUB_DEVICE_ID_ASCEND710_2P_MAX 0x11f + #define ASCEND910_XLOADER_SIZE 4 + #define ASCEND910_XLOADER_OFFSET 0x80400 ++#define ASCEND710_2P_BASE (128 * 1024 * 1024) ++#define ASCEND710_1P_DEVNUM 1 ++#define ASCEND710_2P_DEVNUM 2 + #define ASCEND710_XLOADER_SIZE 4 +-#define ASCEND710_XLOADER_OFFSET 0x20430 ++#define ASCEND710_XLOADER_OFFSET 0x100430 + #define ASCEND310_XLOADER_SIZE 4 + #define ASCEND310_XLOADER_OFFSET 0x400 + +@@ -1289,23 +1296,38 @@ static void 
vfio_probe_ascend910_bar0_quirk(VFIOPCIDevice *vdev, int nr) + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + } + +-static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) ++static void vfio_probe_ascend710_bar2_quirk(VFIOPCIDevice *vdev, int nr) + { + VFIOQuirk *quirk; +- VFIOAscendBarQuirk *bar0_quirk; ++ VFIOAscendBarQuirk *bar2_quirk; ++ int sub_device_id; ++ int devnum = 0; + +- if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 || ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 2 || + vdev->device_id != PCI_DEVICE_ID_ASCEND710) { + return; + } + ++ sub_device_id = pci_get_word(vdev->pdev.config + PCI_SUBSYSTEM_ID); ++ if (sub_device_id >= PCI_SUB_DEVICE_ID_ASCEND710_1P_MIN && ++ sub_device_id <= PCI_SUB_DEVICE_ID_ASCEND710_1P_MAX) { ++ devnum = ASCEND710_1P_DEVNUM; ++ } else if (sub_device_id >= PCI_SUB_DEVICE_ID_ASCEND710_2P_MIN && ++ sub_device_id <= PCI_SUB_DEVICE_ID_ASCEND710_2P_MAX) { ++ devnum = ASCEND710_2P_DEVNUM; ++ } ++ ++ if (devnum != ASCEND710_1P_DEVNUM && devnum != ASCEND710_2P_DEVNUM) { ++ return; ++ } ++ + quirk = g_malloc0(sizeof(*quirk)); +- quirk->nr_mem = 1; ++ quirk->nr_mem = devnum; + quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); +- bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem); +- bar0_quirk[0].vdev = vdev; +- bar0_quirk[0].offset = ASCEND710_XLOADER_OFFSET; +- bar0_quirk[0].bar = nr; ++ bar2_quirk = quirk->data = g_new0(typeof(*bar2_quirk), quirk->nr_mem); ++ bar2_quirk[0].vdev = vdev; ++ bar2_quirk[0].offset = ASCEND710_XLOADER_OFFSET; ++ bar2_quirk[0].bar = nr; + + /* + * intercept w/r to the xloader-updating register, +@@ -1313,12 +1335,28 @@ static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) + */ + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), + &vfio_ascend_intercept_regs_quirk, +- &bar0_quirk[0], +- "vfio-ascend710-bar0-intercept-regs-quirk", ++ &bar2_quirk[0], ++ "vfio-ascend710-bar2-1p-intercept-regs-quirk", + 
ASCEND710_XLOADER_SIZE); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, +- bar0_quirk[0].offset, ++ bar2_quirk[0].offset, + &quirk->mem[0], 1); ++ ++ if (devnum == ASCEND710_2P_DEVNUM) { ++ bar2_quirk[1].vdev = vdev; ++ bar2_quirk[1].offset = (ASCEND710_2P_BASE + ASCEND710_XLOADER_OFFSET); ++ bar2_quirk[1].bar = nr; ++ ++ memory_region_init_io(&quirk->mem[1], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar2_quirk[1], ++ "vfio-ascend710-bar2-2p-intercept-regs-quirk", ++ ASCEND710_XLOADER_SIZE); ++ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar2_quirk[1].offset, ++ &quirk->mem[1], 1); ++ } ++ + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + } + +@@ -1408,7 +1446,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif + vfio_probe_ascend910_bar0_quirk(vdev, nr); +- vfio_probe_ascend710_bar0_quirk(vdev, nr); ++ vfio_probe_ascend710_bar2_quirk(vdev, nr); + vfio_probe_ascend310_bar4_quirk(vdev, nr); + } + +-- +2.27.0 + diff --git a/vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch b/vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch new file mode 100644 index 0000000000000000000000000000000000000000..771650754bfd6754a0e0d780d760507dcbb170a5 --- /dev/null +++ b/vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch @@ -0,0 +1,75 @@ +From f999392631e7f9fb15493f17b535a8a42ac88be2 Mon Sep 17 00:00:00 2001 +From: Binfeng Wu +Date: Tue, 8 Feb 2022 17:16:04 +0800 +Subject: [PATCH] vfio/pci: Ascend710 need 4Bytes quirk in bar0 + +--- + hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 8fb190ce3c..9ef4b63e82 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1210,7 +1210,10 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + } + + #define PCI_VENDOR_ID_HUAWEI 0x19e5 ++#define PCI_DEVICE_ID_ASCEND710 0xd500 + #define 
PCI_DEVICE_ID_ASCEND310 0xd100 ++#define ASCEND710_XLOADER_SIZE 4 ++#define ASCEND710_XLOADER_OFFSET 0x20430 + #define ASCEND310_XLOADER_SIZE 4 + #define ASCEND310_XLOADER_OFFSET 0x400 + +@@ -1250,6 +1253,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) ++{ ++ VFIOQuirk *quirk; ++ VFIOAscendBarQuirk *bar0_quirk; ++ ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 || ++ vdev->device_id != PCI_DEVICE_ID_ASCEND710) { ++ return; ++ } ++ ++ quirk = g_malloc0(sizeof(*quirk)); ++ quirk->nr_mem = 1; ++ quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); ++ bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem); ++ bar0_quirk[0].vdev = vdev; ++ bar0_quirk[0].offset = ASCEND710_XLOADER_OFFSET; ++ bar0_quirk[0].bar = nr; ++ ++ /* ++ * intercept w/r to the xloader-updating register, ++ * so the vm can't enable xloader-updating ++ */ ++ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar0_quirk[0], ++ "vfio-ascend710-bar0-intercept-regs-quirk", ++ ASCEND710_XLOADER_SIZE); ++ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar0_quirk[0].offset, ++ &quirk->mem[0], 1); ++ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); ++} ++ + static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr) + { + VFIOQuirk *quirk; +@@ -1335,6 +1371,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + #ifdef CONFIG_VFIO_IGD + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif ++ vfio_probe_ascend710_bar0_quirk(vdev, nr); + vfio_probe_ascend310_bar4_quirk(vdev, nr); + } + +-- +2.27.0 + diff --git a/vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch b/vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch new file mode 100644 index 0000000000000000000000000000000000000000..e273e233d58c24617643a0564c4a27d466bd1297 --- /dev/null +++ 
b/vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch @@ -0,0 +1,76 @@ +From 5b068100780cf91cc1696589d2115ba3078f9d38 Mon Sep 17 00:00:00 2001 +From: Binfeng Wu +Date: Tue, 8 Feb 2022 19:20:36 +0800 +Subject: [PATCH] vfio/pci: Ascend910 need 4Bytes quirk in bar0 + +--- + hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 9ef4b63e82..ba4d8f020c 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1210,8 +1210,11 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + } + + #define PCI_VENDOR_ID_HUAWEI 0x19e5 ++#define PCI_DEVICE_ID_ASCEND910 0xd801 + #define PCI_DEVICE_ID_ASCEND710 0xd500 + #define PCI_DEVICE_ID_ASCEND310 0xd100 ++#define ASCEND910_XLOADER_SIZE 4 ++#define ASCEND910_XLOADER_OFFSET 0x80400 + #define ASCEND710_XLOADER_SIZE 4 + #define ASCEND710_XLOADER_OFFSET 0x20430 + #define ASCEND310_XLOADER_SIZE 4 +@@ -1253,6 +1256,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static void vfio_probe_ascend910_bar0_quirk(VFIOPCIDevice *vdev, int nr) ++{ ++ VFIOQuirk *quirk; ++ VFIOAscendBarQuirk *bar0_quirk; ++ ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 || ++ vdev->device_id != PCI_DEVICE_ID_ASCEND910) { ++ return; ++ } ++ ++ quirk = g_malloc0(sizeof(*quirk)); ++ quirk->nr_mem = 1; ++ quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); ++ bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem); ++ bar0_quirk[0].vdev = vdev; ++ bar0_quirk[0].offset = ASCEND910_XLOADER_OFFSET; ++ bar0_quirk[0].bar = nr; ++ ++ /* ++ * intercept w/r to the xloader-updating register, ++ * so the vm can't enable xloader-updating ++ */ ++ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar0_quirk[0], ++ "vfio-ascend910-bar0-intercept-regs-quirk", ++ ASCEND910_XLOADER_SIZE); ++ 
memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar0_quirk[0].offset, ++ &quirk->mem[0], 1); ++ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); ++} ++ + static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) + { + VFIOQuirk *quirk; +@@ -1371,6 +1407,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + #ifdef CONFIG_VFIO_IGD + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif ++ vfio_probe_ascend910_bar0_quirk(vdev, nr); + vfio_probe_ascend710_bar0_quirk(vdev, nr); + vfio_probe_ascend310_bar4_quirk(vdev, nr); + } +-- +2.27.0 + diff --git a/vfio-pci-Extract-mdev-check-into-an-helper.patch b/vfio-pci-Extract-mdev-check-into-an-helper.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c3c1fa684947ff5b77e235b33e7067c0a457dee --- /dev/null +++ b/vfio-pci-Extract-mdev-check-into-an-helper.patch @@ -0,0 +1,103 @@ +From 92da638c3a97679ab4d9f497ae5c7bf652e7bf99 Mon Sep 17 00:00:00 2001 +From: Joao Martins +Date: Fri, 19 Jul 2024 13:04:49 +0100 +Subject: [PATCH] vfio/pci: Extract mdev check into an helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In preparation to skip initialization of the HostIOMMUDevice for mdev, +extract the checks that validate if a device is an mdev into helpers. + +A vfio_device_is_mdev() is created, and subsystems consult VFIODevice::mdev +to check if it's mdev or not. 
+ +Signed-off-by: Joao Martins +Reviewed-by: Cédric Le Goater +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +--- + hw/vfio/helpers.c | 14 ++++++++++++++ + hw/vfio/pci.c | 12 +++--------- + include/hw/vfio/vfio-common.h | 2 ++ + 3 files changed, 19 insertions(+), 9 deletions(-) + +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 35b8e42304..37bc383c69 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -680,3 +680,17 @@ int vfio_device_get_aw_bits(VFIODevice *vdev) + + return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX; + } ++ ++bool vfio_device_is_mdev(VFIODevice *vbasedev) ++{ ++ g_autofree char *subsys = NULL; ++ g_autofree char *tmp = NULL; ++ ++ if (!vbasedev->sysfsdev) { ++ return false; ++ } ++ ++ tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev); ++ subsys = realpath(tmp, NULL); ++ return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0); ++} +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 675a608b9c..de040e73ca 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2942,10 +2942,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + { + VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; +- char *tmp, *subsys; + Error *err = NULL; + int i, ret; +- bool is_mdev; + char uuid[UUID_STR_LEN]; + char *name; + +@@ -2976,15 +2974,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + * stays in sync with the active working set of the guest driver. Prevent + * the x-balloon-allowed option unless this is minimally an mdev device. 
+ */ +- tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev); +- subsys = realpath(tmp, NULL); +- g_free(tmp); +- is_mdev = subsys && (strcmp(subsys, "/sys/bus/mdev") == 0); +- free(subsys); ++ vbasedev->mdev = vfio_device_is_mdev(vbasedev); + +- trace_vfio_mdev(vbasedev->name, is_mdev); ++ trace_vfio_mdev(vbasedev->name, vbasedev->mdev); + +- if (vbasedev->ram_block_discard_allowed && !is_mdev) { ++ if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) { + error_setg(errp, "x-balloon-allowed only potentially compatible " + "with mdev devices"); + goto error; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index d45d40c329..e49e5fabba 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -126,6 +126,7 @@ typedef struct VFIODevice { + DeviceState *dev; + int fd; + int type; ++ bool mdev; + bool reset_works; + bool needs_reset; + bool no_mmap; +@@ -219,6 +220,7 @@ void vfio_region_exit(VFIORegion *region); + void vfio_region_finalize(VFIORegion *region); + void vfio_reset_handler(void *opaque); + struct vfio_device_info *vfio_get_device_info(int fd); ++bool vfio_device_is_mdev(VFIODevice *vbasedev); + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); + void vfio_detach_device(VFIODevice *vbasedev); +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch b/vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch new file mode 100644 index 0000000000000000000000000000000000000000..0cf494377109a14b21c9aa60165208062d4d6768 --- /dev/null +++ b/vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch @@ -0,0 +1,131 @@ +From 0b0701478649baccf3945051822f993619bce01e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:43 +0800 +Subject: [PATCH] vfio/pci: Extract out a helper + vfio_pci_get_pci_hot_reset_info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +This helper will be used by both legacy and iommufd backends. + +No functional changes intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/pci.c | 54 +++++++++++++++++++++++++++++++++++---------------- + hw/vfio/pci.h | 3 +++ + 2 files changed, 40 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index c62c02f7b6..eb55e8ae88 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2445,22 +2445,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) + return (strcmp(tmp, name) == 0); + } + +-static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) ++int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, ++ struct vfio_pci_hot_reset_info **info_p) + { +- VFIOGroup *group; + struct vfio_pci_hot_reset_info *info; +- struct vfio_pci_dependent_device *devices; +- struct vfio_pci_hot_reset *reset; +- int32_t *fds; +- int ret, i, count; +- bool multi = false; ++ int ret, count; + +- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); +- +- if (!single) { +- vfio_pci_pre_reset(vdev); +- } +- vdev->vbasedev.needs_reset = false; ++ assert(info_p && !*info_p); + + info = g_malloc0(sizeof(*info)); + info->argsz = sizeof(*info); +@@ -2468,24 +2459,53 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + if (ret && errno != ENOSPC) { + ret = -errno; ++ g_free(info); + if (!vdev->has_pm_reset) { + error_report("vfio: Cannot reset device %s, " + "no available reset mechanism.", vdev->vbasedev.name); + } +- goto out_single; ++ return ret; + } + + count = info->count; +- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices))); +- info->argsz = sizeof(*info) + (count * sizeof(*devices)); +- devices = &info->devices[0]; ++ info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0]))); ++ info->argsz = sizeof(*info) + (count * sizeof(info->devices[0])); + + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + if (ret) { + ret = -errno; ++ g_free(info); + error_report("vfio: hot reset info failed: %m"); ++ return ret; ++ } ++ ++ *info_p = info; ++ return 0; ++} ++ ++static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) ++{ ++ VFIOGroup *group; ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int32_t *fds; ++ int ret, i, count; ++ bool multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { + goto out_single; + } ++ devices = &info->devices[0]; + + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); + +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index fba8737ab2..1006061afb 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -218,6 +218,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); + + extern const PropertyInfo qdev_prop_nv_gpudirect_clique; + ++int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, ++ struct vfio_pci_hot_reset_info **info_p); ++ + int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); + + int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Implement-return_page_response-page-respons.patch b/vfio-pci-Implement-return_page_response-page-respons.patch deleted file mode 100644 index 721512e4095c6385efe44279e7e44744ea781899..0000000000000000000000000000000000000000 --- a/vfio-pci-Implement-return_page_response-page-respons.patch +++ /dev/null @@ -1,199 +0,0 @@ -From dab7c3ad6d51e9f0c65d864d6128f62697db4604 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 Nov 2020 12:03:29 -0500 -Subject: [PATCH] vfio/pci: Implement return_page_response page response - callback - -This patch implements the page response path. The -response is written into the page response ring buffer and then -update header's head index is updated. This path is not used -by this series. It is introduced here as a POC for vSVA/ARM -integration. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/pci.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/pci.h | 2 + - 2 files changed, 125 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d1198c8a23..6f4083aec8 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2662,6 +2662,61 @@ out: - g_free(fault_region_info); - } - -+static void vfio_init_fault_response_regions(VFIOPCIDevice *vdev, Error **errp) -+{ -+ struct vfio_region_info *fault_region_info = NULL; -+ struct vfio_region_info_cap_fault *cap_fault; -+ VFIODevice *vbasedev = &vdev->vbasedev; -+ struct vfio_info_cap_header *hdr; -+ char *fault_region_name; -+ int ret; -+ -+ ret = vfio_get_dev_region_info(&vdev->vbasedev, -+ VFIO_REGION_TYPE_NESTED, -+ VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE, -+ &fault_region_info); -+ if (ret) { -+ goto out; -+ } -+ -+ hdr = vfio_get_region_info_cap(fault_region_info, -+ VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE); -+ if (!hdr) { -+ error_setg(errp, "failed to retrieve DMA FAULT RESPONSE capability"); -+ goto out; -+ } -+ cap_fault = container_of(hdr, struct vfio_region_info_cap_fault, -+ header); -+ if (cap_fault->version != 1) { -+ error_setg(errp, "Unsupported DMA FAULT RESPONSE API version %d", -+ cap_fault->version); -+ goto out; -+ } -+ -+ fault_region_name = g_strdup_printf("%s DMA FAULT RESPONSE %d", -+ vbasedev->name, -+ fault_region_info->index); -+ -+ ret = vfio_region_setup(OBJECT(vdev), vbasedev, -+ &vdev->dma_fault_response_region, -+ fault_region_info->index, -+ fault_region_name); -+ g_free(fault_region_name); -+ if (ret) { -+ error_setg_errno(errp, -ret, -+ "failed to set up the DMA FAULT RESPONSE region %d", -+ fault_region_info->index); -+ goto out; -+ } -+ -+ ret = vfio_region_mmap(&vdev->dma_fault_response_region); -+ if (ret) { -+ error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT RESPONSE queue"); -+ } -+out: -+ g_free(fault_region_info); -+} -+ - static void 
vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) - { - VFIODevice *vbasedev = &vdev->vbasedev; -@@ -2737,6 +2792,12 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) - return; - } - -+ vfio_init_fault_response_regions(vdev, &err); -+ if (err) { -+ error_propagate(errp, err); -+ return; -+ } -+ - irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); -@@ -2915,8 +2976,68 @@ static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn, - return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info); - } - -+static int vfio_iommu_return_page_response(PCIBus *bus, int32_t devfn, -+ IOMMUPageResponse *resp) -+{ -+ PCIDevice *pdev = bus->devices[devfn]; -+ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); -+ struct iommu_page_response *response = &resp->resp; -+ struct vfio_region_dma_fault_response header; -+ struct iommu_page_response *queue; -+ char *queue_buffer = NULL; -+ ssize_t bytes; -+ -+ if (!vdev->dma_fault_response_region.mem) { -+ return -EINVAL; -+ } -+ -+ /* read the header */ -+ bytes = pread(vdev->vbasedev.fd, &header, sizeof(header), -+ vdev->dma_fault_response_region.fd_offset); -+ if (bytes != sizeof(header)) { -+ error_report("%s unable to read the fault region header (0x%lx)", -+ __func__, bytes); -+ return -1; -+ } -+ -+ /* Normally the fault queue is mmapped */ -+ queue = (struct iommu_page_response *)vdev->dma_fault_response_region.mmaps[0].mmap; -+ if (!queue) { -+ size_t queue_size = header.nb_entries * header.entry_size; -+ -+ error_report("%s: fault queue not mmapped: slower fault handling", -+ vdev->vbasedev.name); -+ -+ queue_buffer = g_malloc(queue_size); -+ bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size, -+ vdev->dma_fault_response_region.fd_offset + header.offset); -+ if (bytes != queue_size) { -+ error_report("%s unable to read the fault queue (0x%lx)", -+ __func__, bytes); -+ return -1; -+ } -+ -+ queue = (struct 
iommu_page_response *)queue_buffer; -+ } -+ /* deposit the new response in the queue and increment the head */ -+ memcpy(queue + header.head, response, header.entry_size); -+ -+ vdev->fault_response_head_index = -+ (vdev->fault_response_head_index + 1) % header.nb_entries; -+ bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index, 4, -+ vdev->dma_fault_response_region.fd_offset); -+ if (bytes != 4) { -+ error_report("%s unable to write the fault response region head index (0x%lx)", -+ __func__, bytes); -+ } -+ g_free(queue_buffer); -+ -+ return 0; -+} -+ - static PCIPASIDOps vfio_pci_pasid_ops = { - .set_pasid_table = vfio_iommu_set_pasid_table, -+ .return_page_response = vfio_iommu_return_page_response, - }; - - static void vfio_dma_fault_notifier_handler(void *opaque) -@@ -3373,6 +3494,7 @@ static void vfio_instance_finalize(Object *obj) - vfio_display_finalize(vdev); - vfio_bars_finalize(vdev); - vfio_region_finalize(&vdev->dma_fault_region); -+ vfio_region_finalize(&vdev->dma_fault_response_region); - g_free(vdev->emulated_config_bits); - g_free(vdev->rom); - /* -@@ -3394,6 +3516,7 @@ static void vfio_exitfn(PCIDevice *pdev) - vfio_unregister_err_notifier(vdev); - vfio_unregister_ext_irq_notifiers(vdev); - vfio_region_exit(&vdev->dma_fault_region); -+ vfio_region_exit(&vdev->dma_fault_response_region); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); - vfio_disable_interrupts(vdev); - if (vdev->intx.mmap_timer) { -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index e31bc0173a..7fdcfa0dc8 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -143,6 +143,8 @@ typedef struct VFIOPCIDevice { - VFIOPCIExtIRQ *ext_irqs; - VFIORegion dma_fault_region; - uint32_t fault_tail_index; -+ VFIORegion dma_fault_response_region; -+ uint32_t fault_response_head_index; - int (*resetfn)(struct VFIOPCIDevice *); - uint32_t vendor_id; - uint32_t device_id; --- -2.27.0 - diff --git a/vfio-pci-Implement-the-DMA-fault-handler.patch 
b/vfio-pci-Implement-the-DMA-fault-handler.patch deleted file mode 100644 index ca61b01c4469cd30c3b4b781c2cc527b48c45e80..0000000000000000000000000000000000000000 --- a/vfio-pci-Implement-the-DMA-fault-handler.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 139d0b3474c29427fea4a0ed47f51c01a76a8636 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 5 Mar 2019 16:35:32 +0100 -Subject: [PATCH] vfio/pci: Implement the DMA fault handler - -Whenever the eventfd is triggered, we retrieve the DMA fault(s) -from the mmapped fault region and inject them in the iommu -memory region. - -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/pci.h | 1 + - 2 files changed, 51 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 0db7d68258..d1198c8a23 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2922,10 +2922,60 @@ static PCIPASIDOps vfio_pci_pasid_ops = { - static void vfio_dma_fault_notifier_handler(void *opaque) - { - VFIOPCIExtIRQ *ext_irq = opaque; -+ VFIOPCIDevice *vdev = ext_irq->vdev; -+ PCIDevice *pdev = &vdev->pdev; -+ AddressSpace *as = pci_device_iommu_address_space(pdev); -+ IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(as->root); -+ struct vfio_region_dma_fault header; -+ struct iommu_fault *queue; -+ char *queue_buffer = NULL; -+ ssize_t bytes; - - if (!event_notifier_test_and_clear(&ext_irq->notifier)) { - return; - } -+ -+ bytes = pread(vdev->vbasedev.fd, &header, sizeof(header), -+ vdev->dma_fault_region.fd_offset); -+ if (bytes != sizeof(header)) { -+ error_report("%s unable to read the fault region header (0x%lx)", -+ __func__, bytes); -+ return; -+ } -+ -+ /* Normally the fault queue is mmapped */ -+ queue = (struct iommu_fault *)vdev->dma_fault_region.mmaps[0].mmap; -+ if (!queue) { -+ size_t queue_size = header.nb_entries * header.entry_size; -+ -+ error_report("%s: fault queue not mmapped: slower fault handling", -+ vdev->vbasedev.name); -+ -+ 
queue_buffer = g_malloc(queue_size); -+ bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size, -+ vdev->dma_fault_region.fd_offset + header.offset); -+ if (bytes != queue_size) { -+ error_report("%s unable to read the fault queue (0x%lx)", -+ __func__, bytes); -+ return; -+ } -+ -+ queue = (struct iommu_fault *)queue_buffer; -+ } -+ -+ while (vdev->fault_tail_index != header.head) { -+ memory_region_inject_faults(iommu_mr, 1, -+ &queue[vdev->fault_tail_index]); -+ vdev->fault_tail_index = -+ (vdev->fault_tail_index + 1) % header.nb_entries; -+ } -+ bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_tail_index, 4, -+ vdev->dma_fault_region.fd_offset); -+ if (bytes != 4) { -+ error_report("%s unable to write the fault region tail index (0x%lx)", -+ __func__, bytes); -+ } -+ g_free(queue_buffer); - } - - static int vfio_register_ext_irq_handler(VFIOPCIDevice *vdev, -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 815154656c..e31bc0173a 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -142,6 +142,7 @@ typedef struct VFIOPCIDevice { - EventNotifier req_notifier; - VFIOPCIExtIRQ *ext_irqs; - VFIORegion dma_fault_region; -+ uint32_t fault_tail_index; - int (*resetfn)(struct VFIOPCIDevice *); - uint32_t vendor_id; - uint32_t device_id; --- -2.27.0 - diff --git a/vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch b/vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch new file mode 100644 index 0000000000000000000000000000000000000000..846896b5dedaed344880e8a1058d9a2a4e8e64ae --- /dev/null +++ b/vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch @@ -0,0 +1,458 @@ +From 32beb7b360416a5f04cebac227ffdf102448d518 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:44 +0800 +Subject: [PATCH] vfio/pci: Introduce a vfio pci hot reset interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Legacy vfio pci and iommufd cdev have different process to hot reset +vfio device, expand current code 
to abstract out pci_hot_reset callback +for legacy vfio, this same interface will also be used by iommufd +cdev vfio device. + +Rename vfio_pci_hot_reset to vfio_legacy_pci_hot_reset and move it +into container.c. + +vfio_pci_[pre/post]_reset and vfio_pci_host_match are exported so +they could be called in legacy and iommufd pci_hot_reset callback. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/container.c | 170 ++++++++++++++++++++++++++ + hw/vfio/pci.c | 168 +------------------------ + hw/vfio/pci.h | 3 + + include/hw/vfio/vfio-container-base.h | 3 + + 4 files changed, 182 insertions(+), 162 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 4936b8f27f..e32e1b51e0 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -35,6 +35,7 @@ + #include "qapi/error.h" + #include "migration/migration.h" + #include "sysemu/kvm.h" ++#include "pci.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -1035,6 +1036,174 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) + vfio_put_group(group); + } + ++static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ VFIOGroup *group; ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int32_t *fds; ++ int ret, i, count; ++ bool multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { ++ goto out_single; ++ } ++ devices = &info->devices[0]; ++ ++ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); ++ ++ /* Verify that we have all the groups required */ ++ for (i = 0; i < info->count; i++) { ++ PCIHostDeviceAddress host; ++ VFIOPCIDevice *tmp; ++ VFIODevice *vbasedev_iter; ++ ++ host.domain = devices[i].segment; ++ host.bus = devices[i].bus; ++ host.slot = PCI_SLOT(devices[i].devfn); ++ host.function = PCI_FUNC(devices[i].devfn); ++ ++ trace_vfio_pci_hot_reset_dep_devices(host.domain, ++ host.bus, host.slot, host.function, devices[i].group_id); ++ ++ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { ++ continue; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ if (group->groupid == devices[i].group_id) { ++ break; ++ } ++ } ++ ++ if (!group) { ++ if (!vdev->has_pm_reset) { ++ error_report("vfio: Cannot reset device %s, " ++ "depends on group %d which is not owned.", ++ vdev->vbasedev.name, devices[i].group_id); ++ } ++ ret = -EPERM; ++ goto out; ++ } ++ ++ /* Prep dependent devices for reset and clear our marker. 
*/ ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ if (!vbasedev_iter->dev->realized || ++ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { ++ continue; ++ } ++ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); ++ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { ++ if (single) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ vfio_pci_pre_reset(tmp); ++ tmp->vbasedev.needs_reset = false; ++ multi = true; ++ break; ++ } ++ } ++ } ++ ++ if (!single && !multi) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ ++ /* Determine how many group fds need to be passed */ ++ count = 0; ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ for (i = 0; i < info->count; i++) { ++ if (group->groupid == devices[i].group_id) { ++ count++; ++ break; ++ } ++ } ++ } ++ ++ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); ++ reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); ++ fds = &reset->group_fds[0]; ++ ++ /* Fill in group fds */ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ for (i = 0; i < info->count; i++) { ++ if (group->groupid == devices[i].group_id) { ++ fds[reset->count++] = group->fd; ++ break; ++ } ++ } ++ } ++ ++ /* Bus reset! */ ++ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); ++ g_free(reset); ++ if (ret) { ++ ret = -errno; ++ } ++ ++ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, ++ ret ? 
strerror(errno) : "Success"); ++ ++out: ++ /* Re-enable INTx on affected devices */ ++ for (i = 0; i < info->count; i++) { ++ PCIHostDeviceAddress host; ++ VFIOPCIDevice *tmp; ++ VFIODevice *vbasedev_iter; ++ ++ host.domain = devices[i].segment; ++ host.bus = devices[i].bus; ++ host.slot = PCI_SLOT(devices[i].devfn); ++ host.function = PCI_FUNC(devices[i].devfn); ++ ++ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { ++ continue; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ if (group->groupid == devices[i].group_id) { ++ break; ++ } ++ } ++ ++ if (!group) { ++ break; ++ } ++ ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ if (!vbasedev_iter->dev->realized || ++ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { ++ continue; ++ } ++ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); ++ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { ++ vfio_pci_post_reset(tmp); ++ break; ++ } ++ } ++ } ++out_single: ++ if (!single) { ++ vfio_pci_post_reset(vdev); ++ } ++ g_free(info); ++ ++ return ret; ++} ++ + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, +@@ -1042,4 +1211,5 @@ const VFIOIOMMUOps vfio_legacy_ops = { + .detach_device = vfio_legacy_detach_device, + .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, + .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, ++ .pci_hot_reset = vfio_legacy_pci_hot_reset, + }; +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index eb55e8ae88..d00c3472c7 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2374,7 +2374,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) + return 0; + } + +-static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) ++void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + { + PCIDevice *pdev = &vdev->pdev; + uint16_t cmd; +@@ -2411,7 +2411,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); + } + +-static void 
vfio_pci_post_reset(VFIOPCIDevice *vdev) ++void vfio_pci_post_reset(VFIOPCIDevice *vdev) + { + Error *err = NULL; + int nr; +@@ -2435,7 +2435,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) + vfio_quirk_reset(vdev); + } + +-static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) ++bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) + { + char tmp[13]; + +@@ -2485,166 +2485,10 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + + static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + { +- VFIOGroup *group; +- struct vfio_pci_hot_reset_info *info = NULL; +- struct vfio_pci_dependent_device *devices; +- struct vfio_pci_hot_reset *reset; +- int32_t *fds; +- int ret, i, count; +- bool multi = false; +- +- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); +- +- if (!single) { +- vfio_pci_pre_reset(vdev); +- } +- vdev->vbasedev.needs_reset = false; +- +- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); +- +- if (ret) { +- goto out_single; +- } +- devices = &info->devices[0]; +- +- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); +- +- /* Verify that we have all the groups required */ +- for (i = 0; i < info->count; i++) { +- PCIHostDeviceAddress host; +- VFIOPCIDevice *tmp; +- VFIODevice *vbasedev_iter; +- +- host.domain = devices[i].segment; +- host.bus = devices[i].bus; +- host.slot = PCI_SLOT(devices[i].devfn); +- host.function = PCI_FUNC(devices[i].devfn); +- +- trace_vfio_pci_hot_reset_dep_devices(host.domain, +- host.bus, host.slot, host.function, devices[i].group_id); +- +- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { +- continue; +- } +- +- QLIST_FOREACH(group, &vfio_group_list, next) { +- if (group->groupid == devices[i].group_id) { +- break; +- } +- } +- +- if (!group) { +- if (!vdev->has_pm_reset) { +- error_report("vfio: Cannot reset device %s, " +- "depends on group %d which is not owned.", +- vdev->vbasedev.name, devices[i].group_id); +- 
} +- ret = -EPERM; +- goto out; +- } +- +- /* Prep dependent devices for reset and clear our marker. */ +- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { +- if (!vbasedev_iter->dev->realized || +- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { +- continue; +- } +- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); +- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { +- if (single) { +- ret = -EINVAL; +- goto out_single; +- } +- vfio_pci_pre_reset(tmp); +- tmp->vbasedev.needs_reset = false; +- multi = true; +- break; +- } +- } +- } +- +- if (!single && !multi) { +- ret = -EINVAL; +- goto out_single; +- } +- +- /* Determine how many group fds need to be passed */ +- count = 0; +- QLIST_FOREACH(group, &vfio_group_list, next) { +- for (i = 0; i < info->count; i++) { +- if (group->groupid == devices[i].group_id) { +- count++; +- break; +- } +- } +- } +- +- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); +- reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); +- fds = &reset->group_fds[0]; +- +- /* Fill in group fds */ +- QLIST_FOREACH(group, &vfio_group_list, next) { +- for (i = 0; i < info->count; i++) { +- if (group->groupid == devices[i].group_id) { +- fds[reset->count++] = group->fd; +- break; +- } +- } +- } +- +- /* Bus reset! */ +- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); +- g_free(reset); +- +- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, +- ret ? 
strerror(errno) : "Success"); +- +-out: +- /* Re-enable INTx on affected devices */ +- for (i = 0; i < info->count; i++) { +- PCIHostDeviceAddress host; +- VFIOPCIDevice *tmp; +- VFIODevice *vbasedev_iter; +- +- host.domain = devices[i].segment; +- host.bus = devices[i].bus; +- host.slot = PCI_SLOT(devices[i].devfn); +- host.function = PCI_FUNC(devices[i].devfn); +- +- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { +- continue; +- } +- +- QLIST_FOREACH(group, &vfio_group_list, next) { +- if (group->groupid == devices[i].group_id) { +- break; +- } +- } +- +- if (!group) { +- break; +- } +- +- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { +- if (!vbasedev_iter->dev->realized || +- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { +- continue; +- } +- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); +- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { +- vfio_pci_post_reset(tmp); +- break; +- } +- } +- } +-out_single: +- if (!single) { +- vfio_pci_post_reset(vdev); +- } +- g_free(info); ++ VFIODevice *vbasedev = &vdev->vbasedev; ++ const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; + +- return ret; ++ return ops->pci_hot_reset(vbasedev, single); + } + + /* +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 1006061afb..6e64a2654e 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -218,6 +218,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); + + extern const PropertyInfo qdev_prop_nv_gpudirect_clique; + ++void vfio_pci_pre_reset(VFIOPCIDevice *vdev); ++void vfio_pci_post_reset(VFIOPCIDevice *vdev); ++bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name); + int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + struct vfio_pci_hot_reset_info **info_p); + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 4b6f017c6f..45bb19c767 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -106,6 +106,9 @@ struct 
VFIOIOMMUOps { + int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); + int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); ++ /* PCI specific */ ++ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); ++ + /* SPAPR specific */ + int (*add_window)(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch new file mode 100644 index 0000000000000000000000000000000000000000..09f0408a9d7de8de9f4f423dff66d2c1c7b9f61e --- /dev/null +++ b/vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch @@ -0,0 +1,224 @@ +From 008d4e37fe67c7f81920efe862352c4b1f3cd1b0 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:47 +0800 +Subject: [PATCH] vfio/pci: Make vfio cdev pre-openable by passing a file + handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Together with the earlier support of pre-opening /dev/iommu device, +now we have full support of passing a vfio device to unprivileged +qemu by management tool. This mode is no more considered for the +legacy backend. So let's remove the "TODO" comment. + +Add helper functions vfio_device_set_fd() and vfio_device_get_name() +to set fd and get device name, they will also be used by other vfio +devices. + +There is no easy way to check if a device is mdev with FD passing, +so fail the x-balloon-allowed check unconditionally in this case. + +There is also no easy way to get BDF as name with FD passing, so +we fake a name by VFIO_FD[fd]. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/helpers.c | 43 +++++++++++++++++++++++++++++++++++ + hw/vfio/iommufd.c | 12 ++++++---- + hw/vfio/pci.c | 28 +++++++++++++---------- + include/hw/vfio/vfio-common.h | 4 ++++ + 4 files changed, 71 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 168847e7c5..3592c3d54e 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -27,6 +27,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "qemu/error-report.h" ++#include "monitor/monitor.h" + + /* + * Common VFIO interrupt disable +@@ -609,3 +610,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) + + return ret; + } ++ ++int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) ++{ ++ struct stat st; ++ ++ if (vbasedev->fd < 0) { ++ if (stat(vbasedev->sysfsdev, &st) < 0) { ++ error_setg_errno(errp, errno, "no such host device"); ++ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); ++ return -errno; ++ } ++ /* User may specify a name, e.g: VFIO platform device */ ++ if (!vbasedev->name) { ++ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); ++ } ++ } else { ++ if (!vbasedev->iommufd) { ++ error_setg(errp, "Use FD passing only with iommufd backend"); ++ return -EINVAL; ++ } ++ /* ++ * Give a name with fd so any function printing out vbasedev->name ++ * will not break. 
++ */ ++ if (!vbasedev->name) { ++ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); ++ } ++ } ++ ++ return 0; ++} ++ ++void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) ++{ ++ int fd = monitor_fd_param(monitor_cur(), str, errp); ++ ++ if (fd < 0) { ++ error_prepend(errp, "Could not parse remote object fd %s:", str); ++ return; ++ } ++ vbasedev->fd = fd; ++} +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 6e53e013ef..5accd26484 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -320,11 +320,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + uint32_t ioas_id; + Error *err = NULL; + +- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); +- if (devfd < 0) { +- return devfd; ++ if (vbasedev->fd < 0) { ++ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); ++ if (devfd < 0) { ++ return devfd; ++ } ++ vbasedev->fd = devfd; ++ } else { ++ devfd = vbasedev->fd; + } +- vbasedev->fd = devfd; + + ret = iommufd_cdev_connect_and_bind(vbasedev, errp); + if (ret) { +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index c5984b0598..445d58c8e5 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + VFIODevice *vbasedev = &vdev->vbasedev; + char *tmp, *subsys; + Error *err = NULL; +- struct stat st; + int i, ret; + bool is_mdev; + char uuid[UUID_STR_LEN]; + char *name; + +- if (!vbasedev->sysfsdev) { ++ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { + error_setg(errp, "No provided host device"); + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " ++#ifdef CONFIG_IOMMUFD ++ "or -device vfio-pci,fd=DEVICE_FD " ++#endif + "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); + return; + } +@@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + vdev->host.slot, vdev->host.function); + } + +- if 
(stat(vbasedev->sysfsdev, &st) < 0) { +- error_setg_errno(errp, errno, "no such host device"); +- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); ++ if (vfio_device_get_name(vbasedev, errp) < 0) { + return; + } +- +- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); + vbasedev->ops = &vfio_pci_ops; + vbasedev->type = VFIO_DEVICE_TYPE_PCI; + vbasedev->dev = DEVICE(vdev); +@@ -3330,6 +3328,7 @@ static void vfio_instance_init(Object *obj) + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; ++ vdev->vbasedev.fd = -1; + + vdev->nv_gpudirect_clique = 0xFF; + +@@ -3383,11 +3382,6 @@ static Property vfio_pci_dev_properties[] = { + qdev_prop_nv_gpudirect_clique, uint8_t), + DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, + OFF_AUTOPCIBAR_OFF), +- /* +- * TODO - support passed fds... is this necessary? +- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), +- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), +- */ + #ifdef CONFIG_IOMMUFD + DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, + TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), +@@ -3395,6 +3389,13 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++#ifdef CONFIG_IOMMUFD ++static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); ++} ++#endif ++ + static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3402,6 +3403,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + + dc->reset = vfio_pci_reset; + device_class_set_props(dc, vfio_pci_dev_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); ++#endif + dc->desc = "VFIO-based PCI device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->realize = vfio_realize; +diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h +index 9b9fd7b461..5f35f2900b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -265,4 +265,8 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + hwaddr size); + int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); ++ ++/* Returns 0 on success, or a negative errno. */ ++int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); ++void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch b/vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch new file mode 100644 index 0000000000000000000000000000000000000000..669232d083695bbfc2f59b2be8c7bce3b6e53151 --- /dev/null +++ b/vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch @@ -0,0 +1,62 @@ +From 0781636a0c5652c25f81c06ba5fc289966021a33 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:58 +0800 +Subject: [PATCH] vfio/pci: Move VFIODevice initializations in + vfio_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some of the VFIODevice initializations is in vfio_realize, +move all of them in vfio_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/pci.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 445d58c8e5..87405584d7 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2969,9 +2969,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + if (vfio_device_get_name(vbasedev, errp) < 0) { + return; + } +- vbasedev->ops = &vfio_pci_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_PCI; +- vbasedev->dev = DEVICE(vdev); + + /* + * Mediated devices *might* operate compatibly with discarding of RAM, but +@@ -3320,6 +3317,7 @@ static void vfio_instance_init(Object *obj) + { + PCIDevice *pci_dev = PCI_DEVICE(obj); + VFIOPCIDevice *vdev = VFIO_PCI(obj); ++ VFIODevice *vbasedev = &vdev->vbasedev; + + device_add_bootindex_property(obj, &vdev->bootindex, + "bootindex", NULL, +@@ -3328,7 +3326,11 @@ static void vfio_instance_init(Object *obj) + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; +- vdev->vbasedev.fd = -1; ++ ++ vbasedev->type = VFIO_DEVICE_TYPE_PCI; ++ vbasedev->ops = &vfio_pci_ops; ++ vbasedev->dev = DEVICE(vdev); ++ vbasedev->fd = -1; + + vdev->nv_gpudirect_clique = 0xFF; + +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Pass-HostIOMMUDevice-to-vIOMMU.patch b/vfio-pci-Pass-HostIOMMUDevice-to-vIOMMU.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c91266eb915e5b7a5ef5333855844ef0aff8e18 --- /dev/null +++ b/vfio-pci-Pass-HostIOMMUDevice-to-vIOMMU.patch @@ -0,0 +1,89 @@ +From dbbf6b33d9ce5f2785972f81919be143e81f866b Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 5 Jun 2024 16:30:40 +0800 +Subject: [PATCH] vfio/pci: Pass HostIOMMUDevice to vIOMMU + +With HostIOMMUDevice passed, vIOMMU can check compatibility with host +IOMMU, call into IOMMUFD specific methods, etc. 
+ +Originally-by: Yi Liu +Signed-off-by: Nicolin Chen +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +--- + hw/vfio/pci.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d84a9e73a6..675a608b9c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3107,6 +3107,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + + vfio_bars_register(vdev); + ++ if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { ++ error_prepend(errp, "Failed to set iommu_device: "); ++ goto out_teardown; ++ } ++ + ret = vfio_add_capabilities(vdev, errp); + if (ret) { + goto out_teardown; +@@ -3128,7 +3133,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + error_setg(errp, + "cannot support IGD OpRegion feature on hotplugged " + "device"); +- goto out_teardown; ++ goto out_unset_idev; + } + + ret = vfio_get_dev_region_info(vbasedev, +@@ -3137,13 +3142,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + if (ret) { + error_setg_errno(errp, -ret, + "does not support requested IGD OpRegion feature"); +- goto out_teardown; ++ goto out_unset_idev; + } + + ret = vfio_pci_igd_opregion_init(vdev, opregion, errp); + g_free(opregion); + if (ret) { +- goto out_teardown; ++ goto out_unset_idev; + } + } + +@@ -3229,6 +3234,8 @@ out_deregister: + if (vdev->intx.mmap_timer) { + timer_free(vdev->intx.mmap_timer); + } ++out_unset_idev: ++ pci_device_unset_iommu_device(pdev); + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +@@ -3257,6 +3264,7 @@ static void vfio_instance_finalize(Object *obj) + static void vfio_exitfn(PCIDevice *pdev) + { + VFIOPCIDevice *vdev = VFIO_PCI(pdev); ++ VFIODevice *vbasedev = &vdev->vbasedev; + + vfio_unregister_req_notifier(vdev); + vfio_unregister_err_notifier(vdev); +@@ -3271,7 +3279,8 @@ static void vfio_exitfn(PCIDevice *pdev) + vfio_teardown_msi(vdev); + 
vfio_pci_disable_rp_atomics(vdev); + vfio_bars_exit(vdev); +- vfio_migration_exit(&vdev->vbasedev); ++ vfio_migration_exit(vbasedev); ++ pci_device_unset_iommu_device(pdev); + } + + static void vfio_pci_reset(DeviceState *dev) +-- +2.41.0.windows.1 + diff --git a/vfio-pci-Register-handler-for-iommu-fault.patch b/vfio-pci-Register-handler-for-iommu-fault.patch deleted file mode 100644 index feea0a347baad96a592cefba3dd6957947d1505d..0000000000000000000000000000000000000000 --- a/vfio-pci-Register-handler-for-iommu-fault.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 65b96da46d2c5dfdcf3a4618cf75ca94345164d7 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 13 Dec 2018 04:39:30 -0500 -Subject: [PATCH] vfio/pci: Register handler for iommu fault - -We use the new extended IRQ VFIO_IRQ_TYPE_NESTED type and -VFIO_IRQ_SUBTYPE_DMA_FAULT subtype to set/unset -a notifier for physical DMA faults. The associated eventfd is -triggered, in nested mode, whenever a fault is detected at IOMMU -physical level. - -The actual handler will be implemented in subsequent patches. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/pci.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++- - hw/vfio/pci.h | 7 +++++ - 2 files changed, 87 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index bbcba3fd16..f5c05d508d 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2857,6 +2857,76 @@ static PCIPASIDOps vfio_pci_pasid_ops = { - .set_pasid_table = vfio_iommu_set_pasid_table, - }; - -+static void vfio_dma_fault_notifier_handler(void *opaque) -+{ -+ VFIOPCIExtIRQ *ext_irq = opaque; -+ -+ if (!event_notifier_test_and_clear(&ext_irq->notifier)) { -+ return; -+ } -+} -+ -+static int vfio_register_ext_irq_handler(VFIOPCIDevice *vdev, -+ uint32_t type, uint32_t subtype, -+ IOHandler *handler) -+{ -+ int32_t fd, ext_irq_index, index; -+ struct vfio_irq_info *irq_info; -+ Error *err = NULL; -+ EventNotifier *n; -+ int ret; -+ -+ ret = vfio_get_dev_irq_info(&vdev->vbasedev, type, subtype, &irq_info); -+ if (ret) { -+ return ret; -+ } -+ index = irq_info->index; -+ ext_irq_index = irq_info->index - VFIO_PCI_NUM_IRQS; -+ g_free(irq_info); -+ -+ vdev->ext_irqs[ext_irq_index].vdev = vdev; -+ vdev->ext_irqs[ext_irq_index].index = index; -+ n = &vdev->ext_irqs[ext_irq_index].notifier; -+ -+ ret = event_notifier_init(n, 0); -+ if (ret) { -+ error_report("vfio: Unable to init event notifier for ext irq %d(%d)", -+ ext_irq_index, ret); -+ return ret; -+ } -+ -+ fd = event_notifier_get_fd(n); -+ qemu_set_fd_handler(fd, vfio_dma_fault_notifier_handler, NULL, -+ &vdev->ext_irqs[ext_irq_index]); -+ -+ ret = vfio_set_irq_signaling(&vdev->vbasedev, index, 0, -+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err); -+ if (ret) { -+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); -+ qemu_set_fd_handler(fd, NULL, NULL, vdev); -+ event_notifier_cleanup(n); -+ } -+ return ret; -+} -+ -+static void vfio_unregister_ext_irq_notifiers(VFIOPCIDevice *vdev) -+{ -+ VFIODevice *vbasedev = &vdev->vbasedev; -+ Error *err = NULL; 
-+ int i; -+ -+ for (i = 0; i < vbasedev->num_irqs - VFIO_PCI_NUM_IRQS; i++) { -+ if (vfio_set_irq_signaling(vbasedev, i + VFIO_PCI_NUM_IRQS , 0, -+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { -+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); -+ } -+ qemu_set_fd_handler(event_notifier_get_fd(&vdev->ext_irqs[i].notifier), -+ NULL, NULL, vdev); -+ event_notifier_cleanup(&vdev->ext_irqs[i].notifier); -+ } -+ g_free(vdev->ext_irqs); -+} -+ - static void vfio_realize(PCIDevice *pdev, Error **errp) - { - VFIOPCIDevice *vdev = PCI_VFIO(pdev); -@@ -2867,7 +2937,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ssize_t len; - struct stat st; - int groupid; -- int i, ret; -+ int i, ret, nb_ext_irqs; - bool is_mdev; - - if (!vdev->vbasedev.sysfsdev) { -@@ -2955,6 +3025,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - goto error; - } - -+ nb_ext_irqs = vdev->vbasedev.num_irqs - VFIO_PCI_NUM_IRQS; -+ if (nb_ext_irqs > 0) { -+ vdev->ext_irqs = g_new0(VFIOPCIExtIRQ, nb_ext_irqs); -+ } -+ - vfio_populate_device(vdev, &err); - if (err) { - error_propagate(errp, err); -@@ -3161,6 +3236,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - - vfio_register_err_notifier(vdev); - vfio_register_req_notifier(vdev); -+ vfio_register_ext_irq_handler(vdev, VFIO_IRQ_TYPE_NESTED, -+ VFIO_IRQ_SUBTYPE_DMA_FAULT, -+ vfio_dma_fault_notifier_handler); - vfio_setup_resetfn_quirk(vdev); - - pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops); -@@ -3201,6 +3279,7 @@ static void vfio_exitfn(PCIDevice *pdev) - - vfio_unregister_req_notifier(vdev); - vfio_unregister_err_notifier(vdev); -+ vfio_unregister_ext_irq_notifiers(vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); - vfio_disable_interrupts(vdev); - if (vdev->intx.mmap_timer) { -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 834a90d646..893d074375 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -113,6 +113,12 @@ typedef struct VFIOMSIXInfo { - unsigned long *pending; - } 
VFIOMSIXInfo; - -+typedef struct VFIOPCIExtIRQ { -+ struct VFIOPCIDevice *vdev; -+ EventNotifier notifier; -+ uint32_t index; -+} VFIOPCIExtIRQ; -+ - typedef struct VFIOPCIDevice { - PCIDevice pdev; - VFIODevice vbasedev; -@@ -134,6 +140,7 @@ typedef struct VFIOPCIDevice { - PCIHostDeviceAddress host; - EventNotifier err_notifier; - EventNotifier req_notifier; -+ VFIOPCIExtIRQ *ext_irqs; - int (*resetfn)(struct VFIOPCIDevice *); - uint32_t vendor_id; - uint32_t device_id; --- -2.27.0 - diff --git a/vfio-pci-Set-up-the-DMA-FAULT-region.patch b/vfio-pci-Set-up-the-DMA-FAULT-region.patch deleted file mode 100644 index ae70a0696cb8310e2669b7e75d2e12bf8e9911f8..0000000000000000000000000000000000000000 --- a/vfio-pci-Set-up-the-DMA-FAULT-region.patch +++ /dev/null @@ -1,132 +0,0 @@ -From e44d9cc377848f0a560b6d114561852e95fab557 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 13 Dec 2018 10:57:53 -0500 -Subject: [PATCH] vfio/pci: Set up the DMA FAULT region - -Set up the fault region which is composed of the actual fault -queue (mmappable) and a header used to handle it. The fault -queue is mmapped. 
- -Signed-off-by: Eric Auger -Signed-off-by: Kunkun Jiang ---- - hw/vfio/pci.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/pci.h | 1 + - 2 files changed, 65 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index f5c05d508d..0db7d68258 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2607,11 +2607,67 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) - return 0; - } - -+static void vfio_init_fault_regions(VFIOPCIDevice *vdev, Error **errp) -+{ -+ struct vfio_region_info *fault_region_info = NULL; -+ struct vfio_region_info_cap_fault *cap_fault; -+ VFIODevice *vbasedev = &vdev->vbasedev; -+ struct vfio_info_cap_header *hdr; -+ char *fault_region_name; -+ int ret; -+ -+ ret = vfio_get_dev_region_info(&vdev->vbasedev, -+ VFIO_REGION_TYPE_NESTED, -+ VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT, -+ &fault_region_info); -+ if (ret) { -+ goto out; -+ } -+ -+ hdr = vfio_get_region_info_cap(fault_region_info, -+ VFIO_REGION_INFO_CAP_DMA_FAULT); -+ if (!hdr) { -+ error_setg(errp, "failed to retrieve DMA FAULT capability"); -+ goto out; -+ } -+ cap_fault = container_of(hdr, struct vfio_region_info_cap_fault, -+ header); -+ if (cap_fault->version != 1) { -+ error_setg(errp, "Unsupported DMA FAULT API version %d", -+ cap_fault->version); -+ goto out; -+ } -+ -+ fault_region_name = g_strdup_printf("%s DMA FAULT %d", -+ vbasedev->name, -+ fault_region_info->index); -+ -+ ret = vfio_region_setup(OBJECT(vdev), vbasedev, -+ &vdev->dma_fault_region, -+ fault_region_info->index, -+ fault_region_name); -+ g_free(fault_region_name); -+ if (ret) { -+ error_setg_errno(errp, -ret, -+ "failed to set up the DMA FAULT region %d", -+ fault_region_info->index); -+ goto out; -+ } -+ -+ ret = vfio_region_mmap(&vdev->dma_fault_region); -+ if (ret) { -+ error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT queue"); -+ } -+out: -+ g_free(fault_region_info); -+} -+ - static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) - { - VFIODevice 
*vbasedev = &vdev->vbasedev; - struct vfio_region_info *reg_info; - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; -+ Error *err = NULL; - int i, ret = -1; - - /* Sanity check device */ -@@ -2675,6 +2731,12 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) - } - } - -+ vfio_init_fault_regions(vdev, &err); -+ if (err) { -+ error_propagate(errp, err); -+ return; -+ } -+ - irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); -@@ -3260,6 +3322,7 @@ static void vfio_instance_finalize(Object *obj) - - vfio_display_finalize(vdev); - vfio_bars_finalize(vdev); -+ vfio_region_finalize(&vdev->dma_fault_region); - g_free(vdev->emulated_config_bits); - g_free(vdev->rom); - /* -@@ -3280,6 +3343,7 @@ static void vfio_exitfn(PCIDevice *pdev) - vfio_unregister_req_notifier(vdev); - vfio_unregister_err_notifier(vdev); - vfio_unregister_ext_irq_notifiers(vdev); -+ vfio_region_exit(&vdev->dma_fault_region); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); - vfio_disable_interrupts(vdev); - if (vdev->intx.mmap_timer) { -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 893d074375..815154656c 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -141,6 +141,7 @@ typedef struct VFIOPCIDevice { - EventNotifier err_notifier; - EventNotifier req_notifier; - VFIOPCIExtIRQ *ext_irqs; -+ VFIORegion dma_fault_region; - int (*resetfn)(struct VFIOPCIDevice *); - uint32_t vendor_id; - uint32_t device_id; --- -2.27.0 - diff --git a/vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch b/vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch new file mode 100644 index 0000000000000000000000000000000000000000..d4c036bc026d3fcaef5b3f8b5ea7ccef2c6899da --- /dev/null +++ b/vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch @@ -0,0 +1,68 @@ +From 1bbc795190c3ad7c838dc57a6f7a38a779dfdd65 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:48 +0800 +Subject: 
[PATCH] vfio/platform: Allow the selection of a given iommu backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-platform device: + +If the user wants to use the legacy backend, it shall not +link the vfio-platform device with any iommufd object: + + -device vfio-platform,host=XXX + +This is called the legacy mode/backend. + +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-platform device options: + + -object iommufd,id=iommufd0 + -device vfio-platform,host=XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/platform.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 8e3d4ac458..98ae4bc655 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -15,11 +15,13 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include "qapi/error.h" + #include + #include + + #include "hw/vfio/vfio-platform.h" ++#include "sysemu/iommufd.h" + #include "migration/vmstate.h" + #include "qemu/error-report.h" + #include "qemu/lockable.h" +@@ -649,6 +651,10 @@ static Property vfio_platform_dev_properties[] = { + DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, + mmap_timeout, 1100), + DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.41.0.windows.1 + diff --git a/vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch 
b/vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch new file mode 100644 index 0000000000000000000000000000000000000000..8de50ae3ef9687769bdf5e4ff21a2932de3f0da7 --- /dev/null +++ b/vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch @@ -0,0 +1,100 @@ +From 9a12f3f754fcebe86fe2346e62cd25d8a2d06a89 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:49 +0800 +Subject: [PATCH] vfio/platform: Make vfio cdev pre-openable by passing a file + handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/platform.c | 32 ++++++++++++++++++++++++-------- + 1 file changed, 24 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 98ae4bc655..a97d9c6234 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -531,14 +531,13 @@ static VFIODeviceOps vfio_platform_ops = { + */ + static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) + { +- struct stat st; + int ret; + +- /* @sysfsdev takes precedence over @host */ +- if (vbasedev->sysfsdev) { ++ /* @fd takes precedence over @sysfsdev which takes precedence over @host */ ++ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { + g_free(vbasedev->name); + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); +- } else { ++ } else if (vbasedev->fd < 0) { + if (!vbasedev->name || strchr(vbasedev->name, '/')) { + error_setg(errp, "wrong host device name"); + return -EINVAL; +@@ -548,10 +547,9 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) + vbasedev->name); + } + +- if (stat(vbasedev->sysfsdev, &st) < 0) { +- 
error_setg_errno(errp, errno, +- "failed to get the sysfs host device file status"); +- return -errno; ++ ret = vfio_device_get_name(vbasedev, errp); ++ if (ret) { ++ return ret; + } + + ret = vfio_attach_device(vbasedev->name, vbasedev, +@@ -658,6 +656,20 @@ static Property vfio_platform_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++static void vfio_platform_instance_init(Object *obj) ++{ ++ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); ++ ++ vdev->vbasedev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp); ++} ++#endif ++ + static void vfio_platform_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -665,6 +677,9 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data) + + dc->realize = vfio_platform_realize; + device_class_set_props(dc, vfio_platform_dev_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd); ++#endif + dc->vmsd = &vfio_platform_vmstate; + dc->desc = "VFIO-based platform device assignment"; + sbc->connect_irq_notifier = vfio_start_irqfd_injection; +@@ -677,6 +692,7 @@ static const TypeInfo vfio_platform_dev_info = { + .name = TYPE_VFIO_PLATFORM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(VFIOPlatformDevice), ++ .instance_init = vfio_platform_instance_init, + .class_init = vfio_platform_class_init, + .class_size = sizeof(VFIOPlatformDeviceClass), + }; +-- +2.41.0.windows.1 + diff --git a/vfio-platform-Move-VFIODevice-initializations-in-vfi.patch b/vfio-platform-Move-VFIODevice-initializations-in-vfi.patch new file mode 100644 index 0000000000000000000000000000000000000000..ecfaef2b81ac29b92b5f3a7be725c30e52c8ef50 --- /dev/null +++ b/vfio-platform-Move-VFIODevice-initializations-in-vfi.patch @@ -0,0 +1,56 @@ +From 594a30d0a9d0d569cf264ffd7b042aa39a404383 Mon 
Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:59 +0800 +Subject: [PATCH] vfio/platform: Move VFIODevice initializations in + vfio_platform_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some of the VFIODevice initializations are in vfio_platform_realize; +move all of them into vfio_platform_instance_init. + +No functional change intended. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/platform.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index a97d9c6234..506eb8193f 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -581,10 +581,6 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) + VFIODevice *vbasedev = &vdev->vbasedev; + int i, ret; + +- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; +- vbasedev->dev = dev; +- vbasedev->ops = &vfio_platform_ops; +- + qemu_mutex_init(&vdev->intp_mutex); + + trace_vfio_platform_realize(vbasedev->sysfsdev ? 
+@@ -659,8 +655,12 @@ static Property vfio_platform_dev_properties[] = { + static void vfio_platform_instance_init(Object *obj) + { + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); ++ VFIODevice *vbasedev = &vdev->vbasedev; + +- vdev->vbasedev.fd = -1; ++ vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; ++ vbasedev->ops = &vfio_platform_ops; ++ vbasedev->dev = DEVICE(vdev); ++ vbasedev->fd = -1; + } + + #ifdef CONFIG_IOMMUFD +-- +2.41.0.windows.1 + diff --git a/vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch b/vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch new file mode 100644 index 0000000000000000000000000000000000000000..a622f5f3a067856210d11598a8079710f3ed6b2e --- /dev/null +++ b/vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch @@ -0,0 +1,97 @@ +From feed555b60bc36d3e704431148e302dae48b77a1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:16 +0100 +Subject: [PATCH] vfio/spapr: Extend VFIOIOMMUOps with a release handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This allows to abstract a bit more the sPAPR IOMMU support in the +legacy IOMMU backend. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/container.c | 8 ++++++-- + hw/vfio/spapr.c | 19 +++++++++++++++++++ + include/hw/vfio/vfio-container-base.h | 1 + + 3 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index e245d5a082..4c62f088b1 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -764,7 +764,9 @@ listener_release_exit: + } + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +- vfio_spapr_container_deinit(container); ++ if (bcontainer->ops->release) { ++ bcontainer->ops->release(bcontainer); ++ } + } + + enable_discards_exit: +@@ -803,7 +805,9 @@ static void vfio_disconnect_container(VFIOGroup *group) + } + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +- vfio_spapr_container_deinit(container); ++ if (bcontainer->ops->release) { ++ bcontainer->ops->release(bcontainer); ++ } + } + } + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 3694dfb874..697f80d11d 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -440,6 +440,24 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + } + } + ++static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) ++{ ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); ++ VFIOHostDMAWindow *hostwin, *next; ++ ++ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { ++ memory_listener_unregister(&scontainer->prereg_listener); ++ } ++ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, ++ next) { ++ QLIST_REMOVE(hostwin, hostwin_next); ++ g_free(hostwin); ++ } ++} ++ + static VFIOIOMMUOps vfio_iommu_spapr_ops; + + static void setup_spapr_ops(VFIOContainerBase *bcontainer) +@@ -447,6 +465,7 @@ 
static void setup_spapr_ops(VFIOContainerBase *bcontainer) + vfio_iommu_spapr_ops = *bcontainer->ops; + vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; + vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; ++ vfio_iommu_spapr_ops.release = vfio_spapr_container_release; + bcontainer->ops = &vfio_iommu_spapr_ops; + } + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index c12ce4dfcb..b2813b0c11 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -135,5 +135,6 @@ struct VFIOIOMMUClass { + Error **errp); + void (*del_window)(VFIOContainerBase *bcontainer, + MemoryRegionSection *section); ++ void (*release)(VFIOContainerBase *bcontainer); + }; + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch b/vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch new file mode 100644 index 0000000000000000000000000000000000000000..443cccb28eaccc2c5e346078df2dbc53aaa75730 --- /dev/null +++ b/vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch @@ -0,0 +1,113 @@ +From 2692ea754863364731e5712ebf83208690179089 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:22 +0100 +Subject: [PATCH] vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Move vfio_spapr_container_setup() to a VFIOIOMMUClass::setup handler +and convert the sPAPR VFIOIOMMUOps struct to a QOM interface. The +sPAPR QOM interface inherits from the legacy QOM interface because +both have the same basic needs. The sPAPR interface is then +extended with the handlers specific to the sPAPR IOMMU. + +This allows reuse and provides better abstraction of the backends. 
It +will be useful to avoid compiling the sPAPR IOMMU backend on targets +not supporting it. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/container.c | 24 ++++++------------------ + hw/vfio/spapr.c | 20 ++++++++++++++++++++ + include/hw/vfio/vfio-container-base.h | 1 + + 3 files changed, 27 insertions(+), 18 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 845239eff4..e245d5a082 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -441,6 +441,10 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) + case VFIO_TYPE1_IOMMU: + klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); + break; ++ case VFIO_SPAPR_TCE_v2_IOMMU: ++ case VFIO_SPAPR_TCE_IOMMU: ++ klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); ++ break; + default: + g_assert_not_reached(); + }; +@@ -716,25 +720,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + goto free_container_exit; + } + +- switch (container->iommu_type) { +- case VFIO_TYPE1v2_IOMMU: +- case VFIO_TYPE1_IOMMU: +- case VFIO_TYPE1v2_S_IOMMU: +- ret = vfio_legacy_setup(bcontainer, errp); +- break; +- case VFIO_SPAPR_TCE_v2_IOMMU: +- case VFIO_SPAPR_TCE_IOMMU: +- { +- ret = vfio_spapr_container_init(container, errp); +- if (ret) { +- goto enable_discards_exit; +- } +- break; +- } +- default: +- g_assert_not_reached(); +- } ++ assert(bcontainer->ops->setup); + ++ ret = bcontainer->ops->setup(bcontainer, errp); + if (ret) { + goto enable_discards_exit; + } +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 5c6426e697..3694dfb874 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -543,3 +543,23 @@ void vfio_spapr_container_deinit(VFIOContainer *container) + g_free(hostwin); + } + } ++ ++static void vfio_iommu_spapr_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->add_window = vfio_spapr_container_add_section_window; ++ 
vioc->del_window = vfio_spapr_container_del_section_window; ++ //vioc->release = vfio_spapr_container_release; ++ //vioc->setup = vfio_spapr_container_setup; ++}; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_SPAPR, ++ .parent = TYPE_VFIO_IOMMU_LEGACY, ++ .class_init = vfio_iommu_spapr_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 614de90767..1085109d0c 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -95,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + #define TYPE_VFIO_IOMMU "vfio-iommu" + #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" ++#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.41.0.windows.1 + diff --git a/vfio-spapr-Introduce-spapr-backend-and-target-interf.patch b/vfio-spapr-Introduce-spapr-backend-and-target-interf.patch new file mode 100644 index 0000000000000000000000000000000000000000..4b57f50c7bcc5105ca967a3bc2bc7e8926c8f96b --- /dev/null +++ b/vfio-spapr-Introduce-spapr-backend-and-target-interf.patch @@ -0,0 +1,82 @@ +From 4b0bff002d93d8785ccec8020667dc559bda4e9c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:33 +0800 +Subject: [PATCH] vfio/spapr: Introduce spapr backend and target interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce an empty spapr backend which will hold spapr specific +content, currently only prereg_listener and hostwin_list. + +Also introduce two spapr specific callbacks add/del_window into +VFIOIOMMUOps. Instantiate a spapr ops with a helper setup_spapr_ops +and assign it to bcontainer->ops. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/spapr.c | 14 ++++++++++++++ + include/hw/vfio/vfio-container-base.h | 6 ++++++ + 2 files changed, 20 insertions(+) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 7a50975f25..e1a6b35563 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -24,6 +24,10 @@ + #include "qapi/error.h" + #include "trace.h" + ++typedef struct VFIOSpaprContainer { ++ VFIOContainer container; ++} VFIOSpaprContainer; ++ + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) + { + if (memory_region_is_iommu(section->mr)) { +@@ -421,6 +425,14 @@ void vfio_container_del_section_window(VFIOContainer *container, + } + } + ++static VFIOIOMMUOps vfio_iommu_spapr_ops; ++ ++static void setup_spapr_ops(VFIOContainerBase *bcontainer) ++{ ++ vfio_iommu_spapr_ops = *bcontainer->ops; ++ bcontainer->ops = &vfio_iommu_spapr_ops; ++} ++ + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { + VFIOContainerBase *bcontainer = &container->bcontainer; +@@ -486,6 +498,8 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + 0x1000); + } + ++ setup_spapr_ops(bcontainer); ++ + return 0; + + listener_unregister_exit: +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 9658ffb526..f62a14ac73 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -101,5 +101,11 @@ struct VFIOIOMMUOps { + int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); + int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); ++ /* SPAPR specific */ ++ int (*add_window)(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp); ++ void (*del_window)(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section); + }; + #endif /* 
HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.41.0.windows.1 + diff --git a/vfio-spapr-Move-hostwin_list-into-spapr-container.patch b/vfio-spapr-Move-hostwin_list-into-spapr-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..075c7321ca70465235b22295b9ca4471b6579cf6 --- /dev/null +++ b/vfio-spapr-Move-hostwin_list-into-spapr-container.patch @@ -0,0 +1,180 @@ +From 13c57d5e888fe9d6bdf68469c8e76991a789c1e6 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:36 +0800 +Subject: [PATCH] vfio/spapr: Move hostwin_list into spapr container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional changes intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/spapr.c | 36 +++++++++++++++++++---------------- + include/hw/vfio/vfio-common.h | 1 - + 2 files changed, 20 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 68c3dd6c75..5c6426e697 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -27,6 +27,7 @@ + typedef struct VFIOSpaprContainer { + VFIOContainer container; + MemoryListener prereg_listener; ++ QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + } VFIOSpaprContainer; + + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) +@@ -154,12 +155,12 @@ static const MemoryListener vfio_prereg_listener = { + .region_del = vfio_prereg_listener_region_del, + }; + +-static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, ++static void vfio_host_win_add(VFIOSpaprContainer *scontainer, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) + { + VFIOHostDMAWindow *hostwin; + +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + 
min_iova, +@@ -173,15 +174,15 @@ static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hostwin->min_iova = min_iova; + hostwin->max_iova = max_iova; + hostwin->iova_pgsizes = iova_pgsizes; +- QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); ++ QLIST_INSERT_HEAD(&scontainer->hostwin_list, hostwin, hostwin_next); + } + +-static int vfio_host_win_del(VFIOContainer *container, ++static int vfio_host_win_del(VFIOSpaprContainer *scontainer, + hwaddr min_iova, hwaddr max_iova) + { + VFIOHostDMAWindow *hostwin; + +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); +@@ -192,7 +193,7 @@ static int vfio_host_win_del(VFIOContainer *container, + return -1; + } + +-static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, ++static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container, + hwaddr iova, hwaddr end) + { + VFIOHostDMAWindow *hostwin; +@@ -329,6 +330,8 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + { + VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; +@@ -344,7 +347,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + iova = section->offset_within_address_space; + end = iova + int128_get64(section->size) - 1; + +- if (!vfio_find_hostwin(container, iova, end)) { ++ if (!vfio_find_hostwin(scontainer, iova, end)) { + error_setg(errp, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, + iova, end); +@@ -358,7 +361,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + } + + /* For now 
intersections are not allowed, we may relax this later */ +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, +@@ -380,7 +383,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + return ret; + } + +- vfio_host_win_add(container, section->offset_within_address_space, ++ vfio_host_win_add(scontainer, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); + #ifdef CONFIG_KVM +@@ -419,6 +422,8 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + { + VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; +@@ -426,7 +431,7 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + + vfio_spapr_remove_window(container, + section->offset_within_address_space); +- if (vfio_host_win_del(container, ++ if (vfio_host_win_del(scontainer, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { +@@ -454,7 +459,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; + +- QLIST_INIT(&container->hostwin_list); ++ QLIST_INIT(&scontainer->hostwin_list); + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called +@@ -506,7 +511,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } else { + /* The default table uses 4K pages */ + bcontainer->pgsizes = 0x1000; +- vfio_host_win_add(container, info.dma32_window_start, ++ vfio_host_win_add(scontainer, 
info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); +@@ -525,15 +530,14 @@ listener_unregister_exit: + + void vfio_spapr_container_deinit(VFIOContainer *container) + { ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- VFIOSpaprContainer *scontainer = container_of(container, +- VFIOSpaprContainer, +- container); + memory_listener_unregister(&scontainer->prereg_listener); + } +- QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, ++ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index ba8abed75a..9e22acbfb6 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -88,7 +88,6 @@ typedef struct VFIOContainer { + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + unsigned iommu_type; + bool dirty_log_manual_clear; +- QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIODMARange) dma_list; + } VFIOContainer; +-- +2.41.0.windows.1 + diff --git a/vfio-spapr-Move-prereg_listener-into-spapr-container.patch b/vfio-spapr-Move-prereg_listener-into-spapr-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..70cce72253cf8ad074db53fa9866a4a6999d71ee --- /dev/null +++ b/vfio-spapr-Move-prereg_listener-into-spapr-container.patch @@ -0,0 +1,112 @@ +From 8f27e17107a923a0739c17efe5dcd11f818364af Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:35 +0800 +Subject: [PATCH] vfio/spapr: Move prereg_listener into spapr container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional changes intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/spapr.c | 24 ++++++++++++++++-------- + include/hw/vfio/vfio-common.h | 1 - + 2 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 5be1911aad..68c3dd6c75 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -26,6 +26,7 @@ + + typedef struct VFIOSpaprContainer { + VFIOContainer container; ++ MemoryListener prereg_listener; + } VFIOSpaprContainer; + + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) +@@ -48,8 +49,9 @@ static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa) + static void vfio_prereg_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, +- prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, ++ prereg_listener); ++ VFIOContainer *container = &scontainer->container; + VFIOContainerBase *bcontainer = &container->bcontainer; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; +@@ -107,8 +109,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + static void vfio_prereg_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, +- prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, ++ prereg_listener); ++ VFIOContainer *container = &scontainer->container; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; + int ret; +@@ -445,6 +448,8 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { + VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOSpaprContainer *scontainer = 
container_of(container, VFIOSpaprContainer, ++ container); + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; +@@ -463,9 +468,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + return -errno; + } + } else { +- container->prereg_listener = vfio_prereg_listener; ++ scontainer->prereg_listener = vfio_prereg_listener; + +- memory_listener_register(&container->prereg_listener, ++ memory_listener_register(&scontainer->prereg_listener, + &address_space_memory); + if (bcontainer->error) { + ret = -1; +@@ -513,7 +518,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + + listener_unregister_exit: + if (v2) { +- memory_listener_unregister(&container->prereg_listener); ++ memory_listener_unregister(&scontainer->prereg_listener); + } + return ret; + } +@@ -523,7 +528,10 @@ void vfio_spapr_container_deinit(VFIOContainer *container) + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- memory_listener_unregister(&container->prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(container, ++ VFIOSpaprContainer, ++ container); ++ memory_listener_unregister(&scontainer->prereg_listener); + } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index c6b1260911..ba8abed75a 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -86,7 +86,6 @@ typedef struct VFIODMARange { + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ +- MemoryListener prereg_listener; + unsigned iommu_type; + bool dirty_log_manual_clear; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; +-- +2.41.0.windows.1 + diff --git a/vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch 
b/vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch new file mode 100644 index 0000000000000000000000000000000000000000..06e68137f9cbf576d449b85b0af2ec118d481fff --- /dev/null +++ b/vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch @@ -0,0 +1,36 @@ +From 017272249cc362055dc5b31cdc16b2265df39e5c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:24 +0100 +Subject: [PATCH] vfio/spapr: Only compile sPAPR IOMMU support when needed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +sPAPR IOMMU support is only needed for pseries machines. Compile out +support when CONFIG_PSERIES is not set. This saves ~7K of text. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +--- + hw/vfio/meson.build | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build +index bd5cc4ca79..bda2688983 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -4,9 +4,9 @@ vfio_ss.add(files( + 'common.c', + 'container-base.c', + 'container.c', +- 'spapr.c', + 'migration.c', + )) ++vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) + vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( + 'iommufd.c', + )) +-- +2.41.0.windows.1 + diff --git a/vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch b/vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch new file mode 100644 index 0000000000000000000000000000000000000000..28a9e4e22258edd951b5bf41e906fd8625ff927a --- /dev/null +++ b/vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch @@ -0,0 +1,175 @@ +From 42d02193bbe543173aa16e463015c76fa2d38ec0 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Sat, 11 Jan 2025 10:52:34 +0800 +Subject: [PATCH] vfio/spapr: switch to spapr IOMMU BE add/del_section_window +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +No functional 
change intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Zhou Wang +--- + hw/vfio/common.c | 8 ++------ + hw/vfio/container-base.c | 21 +++++++++++++++++++++ + hw/vfio/spapr.c | 19 ++++++++++++++----- + include/hw/vfio/vfio-common.h | 5 ----- + include/hw/vfio/vfio-container-base.h | 5 +++++ + 5 files changed, 42 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 488aa43c9b..679fee4321 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -571,8 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, + { + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -595,7 +593,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + return; + } + +- if (vfio_container_add_section_window(container, section, &err)) { ++ if (vfio_container_add_section_window(bcontainer, section, &err)) { + goto fail; + } + +@@ -738,8 +736,6 @@ static void vfio_listener_region_del(MemoryListener *listener, + { + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -818,7 +814,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + + memory_region_unref(section->mr); + +- vfio_container_del_section_window(container, section); ++ vfio_container_del_section_window(bcontainer, section); + } + + typedef struct VFIODirtyRanges { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 0177f43741..71f7274973 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,6 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, 
iova, size, iotlb); + } + ++int vfio_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp) ++{ ++ if (!bcontainer->ops->add_window) { ++ return 0; ++ } ++ ++ return bcontainer->ops->add_window(bcontainer, section, errp); ++} ++ ++void vfio_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) ++{ ++ if (!bcontainer->ops->del_window) { ++ return; ++ } ++ ++ return bcontainer->ops->del_window(bcontainer, section); ++} ++ + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start) + { +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index e1a6b35563..5be1911aad 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -319,10 +319,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, + return 0; + } + +-int vfio_container_add_section_window(VFIOContainer *container, +- MemoryRegionSection *section, +- Error **errp) ++static int ++vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; +@@ -407,9 +410,13 @@ int vfio_container_add_section_window(VFIOContainer *container, + return 0; + } + +-void vfio_container_del_section_window(VFIOContainer *container, +- MemoryRegionSection *section) ++static void ++vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } +@@ -430,6 +437,8 @@ static VFIOIOMMUOps vfio_iommu_spapr_ops; + static void setup_spapr_ops(VFIOContainerBase *bcontainer) + { + vfio_iommu_spapr_ops = *bcontainer->ops; ++ vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; ++ 
vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; + bcontainer->ops = &vfio_iommu_spapr_ops; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 9a2e0ace72..c6b1260911 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -183,11 +183,6 @@ VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, + void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange); + + /* SPAPR specific */ +-int vfio_container_add_section_window(VFIOContainer *container, +- MemoryRegionSection *section, +- Error **errp); +-void vfio_container_del_section_window(VFIOContainer *container, +- MemoryRegionSection *section); + int vfio_spapr_container_init(VFIOContainer *container, Error **errp); + void vfio_spapr_container_deinit(VFIOContainer *container); + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index f62a14ac73..4b6f017c6f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -75,6 +75,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, + int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); ++int vfio_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp); ++void vfio_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section); + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start); + int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, +-- +2.41.0.windows.1 + diff --git a/vfio.h-and-iommu.h-header-update-against-5.10.patch b/vfio.h-and-iommu.h-header-update-against-5.10.patch deleted file mode 100644 index 721f2b6fcbc9de84c77b59ddf68da60d3d1fd255..0000000000000000000000000000000000000000 --- a/vfio.h-and-iommu.h-header-update-against-5.10.patch +++ /dev/null @@ -1,760 +0,0 @@ -From 
95435c6778f38dee9ed6f3ee6fd9e022107315d7 Mon Sep 17 00:00:00 2001 -From: Kunkun Jiang -Date: Fri, 30 Jul 2021 09:15:31 +0800 -Subject: [PATCH] vfio.h and iommu.h header update against 5.10 - -Signed-off-by: Kunkun Jiang ---- - linux-headers/linux/iommu.h | 395 ++++++++++++++++++++++++++++++++++++ - linux-headers/linux/vfio.h | 249 ++++++++++++++++++++++- - 2 files changed, 641 insertions(+), 3 deletions(-) - create mode 100644 linux-headers/linux/iommu.h - -diff --git a/linux-headers/linux/iommu.h b/linux-headers/linux/iommu.h -new file mode 100644 -index 0000000000..773b7dc2d6 ---- /dev/null -+++ b/linux-headers/linux/iommu.h -@@ -0,0 +1,395 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+/* -+ * IOMMU user API definitions -+ */ -+ -+#ifndef IOMMU_H -+#define IOMMU_H -+ -+#include -+ -+#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ -+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ -+#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ -+#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ -+ -+/* Generic fault types, can be expanded IRQ remapping fault */ -+enum iommu_fault_type { -+ IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */ -+ IOMMU_FAULT_PAGE_REQ, /* page request fault */ -+}; -+ -+enum iommu_fault_reason { -+ IOMMU_FAULT_REASON_UNKNOWN = 0, -+ -+ /* Could not access the PASID table (fetch caused external abort) */ -+ IOMMU_FAULT_REASON_PASID_FETCH, -+ -+ /* PASID entry is invalid or has configuration errors */ -+ IOMMU_FAULT_REASON_BAD_PASID_ENTRY, -+ -+ /* -+ * PASID is out of range (e.g. exceeds the maximum PASID -+ * supported by the IOMMU) or disabled. 
-+ */ -+ IOMMU_FAULT_REASON_PASID_INVALID, -+ -+ /* -+ * An external abort occurred fetching (or updating) a translation -+ * table descriptor -+ */ -+ IOMMU_FAULT_REASON_WALK_EABT, -+ -+ /* -+ * Could not access the page table entry (Bad address), -+ * actual translation fault -+ */ -+ IOMMU_FAULT_REASON_PTE_FETCH, -+ -+ /* Protection flag check failed */ -+ IOMMU_FAULT_REASON_PERMISSION, -+ -+ /* access flag check failed */ -+ IOMMU_FAULT_REASON_ACCESS, -+ -+ /* Output address of a translation stage caused Address Size fault */ -+ IOMMU_FAULT_REASON_OOR_ADDRESS, -+}; -+ -+/** -+ * struct iommu_fault_unrecoverable - Unrecoverable fault data -+ * @reason: reason of the fault, from &enum iommu_fault_reason -+ * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values) -+ * @pasid: Process Address Space ID -+ * @perm: requested permission access using by the incoming transaction -+ * (IOMMU_FAULT_PERM_* values) -+ * @addr: offending page address -+ * @fetch_addr: address that caused a fetch abort, if any -+ */ -+struct iommu_fault_unrecoverable { -+ __u32 reason; -+#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0) -+#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1) -+#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2) -+ __u32 flags; -+ __u32 pasid; -+ __u32 perm; -+ __u64 addr; -+ __u64 fetch_addr; -+}; -+ -+/** -+ * struct iommu_fault_page_request - Page Request data -+ * @flags: encodes whether the corresponding fields are valid and whether this -+ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values). -+ * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response -+ * must have the same PASID value as the page request. When it is clear, -+ * the page response should not have a PASID. 
-+ * @pasid: Process Address Space ID -+ * @grpid: Page Request Group Index -+ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) -+ * @addr: page address -+ * @private_data: device-specific private information -+ */ -+struct iommu_fault_page_request { -+#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) -+#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) -+#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) -+#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) -+ __u32 flags; -+ __u32 pasid; -+ __u32 grpid; -+ __u32 perm; -+ __u64 addr; -+ __u64 private_data[2]; -+}; -+ -+/** -+ * struct iommu_fault - Generic fault data -+ * @type: fault type from &enum iommu_fault_type -+ * @padding: reserved for future use (should be zero) -+ * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV -+ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ -+ * @padding2: sets the fault size to allow for future extensions -+ */ -+struct iommu_fault { -+ __u32 type; -+ __u32 padding; -+ union { -+ struct iommu_fault_unrecoverable event; -+ struct iommu_fault_page_request prm; -+ __u8 padding2[56]; -+ }; -+}; -+ -+/** -+ * enum iommu_page_response_code - Return status of fault handlers -+ * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables -+ * populated, retry the access. This is "Success" in PCI PRI. -+ * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from -+ * this device if possible. This is "Response Failure" in PCI PRI. -+ * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the -+ * access. This is "Invalid Request" in PCI PRI. 
-+ */ -+enum iommu_page_response_code { -+ IOMMU_PAGE_RESP_SUCCESS = 0, -+ IOMMU_PAGE_RESP_INVALID, -+ IOMMU_PAGE_RESP_FAILURE, -+}; -+ -+/** -+ * struct iommu_page_response - Generic page response information -+ * @argsz: User filled size of this data -+ * @version: API version of this structure -+ * @flags: encodes whether the corresponding fields are valid -+ * (IOMMU_FAULT_PAGE_RESPONSE_* values) -+ * @pasid: Process Address Space ID -+ * @grpid: Page Request Group Index -+ * @code: response code from &enum iommu_page_response_code -+ */ -+struct iommu_page_response { -+ __u32 argsz; -+#define IOMMU_PAGE_RESP_VERSION_1 1 -+ __u32 version; -+#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) -+ __u32 flags; -+ __u32 pasid; -+ __u32 grpid; -+ __u32 code; -+}; -+ -+/* defines the granularity of the invalidation */ -+enum iommu_inv_granularity { -+ IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ -+ IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ -+ IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ -+ IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ -+}; -+ -+/** -+ * struct iommu_inv_addr_info - Address Selective Invalidation Structure -+ * -+ * @flags: indicates the granularity of the address-selective invalidation -+ * - If the PASID bit is set, the @pasid field is populated and the invalidation -+ * relates to cache entries tagged with this PASID and matching the address -+ * range. -+ * - If ARCHID bit is set, @archid is populated and the invalidation relates -+ * to cache entries tagged with this architecture specific ID and matching -+ * the address range. -+ * - Both PASID and ARCHID can be set as they may tag different caches. -+ * - If neither PASID or ARCHID is set, global addr invalidation applies. -+ * - The LEAF flag indicates whether only the leaf PTE caching needs to be -+ * invalidated and other paging structure caches can be preserved. 
-+ * @pasid: process address space ID -+ * @archid: architecture-specific ID -+ * @addr: first stage/level input address -+ * @granule_size: page/block size of the mapping in bytes -+ * @nb_granules: number of contiguous granules to be invalidated -+ */ -+struct iommu_inv_addr_info { -+#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) -+#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) -+#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) -+ __u32 flags; -+ __u32 archid; -+ __u64 pasid; -+ __u64 addr; -+ __u64 granule_size; -+ __u64 nb_granules; -+}; -+ -+/** -+ * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure -+ * -+ * @flags: indicates the granularity of the PASID-selective invalidation -+ * - If the PASID bit is set, the @pasid field is populated and the invalidation -+ * relates to cache entries tagged with this PASID and matching the address -+ * range. -+ * - If the ARCHID bit is set, the @archid is populated and the invalidation -+ * relates to cache entries tagged with this architecture specific ID and -+ * matching the address range. -+ * - Both PASID and ARCHID can be set as they may tag different caches. -+ * - At least one of PASID or ARCHID must be set. 
-+ * @pasid: process address space ID -+ * @archid: architecture-specific ID -+ */ -+struct iommu_inv_pasid_info { -+#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) -+#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) -+ __u32 flags; -+ __u32 archid; -+ __u64 pasid; -+}; -+ -+/** -+ * struct iommu_cache_invalidate_info - First level/stage invalidation -+ * information -+ * @argsz: User filled size of this data -+ * @version: API version of this structure -+ * @cache: bitfield that allows to select which caches to invalidate -+ * @granularity: defines the lowest granularity used for the invalidation: -+ * domain > PASID > addr -+ * @padding: reserved for future use (should be zero) -+ * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID -+ * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR -+ * -+ * Not all the combinations of cache/granularity are valid: -+ * -+ * +--------------+---------------+---------------+---------------+ -+ * | type / | DEV_IOTLB | IOTLB | PASID | -+ * | granularity | | | cache | -+ * +==============+===============+===============+===============+ -+ * | DOMAIN | N/A | Y | Y | -+ * +--------------+---------------+---------------+---------------+ -+ * | PASID | Y | Y | Y | -+ * +--------------+---------------+---------------+---------------+ -+ * | ADDR | Y | Y | N/A | -+ * +--------------+---------------+---------------+---------------+ -+ * -+ * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than -+ * @version and @cache. -+ * -+ * If multiple cache types are invalidated simultaneously, they all -+ * must support the used granularity. 
-+ */ -+struct iommu_cache_invalidate_info { -+ __u32 argsz; -+#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 -+ __u32 version; -+/* IOMMU paging structure cache */ -+#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ -+#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ -+#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ -+#define IOMMU_CACHE_INV_TYPE_NR (3) -+ __u8 cache; -+ __u8 granularity; -+ __u8 padding[6]; -+ union { -+ struct iommu_inv_pasid_info pasid_info; -+ struct iommu_inv_addr_info addr_info; -+ } granu; -+}; -+ -+/** -+ * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest -+ * SVA binding. -+ * -+ * @flags: VT-d PASID table entry attributes -+ * @pat: Page attribute table data to compute effective memory type -+ * @emt: Extended memory type -+ * -+ * Only guest vIOMMU selectable and effective options are passed down to -+ * the host IOMMU. -+ */ -+struct iommu_gpasid_bind_data_vtd { -+#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ -+#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ -+#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ -+#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ -+#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ -+#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ -+#define IOMMU_SVA_VTD_GPASID_LAST (1 << 6) -+ __u64 flags; -+ __u32 pat; -+ __u32 emt; -+}; -+ -+#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ -+ IOMMU_SVA_VTD_GPASID_EMTE | \ -+ IOMMU_SVA_VTD_GPASID_PCD | \ -+ IOMMU_SVA_VTD_GPASID_PWT) -+ -+/** -+ * struct iommu_gpasid_bind_data - Information about device and guest PASID binding -+ * @argsz: User filled size of this data -+ * @version: Version of this data structure -+ * @format: PASID table entry format -+ * @flags: Additional information on guest bind request -+ * @gpgd: Guest page directory 
base of the guest mm to bind -+ * @hpasid: Process address space ID used for the guest mm in host IOMMU -+ * @gpasid: Process address space ID used for the guest mm in guest IOMMU -+ * @addr_width: Guest virtual address width -+ * @padding: Reserved for future use (should be zero) -+ * @vtd: Intel VT-d specific data -+ * -+ * Guest to host PASID mapping can be an identity or non-identity, where guest -+ * has its own PASID space. For non-identify mapping, guest to host PASID lookup -+ * is needed when VM programs guest PASID into an assigned device. VMM may -+ * trap such PASID programming then request host IOMMU driver to convert guest -+ * PASID to host PASID based on this bind data. -+ */ -+struct iommu_gpasid_bind_data { -+ __u32 argsz; -+#define IOMMU_GPASID_BIND_VERSION_1 1 -+ __u32 version; -+#define IOMMU_PASID_FORMAT_INTEL_VTD 1 -+#define IOMMU_PASID_FORMAT_LAST 2 -+ __u32 format; -+ __u32 addr_width; -+#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ -+ __u64 flags; -+ __u64 gpgd; -+ __u64 hpasid; -+ __u64 gpasid; -+ __u8 padding[8]; -+ /* Vendor specific data */ -+ union { -+ struct iommu_gpasid_bind_data_vtd vtd; -+ } vendor; -+}; -+ -+/** -+ * struct iommu_pasid_smmuv3 - ARM SMMUv3 Stream Table Entry stage 1 related -+ * information -+ * @version: API version of this structure -+ * @s1fmt: STE s1fmt (format of the CD table: single CD, linear table -+ * or 2-level table) -+ * @s1dss: STE s1dss (specifies the behavior when @pasid_bits != 0 -+ * and no PASID is passed along with the incoming transaction) -+ * @padding: reserved for future use (should be zero) -+ * -+ * The PASID table is referred to as the Context Descriptor (CD) table on ARM -+ * SMMUv3. Please refer to the ARM SMMU 3.x spec (ARM IHI 0070A) for full -+ * details. 
-+ */ -+struct iommu_pasid_smmuv3 { -+#define PASID_TABLE_SMMUV3_CFG_VERSION_1 1 -+ __u32 version; -+ __u8 s1fmt; -+ __u8 s1dss; -+ __u8 padding[2]; -+}; -+ -+/** -+ * struct iommu_pasid_table_config - PASID table data used to bind guest PASID -+ * table to the host IOMMU -+ * @argsz: User filled size of this data -+ * @version: API version to prepare for future extensions -+ * @base_ptr: guest physical address of the PASID table -+ * @format: format of the PASID table -+ * @pasid_bits: number of PASID bits used in the PASID table -+ * @config: indicates whether the guest translation stage must -+ * be translated, bypassed or aborted. -+ * @padding: reserved for future use (should be zero) -+ * @vendor_data.smmuv3: table information when @format is -+ * %IOMMU_PASID_FORMAT_SMMUV3 -+ */ -+struct iommu_pasid_table_config { -+ __u32 argsz; -+#define PASID_TABLE_CFG_VERSION_1 1 -+ __u32 version; -+ __u64 base_ptr; -+#define IOMMU_PASID_FORMAT_SMMUV3 1 -+ __u32 format; -+ __u8 pasid_bits; -+#define IOMMU_PASID_CONFIG_TRANSLATE 1 -+#define IOMMU_PASID_CONFIG_BYPASS 2 -+#define IOMMU_PASID_CONFIG_ABORT 3 -+ __u8 config; -+ __u8 padding[2]; -+ union { -+ struct iommu_pasid_smmuv3 smmuv3; -+ } vendor_data; -+}; -+ -+#endif /* _UAPI_IOMMU_H */ -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index 120387ba58..d6edfbd2f5 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -14,6 +14,7 @@ - - #include - #include -+#include - - #define VFIO_API_VERSION 0 - -@@ -211,8 +212,11 @@ struct vfio_device_info { - #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ - #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ - #define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ -+#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ -+#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ - __u32 num_regions; /* Max region index + 1 */ - __u32 num_irqs; /* Max IRQ index + 1 */ -+ __u32 cap_offset; 
/* Offset within info struct of first cap */ - }; - #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) - -@@ -228,6 +232,15 @@ struct vfio_device_info { - #define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" - #define VFIO_DEVICE_API_AP_STRING "vfio-ap" - -+/* -+ * The following capabilities are unique to s390 zPCI devices. Their contents -+ * are further-defined in vfio_zdev.h -+ */ -+#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 -+#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 -+#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 -+#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 -+ - /** - * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, - * struct vfio_region_info) -@@ -316,6 +329,7 @@ struct vfio_region_info_cap_type { - #define VFIO_REGION_TYPE_GFX (1) - #define VFIO_REGION_TYPE_CCW (2) - #define VFIO_REGION_TYPE_MIGRATION (3) -+#define VFIO_REGION_TYPE_NESTED (4) - - /* sub-types for VFIO_REGION_TYPE_PCI_* */ - -@@ -340,6 +354,10 @@ struct vfio_region_info_cap_type { - /* sub-types for VFIO_REGION_TYPE_GFX */ - #define VFIO_REGION_SUBTYPE_GFX_EDID (1) - -+/* sub-types for VFIO_REGION_TYPE_NESTED */ -+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT (1) -+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE (2) -+ - /** - * struct vfio_region_gfx_edid - EDID region layout. - * -@@ -472,7 +490,7 @@ struct vfio_region_gfx_edid { - * 5. Resumed - * |--------->| - * -- * 0. Default state of VFIO device is _RUNNNG when the user application starts. -+ * 0. Default state of VFIO device is _RUNNING when the user application starts. - * 1. During normal shutdown of the user application, the user application may - * optionally change the VFIO device state from _RUNNING to _STOP. This - * transition is optional. 
The vendor driver must support this transition but -@@ -695,11 +713,30 @@ struct vfio_irq_info { - #define VFIO_IRQ_INFO_MASKABLE (1 << 1) - #define VFIO_IRQ_INFO_AUTOMASKED (1 << 2) - #define VFIO_IRQ_INFO_NORESIZE (1 << 3) -+#define VFIO_IRQ_INFO_FLAG_CAPS (1 << 4) /* Info supports caps */ - __u32 index; /* IRQ index */ - __u32 count; /* Number of IRQs within this index */ -+ __u32 cap_offset; /* Offset within info struct of first cap */ - }; - #define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9) - -+/* -+ * The irq type capability allows IRQs unique to a specific device or -+ * class of devices to be exposed. -+ * -+ * The structures below define version 1 of this capability. -+ */ -+#define VFIO_IRQ_INFO_CAP_TYPE 3 -+ -+struct vfio_irq_info_cap_type { -+ struct vfio_info_cap_header header; -+ __u32 type; /* global per bus driver */ -+ __u32 subtype; /* type specific */ -+}; -+ -+#define VFIO_IRQ_TYPE_NESTED (1) -+#define VFIO_IRQ_SUBTYPE_DMA_FAULT (1) -+ - /** - * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set) - * -@@ -801,7 +838,8 @@ enum { - VFIO_PCI_MSIX_IRQ_INDEX, - VFIO_PCI_ERR_IRQ_INDEX, - VFIO_PCI_REQ_IRQ_INDEX, -- VFIO_PCI_NUM_IRQS -+ VFIO_PCI_NUM_IRQS = 5 /* Fixed user ABI, IRQ indexes >=5 use */ -+ /* device specific cap to define content */ - }; - - /* -@@ -985,6 +1023,68 @@ struct vfio_device_feature { - */ - #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) - -+/* -+ * Capability exposed by the DMA fault region -+ * @version: ABI version -+ */ -+#define VFIO_REGION_INFO_CAP_DMA_FAULT 6 -+ -+struct vfio_region_info_cap_fault { -+ struct vfio_info_cap_header header; -+ __u32 version; -+}; -+ -+/* -+ * Capability exposed by the DMA fault response region -+ * @version: ABI version -+ */ -+#define VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE 7 -+ -+struct vfio_region_info_cap_fault_response { -+ struct vfio_info_cap_header header; -+ __u32 version; -+}; -+ -+/* -+ * DMA Fault Region Layout -+ * @tail: index relative to the 
start of the ring buffer at which the -+ * consumer finds the next item in the buffer -+ * @entry_size: fault ring buffer entry size in bytes -+ * @nb_entries: max capacity of the fault ring buffer -+ * @offset: ring buffer offset relative to the start of the region -+ * @head: index relative to the start of the ring buffer at which the -+ * producer (kernel) inserts items into the buffers -+ */ -+struct vfio_region_dma_fault { -+ /* Write-Only */ -+ __u32 tail; -+ /* Read-Only */ -+ __u32 entry_size; -+ __u32 nb_entries; -+ __u32 offset; -+ __u32 head; -+}; -+ -+/* -+ * DMA Fault Response Region Layout -+ * @head: index relative to the start of the ring buffer at which the -+ * producer (userspace) insert responses into the buffer -+ * @entry_size: fault ring buffer entry size in bytes -+ * @nb_entries: max capacity of the fault ring buffer -+ * @offset: ring buffer offset relative to the start of the region -+ * @tail: index relative to the start of the ring buffer at which the -+ * consumer (kernel) finds the next item in the buffer -+ */ -+struct vfio_region_dma_fault_response { -+ /* Write-Only */ -+ __u32 head; -+ /* Read-Only */ -+ __u32 entry_size; -+ __u32 nb_entries; -+ __u32 offset; -+ __u32 tail; -+}; -+ - /* -------- API for Type1 VFIO IOMMU -------- */ - - /** -@@ -1049,6 +1149,21 @@ struct vfio_iommu_type1_info_cap_migration { - __u64 max_dirty_bitmap_size; /* in bytes */ - }; - -+/* -+ * The DMA available capability allows to report the current number of -+ * simultaneously outstanding DMA mappings that are allowed. -+ * -+ * The structure below defines version 1 of this capability. -+ * -+ * avail: specifies the current number of outstanding DMA mappings allowed. 
-+ */ -+#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 -+ -+struct vfio_iommu_type1_info_dma_avail { -+ struct vfio_info_cap_header header; -+ __u32 avail; -+}; -+ - #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) - - /** -@@ -1072,7 +1187,7 @@ struct vfio_iommu_type1_dma_map { - struct vfio_bitmap { - __u64 pgsize; /* page size for bitmap in bytes */ - __u64 size; /* in bytes */ -- __u64 *data; /* one bit per page */ -+ __u64 *data; /* one bit per page */ - }; - - /** -@@ -1188,6 +1303,134 @@ struct vfio_iommu_type1_dirty_bitmap_get { - - #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) - -+/* -+ * VFIO_IOMMU_BIND_PROCESS -+ * -+ * Allocate a PASID for a process address space, and use it to attach this -+ * process to all devices in the container. Devices can then tag their DMA -+ * traffic with the returned @pasid to perform transactions on the associated -+ * virtual address space. Mapping and unmapping buffers is performed by standard -+ * functions such as mmap and malloc. -+ * -+ * If flag is VFIO_IOMMU_BIND_PID, @pid contains the pid of a foreign process to -+ * bind. Otherwise the current task is bound. Given that the caller owns the -+ * device, setting this flag grants the caller read and write permissions on the -+ * entire address space of foreign process described by @pid. Therefore, -+ * permission to perform the bind operation on a foreign process is governed by -+ * the ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check. See man ptrace(2) -+ * for more information. -+ * -+ * On success, VFIO writes a Process Address Space ID (PASID) into @pasid. This -+ * ID is unique to a process and can be used on all devices in the container. -+ * -+ * On fork, the child inherits the device fd and can use the bonds setup by its -+ * parent. Consequently, the child has R/W access on the address spaces bound by -+ * its parent. After an execv, the device fd is closed and the child doesn't -+ * have access to the address space anymore. 
-+ * -+ * To remove a bond between process and container, VFIO_IOMMU_UNBIND ioctl is -+ * issued with the same parameters. If a pid was specified in VFIO_IOMMU_BIND, -+ * it should also be present for VFIO_IOMMU_UNBIND. Otherwise unbind the current -+ * task from the container. -+ */ -+struct vfio_iommu_type1_bind_process { -+ __u32 flags; -+#define VFIO_IOMMU_BIND_PID (1 << 0) -+ __u32 pasid; -+ __s32 pid; -+}; -+ -+/* -+ * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes -+ * vfio_iommu_type1_bind_process in data. -+ */ -+struct vfio_iommu_type1_bind { -+ __u32 argsz; -+ __u32 flags; -+#define VFIO_IOMMU_BIND_PROCESS (1 << 0) -+ __u8 data[]; -+}; -+ -+/* -+ * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct vfio_iommu_bind) -+ * -+ * Manage address spaces of devices in this container. Initially a TYPE1 -+ * container can only have one address space, managed with -+ * VFIO_IOMMU_MAP/UNMAP_DMA. -+ * -+ * An IOMMU of type VFIO_TYPE1_NESTING_IOMMU can be managed by both MAP/UNMAP -+ * and BIND ioctls at the same time. MAP/UNMAP acts on the stage-2 (host) page -+ * tables, and BIND manages the stage-1 (guest) page tables. Other types of -+ * IOMMU may allow MAP/UNMAP and BIND to coexist, where MAP/UNMAP controls -+ * non-PASID traffic and BIND controls PASID traffic. But this depends on the -+ * underlying IOMMU architecture and isn't guaranteed. -+ * -+ * Availability of this feature depends on the device, its bus, the underlying -+ * IOMMU and the CPU architecture. -+ * -+ * returns: 0 on success, -errno on failure. -+ */ -+#define VFIO_IOMMU_BIND _IO(VFIO_TYPE, VFIO_BASE + 22) -+ -+/* -+ * VFIO_IOMMU_UNBIND - _IOWR(VFIO_TYPE, VFIO_BASE + 23, struct vfio_iommu_bind) -+ * -+ * Undo what was done by the corresponding VFIO_IOMMU_BIND ioctl. 
-+ */ -+#define VFIO_IOMMU_UNBIND _IO(VFIO_TYPE, VFIO_BASE + 23) -+ -+/* -+ * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 18, -+ * struct vfio_iommu_type1_set_pasid_table) -+ * -+ * The SET operation passes a PASID table to the host while the -+ * UNSET operation detaches the one currently programmed. It is -+ * allowed to "SET" the table several times without unsetting as -+ * long as the table config does not stay IOMMU_PASID_CONFIG_TRANSLATE. -+ */ -+struct vfio_iommu_type1_set_pasid_table { -+ __u32 argsz; -+ __u32 flags; -+#define VFIO_PASID_TABLE_FLAG_SET (1 << 0) -+#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1) -+ struct iommu_pasid_table_config config; /* used on SET */ -+}; -+ -+#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 18) -+ -+/** -+ * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, -+ * struct vfio_iommu_type1_cache_invalidate) -+ * -+ * Propagate guest IOMMU cache invalidation to the host. -+ */ -+struct vfio_iommu_type1_cache_invalidate { -+ __u32 argsz; -+ __u32 flags; -+ struct iommu_cache_invalidate_info info; -+}; -+#define VFIO_IOMMU_CACHE_INVALIDATE _IO(VFIO_TYPE, VFIO_BASE + 19) -+ -+/** -+ * VFIO_IOMMU_SET_MSI_BINDING - _IOWR(VFIO_TYPE, VFIO_BASE + 20, -+ * struct vfio_iommu_type1_set_msi_binding) -+ * -+ * Pass a stage 1 MSI doorbell mapping to the host so that this -+ * latter can build a nested stage2 mapping. Or conversely tear -+ * down a previously bound stage 1 MSI binding. 
-+ */ -+struct vfio_iommu_type1_set_msi_binding { -+ __u32 argsz; -+ __u32 flags; -+#define VFIO_IOMMU_BIND_MSI (1 << 0) -+#define VFIO_IOMMU_UNBIND_MSI (1 << 1) -+ __u64 iova; /* MSI guest IOVA */ -+ /* Fields below are used on BIND */ -+ __u64 gpa; /* MSI guest physical address */ -+ __u64 size; /* size of stage1 mapping (bytes) */ -+}; -+#define VFIO_IOMMU_SET_MSI_BINDING _IO(VFIO_TYPE, VFIO_BASE + 20) -+ - /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ - - /* --- -2.27.0 - diff --git a/vga-Force-full-update-for-CSV3-guest.patch b/vga-Force-full-update-for-CSV3-guest.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c7e89e03d3144c57cc84fe7e4b876e84f13fd37 --- /dev/null +++ b/vga-Force-full-update-for-CSV3-guest.patch @@ -0,0 +1,129 @@ +From b791d13a0630e6640b3c39dc90671a2150734a24 Mon Sep 17 00:00:00 2001 +From: Xin Jiang +Date: Thu, 13 Jul 2023 09:35:10 +0800 +Subject: [PATCH] vga: Force full update for CSV3 guest + +As CSV3's NPT(nested page table) is managed by firmware, VMM is hard +to track the dirty pages of vga buffer. Although VMM could perform +a command to firmware to update read/write attribute of vga buffer +in NPT, it costs more time due to communication between VMM and +firmware. So the simplest method is to fully update vga buffer +always. 
+ +Signed-off-by: Xin Jiang +Signed-off-by: hanliyang +--- + accel/kvm/kvm-all.c | 1 + + accel/stubs/kvm-stub.c | 2 ++ + hw/display/vga.c | 7 +++++++ + include/sysemu/kvm.h | 8 ++++++++ + target/i386/csv.c | 3 +++ + 5 files changed, 21 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 8077630825..8028caddf9 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -103,6 +103,7 @@ bool kvm_allowed; + bool kvm_readonly_mem_allowed; + bool kvm_vm_attributes_allowed; + bool kvm_msi_use_devid; ++bool kvm_csv3_allowed; + bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index ad39a434c4..b071afee45 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -27,6 +27,8 @@ bool kvm_msi_use_devid; + + bool virtcca_cvm_allowed; + ++bool kvm_csv3_allowed; ++ + void kvm_flush_coalesced_mmio_buffer(void) + { + } +diff --git a/hw/display/vga.c b/hw/display/vga.c +index cb6b6ee2ca..3f1358676b 100644 +--- a/hw/display/vga.c ++++ b/hw/display/vga.c +@@ -39,6 +39,8 @@ + #include "migration/vmstate.h" + #include "trace.h" + ++#include "sysemu/kvm.h" ++ + //#define DEBUG_VGA_MEM + //#define DEBUG_VGA_REG + +@@ -1790,6 +1792,11 @@ static void vga_update_display(void *opaque) + s->cursor_blink_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + full_update = 1; + } ++ ++ /* Force to full update in CSV guest. 
*/ ++ if (kvm_csv3_enabled()) ++ full_update = 1; ++ + switch(graphic_mode) { + case GMODE_TEXT: + vga_draw_text(s, full_update); +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 31af5f0e24..fd8634cc8f 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -44,6 +44,7 @@ extern bool kvm_gsi_routing_allowed; + extern bool kvm_gsi_direct_mapping; + extern bool kvm_readonly_mem_allowed; + extern bool kvm_msi_use_devid; ++extern bool kvm_csv3_allowed; + + #define kvm_enabled() (kvm_allowed) + #define virtcca_cvm_enabled() (virtcca_cvm_allowed) +@@ -147,6 +148,12 @@ extern bool kvm_msi_use_devid; + */ + #define kvm_msi_devid_required() (kvm_msi_use_devid) + ++/** ++ * kvm_csv3_enabled: ++ * Returns: true if CSV3 feature is used for the VM. ++ */ ++#define kvm_csv3_enabled() (kvm_csv3_allowed) ++ + #else + + #define kvm_enabled() (0) +@@ -163,6 +170,7 @@ extern bool kvm_msi_use_devid; + #define kvm_gsi_direct_mapping() (false) + #define kvm_readonly_mem_enabled() (false) + #define kvm_msi_devid_required() (false) ++#define kvm_csv3_enabled() (false) + + #endif /* CONFIG_KVM_IS_POSSIBLE */ + +diff --git a/target/i386/csv.c b/target/i386/csv.c +index 12282ba451..65d87de003 100644 +--- a/target/i386/csv.c ++++ b/target/i386/csv.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qemu/error-report.h" + #include "qapi/error.h" ++#include "sysemu/kvm.h" + + #include + +@@ -60,6 +61,8 @@ csv3_init(uint32_t policy, int fd, void *state, struct sev_ops *ops) + return -1; + } + ++ kvm_csv3_allowed = true; ++ + csv3_guest.sev_fd = fd; + csv3_guest.state = state; + csv3_guest.sev_ioctl = ops->sev_ioctl; +-- +2.41.0.windows.1 + diff --git a/vhost-Add-names-to-section-rounded-warning.patch b/vhost-Add-names-to-section-rounded-warning.patch deleted file mode 100644 index 09c36eda5b8f7063d3543ec1adab3349bf87ba7b..0000000000000000000000000000000000000000 --- a/vhost-Add-names-to-section-rounded-warning.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 
437a9d2c7e48495ffc467808eece045579956c79 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 16 Jan 2020 20:24:13 +0000 -Subject: [PATCH] vhost: Add names to section rounded warning - -Add the memory region names to section rounding/alignment -warnings. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - hw/virtio/vhost.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 9c16f0d107..ae61c33c15 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -591,9 +591,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, - * match up in the same RAMBlock if they do. - */ - if (mrs_gpa < prev_gpa_start) { -- error_report("%s:Section rounded to %"PRIx64 -- " prior to previous %"PRIx64, -- __func__, mrs_gpa, prev_gpa_start); -+ error_report("%s:Section '%s' rounded to %"PRIx64 -+ " prior to previous '%s' %"PRIx64, -+ __func__, section->mr->name, mrs_gpa, -+ prev_sec->mr->name, prev_gpa_start); - /* A way to cleanly fail here would be better */ - return; - } --- -2.27.0 - diff --git a/vhost-Fix-memory-region-section-comparison.patch b/vhost-Fix-memory-region-section-comparison.patch deleted file mode 100644 index f96bbb3795f3b4e8ee72bc8b6c6dd2ccd91aee18..0000000000000000000000000000000000000000 --- a/vhost-Fix-memory-region-section-comparison.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 8df0aa6ebb27fcf535a7d28cfdc006cd9a34a041 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 14 Aug 2019 18:55:35 +0100 -Subject: [PATCH] vhost: Fix memory region section comparison - -Using memcmp to compare structures wasn't safe, -as I found out on ARM when I was getting falce miscompares. - -Use the helper function for comparing the MRSs. 
- -Fixes: ade6d081fc33948e56e6 ("vhost: Regenerate region list from changed sections list") -Cc: qemu-stable@nongnu.org -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20190814175535.2023-4-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 3fc4a64cbaed2ddee4c60ddc06740b320e18ab82) -Signed-off-by: Michael Roth ---- - hw/virtio/vhost.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 6d3a013..221e635 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -451,8 +451,13 @@ static void vhost_commit(MemoryListener *listener) - changed = true; - } else { - /* Same size, lets check the contents */ -- changed = n_old_sections && memcmp(dev->mem_sections, old_sections, -- n_old_sections * sizeof(old_sections[0])) != 0; -+ for (int i = 0; i < n_old_sections; i++) { -+ if (!MemoryRegionSection_eq(&old_sections[i], -+ &dev->mem_sections[i])) { -+ changed = true; -+ break; -+ } -+ } - } - - trace_vhost_commit(dev->started, changed); --- -1.8.3.1 - diff --git a/vhost-add-vhost_dev_suspend-resume_op.patch b/vhost-add-vhost_dev_suspend-resume_op.patch new file mode 100644 index 0000000000000000000000000000000000000000..c400fb2e0705786be85cdc2dffd9246b2af30b0c --- /dev/null +++ b/vhost-add-vhost_dev_suspend-resume_op.patch @@ -0,0 +1,38 @@ +From b0a62a84bd1c6ad5d4c11463371fcf267b56d902 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:13:41 +0800 +Subject: [PATCH] vhost: add vhost_dev_suspend/resume_op + +Introduce new vhost interface to support vhost device suspend & resume + +Signed-off-by: libai +--- + include/hw/virtio/vhost-backend.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index 71b02e4a12..84b8fa1075 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -155,6 +155,9 @@ typedef int 
(*vhost_set_device_state_fd_op)(struct vhost_dev *dev, + Error **errp); + typedef int (*vhost_check_device_state_op)(struct vhost_dev *dev, Error **errp); + ++typedef int (*vhost_dev_suspend_op)(struct vhost_dev *dev); ++typedef int (*vhost_dev_resume_op)(struct vhost_dev *dev); ++ + typedef struct VhostOps { + VhostBackendType backend_type; + vhost_backend_init vhost_backend_init; +@@ -208,6 +211,8 @@ typedef struct VhostOps { + vhost_supports_device_state_op vhost_supports_device_state; + vhost_set_device_state_fd_op vhost_set_device_state_fd; + vhost_check_device_state_op vhost_check_device_state; ++ vhost_dev_suspend_op vhost_dev_suspend; ++ vhost_dev_resume_op vhost_dev_resume; + } VhostOps; + + int vhost_backend_update_device_iotlb(struct vhost_dev *dev, +-- +2.27.0 + diff --git a/vhost-cancel-migration-when-vhost-user-restarted.patch b/vhost-cancel-migration-when-vhost-user-restarted-dur.patch similarity index 51% rename from vhost-cancel-migration-when-vhost-user-restarted.patch rename to vhost-cancel-migration-when-vhost-user-restarted-dur.patch index 38557753ce28b7120f23d21bc4f87fae22160d44..b551e4b014b4707df959b2acb42517ca6c5e3d31 100644 --- a/vhost-cancel-migration-when-vhost-user-restarted.patch +++ b/vhost-cancel-migration-when-vhost-user-restarted-dur.patch @@ -1,5 +1,5 @@ -From 750328e01afe4776eaddacde406063978dbf1291 Mon Sep 17 00:00:00 2001 -From: Ying Fang +From 302401ee7eb437712b69caff44ce684c88573dc6 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng Date: Mon, 29 Jul 2019 16:22:12 +0800 Subject: [PATCH] vhost: cancel migration when vhost-user restarted during migraiton @@ -11,24 +11,25 @@ lost. Let's cancel migraiton and report it to user in this abnormal situation. 
Signed-off-by: Ying Fang -Reviewed-by: Gonglei --- - hw/virtio/vhost.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) + hw/virtio/vhost.c | 9 +++++++-- + migration/migration.c | 2 +- + migration/migration.h | 1 + + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 7f61018f..f302c506 100644 +index 2c9ac79468..a8adc149ad 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -26,6 +26,7 @@ - #include "hw/virtio/virtio-bus.h" - #include "hw/virtio/virtio-access.h" + #include "hw/mem/memory-device.h" #include "migration/blocker.h" + #include "migration/qemu-file-types.h" +#include "migration/migration.h" #include "sysemu/dma.h" #include "trace.h" -@@ -808,20 +809,24 @@ static int vhost_migration_log(MemoryListener *listener, int enable) +@@ -1047,20 +1048,24 @@ check_dev_state: static void vhost_log_global_start(MemoryListener *listener) { int r; @@ -55,6 +56,31 @@ index 7f61018f..f302c506 100644 } } +diff --git a/migration/migration.c b/migration/migration.c +index 3ce04b2aaf..71a03b3248 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1377,7 +1377,7 @@ static void migrate_error_free(MigrationState *s) + } + } + +-static void migrate_fd_error(MigrationState *s, const Error *error) ++void migrate_fd_error(MigrationState *s, const Error *error) + { + trace_migrate_fd_error(error_get_pretty(error)); + assert(s->to_dst_file == NULL); +diff --git a/migration/migration.h b/migration/migration.h +index cf2c9c88e0..6aafa04314 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -482,6 +482,7 @@ bool migration_has_all_channels(void); + + uint64_t migrate_max_downtime(void); + ++void migrate_fd_error(MigrationState *s, const Error *error); + void migrate_set_error(MigrationState *s, const Error *error); + bool migrate_has_error(MigrationState *s); + -- -2.19.1 +2.27.0 diff --git a/vhost-implement-migration-state-notifier-for-vdpa-de.patch 
b/vhost-implement-migration-state-notifier-for-vdpa-de.patch new file mode 100644 index 0000000000000000000000000000000000000000..16d939032c5859f2e774abe3242356feb6dd057d --- /dev/null +++ b/vhost-implement-migration-state-notifier-for-vdpa-de.patch @@ -0,0 +1,87 @@ +From 3ef6dc341d6921a95564e9089f41ddbd79cd2a94 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:55:53 +0800 +Subject: [PATCH] vhost: implement migration state notifier for vdpa device + +Register migration state notifier to support triggered by +migration exceptions + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 29 +++++++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev.h | 1 + + 2 files changed, 30 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1872f11f3f..9b47e3ed45 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -23,6 +23,7 @@ + #include "hw/virtio/virtio-bus.h" + #include "migration/register.h" + #include "migration/migration.h" ++#include "migration/misc.h" + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/qemu-file-types.h" +@@ -354,6 +355,31 @@ static SaveVMHandlers savevm_vdpa_handlers = { + .load_setup = vdpa_load_setup, + }; + ++static void vdpa_migration_state_notifier(Notifier *notifier, void *data) ++{ ++ MigrationState *s = data; ++ VhostVdpaDevice *vdev = container_of(notifier, ++ VhostVdpaDevice, ++ migration_state); ++ struct vhost_dev *hdev = &vdev->dev; ++ int ret; ++ ++ switch (s->state) { ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_CANCELLED: ++ case MIGRATION_STATUS_FAILED: ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_CANCEL); ++ if (ret) { ++ error_report("Failed to set state CANCEL\n"); ++ } ++ ++ break; ++ case MIGRATION_STATUS_COMPLETED: ++ default: ++ break; ++ } ++} ++ + void vdpa_migration_register(VhostVdpaDevice *vdev) + { + vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), +@@ -361,10 +387,13 @@ 
void vdpa_migration_register(VhostVdpaDevice *vdev) + DEVICE(vdev)); + register_savevm_live("vdpa", -1, 1, + &savevm_vdpa_handlers, DEVICE(vdev)); ++ vdev->migration_state.notify = vdpa_migration_state_notifier; ++ migration_add_notifier(&vdev->migration_state, vdpa_migration_state_notifier); + } + + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { ++ migration_remove_notifier(&vdev->migration_state); + unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 43cbcef81b..20f50c76c6 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -39,6 +39,7 @@ struct VhostVdpaDevice { + bool started; + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; ++ Notifier migration_state; + }; + + #endif +-- +2.27.0 + diff --git a/vhost-implement-post-resume-bh.patch b/vhost-implement-post-resume-bh.patch new file mode 100644 index 0000000000000000000000000000000000000000..1da1164301cc664adbbcd67dc05d6ba9d66ff6dd --- /dev/null +++ b/vhost-implement-post-resume-bh.patch @@ -0,0 +1,57 @@ +From 229737ca91d4e81b4a14143da9981bd59b80a539 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:57:35 +0800 +Subject: [PATCH] vhost: implement post resume bh + +Set vdpa device mig state to post start when vm post start + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 662d4a29dc..1872f11f3f 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -26,6 +26,7 @@ + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/qemu-file-types.h" ++#include "qemu/main-loop.h" + + /* + * Flags used as delimiter: +@@ -218,6 +219,18 @@ err_host_notifiers: + return ret; + } + ++static 
void vdpa_dev_migration_handle_incoming_bh(void *opaque) ++{ ++ struct vhost_dev *hdev = opaque; ++ int ret; ++ ++ /* Post start device, unsupport rollback if failed! */ ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_POST_START); ++ if (ret) { ++ error_report("Failed to set state: POST_START\n"); ++ } ++} ++ + static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + { + VhostVdpaDevice *vdpa = VHOST_VDPA_DEVICE(opaque); +@@ -247,6 +260,10 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + + if (mis->state == RUN_STATE_RESTORE_VM) { + vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ /* post resume */ ++ mis->bh = qemu_bh_new(vdpa_dev_migration_handle_incoming_bh, ++ hdev); ++ qemu_bh_schedule(mis->bh); + } + } + } +-- +2.27.0 + diff --git a/vhost-implement-savevm_handler-for-vdpa-device.patch b/vhost-implement-savevm_handler-for-vdpa-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..721636ac0092081615e33da62edb6717e39b63e6 --- /dev/null +++ b/vhost-implement-savevm_handler-for-vdpa-device.patch @@ -0,0 +1,270 @@ +From 556aaa9632862505548d5083d369e92590fb2087 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:53:28 +0800 +Subject: [PATCH] vhost: implement savevm_handler for vdpa device + +Register savevm_handler ops for vdpa devices to support migration:x + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 175 +++++++++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev-mig.h | 13 +++ + linux-headers/linux/vhost.h | 9 ++ + 3 files changed, 197 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1d2bed2571..662d4a29dc 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -21,9 +21,21 @@ + #include "hw/virtio/vhost.h" + #include "hw/virtio/vdpa-dev.h" + #include "hw/virtio/virtio-bus.h" ++#include "migration/register.h" + #include "migration/migration.h" + #include "qemu/error-report.h" + #include 
"hw/virtio/vdpa-dev-mig.h" ++#include "migration/qemu-file-types.h" ++ ++/* ++ * Flags used as delimiter: ++ * 0xffffffff => MSB 32-bit all 1s ++ * 0xef10 => emulated (virtual) function IO ++ * 0x0000 => 16-bits reserved for flags ++ */ ++#define VDPA_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) ++#define VDPA_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) ++#define VDPA_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + + static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, + void *arg) +@@ -39,6 +51,80 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, + return ioctl(fd, request, arg); + } + ++static int vhost_vdpa_set_mig_state(struct vhost_dev *dev, uint8_t state) ++{ ++ return vhost_vdpa_call(dev, VHOST_VDPA_SET_MIG_STATE, &state); ++} ++ ++static int vhost_vdpa_dev_buffer_size(struct vhost_dev *dev, uint32_t *size) ++{ ++ return vhost_vdpa_call(dev, VHOST_GET_DEV_BUFFER_SIZE, size); ++} ++ ++static int vhost_vdpa_dev_buffer_save(struct vhost_dev *dev, QEMUFile *f) ++{ ++ struct vhost_vdpa_config *config; ++ unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); ++ uint32_t buffer_size = 0; ++ int ret; ++ ++ ret = vhost_vdpa_dev_buffer_size(dev, &buffer_size); ++ if (ret) { ++ error_report("get dev buffer size failed: %d\n", ret); ++ return ret; ++ } ++ ++ qemu_put_be32(f, buffer_size); ++ ++ config = g_malloc(buffer_size + config_size); ++ config->off = 0; ++ config->len = buffer_size; ++ ++ ret = vhost_vdpa_call(dev, VHOST_GET_DEV_BUFFER, config); ++ if (ret) { ++ error_report("get dev buffer failed: %d\n", ret); ++ goto free; ++ } ++ ++ qemu_put_buffer(f, config->buf, buffer_size); ++free: ++ g_free(config); ++ ++ return ret; ++} ++ ++static int vhost_vdpa_dev_buffer_load(struct vhost_dev *dev, QEMUFile *f) ++{ ++ struct vhost_vdpa_config *config; ++ unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); ++ uint32_t buffer_size, recv_size; ++ int ret; ++ ++ buffer_size = 
qemu_get_be32(f); ++ ++ config = g_malloc(buffer_size + config_size); ++ config->off = 0; ++ config->len = buffer_size; ++ ++ recv_size = qemu_get_buffer(f, config->buf, buffer_size); ++ if (recv_size != buffer_size) { ++ error_report("read dev mig buffer failed, buffer_size: %u, " ++ "recv_size: %u\n", buffer_size, recv_size); ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ ret = vhost_vdpa_call(dev, VHOST_SET_DEV_BUFFER, config); ++ if (ret) { ++ error_report("set dev buffer failed: %d\n", ret); ++ } ++ ++free: ++ g_free(config); ++ ++ return ret; ++} ++ + static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + { + VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); +@@ -165,14 +251,103 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + } + } + ++static int vdpa_save_setup(QEMUFile *f, void *opaque) ++{ ++ qemu_put_be64(f, VDPA_MIG_FLAG_DEV_SETUP_STATE); ++ qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); ++ ++ return qemu_file_get_error(f); ++} ++ ++static int vdpa_save_complete_precopy(QEMUFile *f, void *opaque) ++{ ++ VhostVdpaDevice *vdev = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdev->dev; ++ int ret; ++ ++ qemu_put_be64(f, VDPA_MIG_FLAG_DEV_CONFIG_STATE); ++ ret = vhost_vdpa_dev_buffer_save(hdev, f); ++ if (ret) { ++ error_report("Save vdpa device buffer failed: %d\n", ret); ++ return ret; ++ } ++ qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); ++ ++ return qemu_file_get_error(f); ++} ++ ++static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) ++{ ++ VhostVdpaDevice *vdev = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdev->dev; ++ ++ int ret; ++ uint64_t data; ++ ++ data = qemu_get_be64(f); ++ while (data != VDPA_MIG_FLAG_END_OF_STATE) { ++ if (data == VDPA_MIG_FLAG_DEV_SETUP_STATE) { ++ data = qemu_get_be64(f); ++ if (data == VDPA_MIG_FLAG_END_OF_STATE) { ++ return 0; ++ } else { ++ error_report("SETUP STATE: EOS not found 0x%lx\n", data); ++ return -EINVAL; ++ } ++ } else if (data == 
VDPA_MIG_FLAG_DEV_CONFIG_STATE) { ++ ret = vhost_vdpa_dev_buffer_load(hdev, f); ++ if (ret) { ++ error_report("fail to restore device buffer.\n"); ++ return ret; ++ } ++ } ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ error_report("qemu file error: %d\n", ret); ++ return ret; ++ } ++ data = qemu_get_be64(f); ++ } ++ ++ return 0; ++} ++ ++static int vdpa_load_setup(QEMUFile *f, void *opaque) ++{ ++ VhostVdpaDevice *v = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &v->dev; ++ int ret = 0; ++ ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_PRE_START); ++ if (ret) { ++ error_report("pre start device failed: %d\n", ret); ++ goto out; ++ } ++ ++ return qemu_file_get_error(f); ++out: ++ return ret; ++} ++ ++static SaveVMHandlers savevm_vdpa_handlers = { ++ .save_setup = vdpa_save_setup, ++ .save_live_complete_precopy = vdpa_save_complete_precopy, ++ .load_state = vdpa_load_state, ++ .load_setup = vdpa_load_setup, ++}; ++ + void vdpa_migration_register(VhostVdpaDevice *vdev) + { + vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), + vdpa_dev_vmstate_change, + DEVICE(vdev)); ++ register_savevm_live("vdpa", -1, 1, ++ &savevm_vdpa_handlers, DEVICE(vdev)); + } + + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { ++ unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +diff --git a/include/hw/virtio/vdpa-dev-mig.h b/include/hw/virtio/vdpa-dev-mig.h +index 89665ca747..adc1d657f7 100644 +--- a/include/hw/virtio/vdpa-dev-mig.h ++++ b/include/hw/virtio/vdpa-dev-mig.h +@@ -9,6 +9,19 @@ + + #include "hw/virtio/vdpa-dev.h" + ++enum { ++ VDPA_DEVICE_START, ++ VDPA_DEVICE_STOP, ++ VDPA_DEVICE_PRE_START, ++ VDPA_DEVICE_PRE_STOP, ++ VDPA_DEVICE_CANCEL, ++ VDPA_DEVICE_POST_START, ++ VDPA_DEVICE_START_ASYNC, ++ VDPA_DEVICE_STOP_ASYNC, ++ VDPA_DEVICE_PRE_START_ASYNC, ++ VDPA_DEVICE_QUERY_OP_STATE, ++}; ++ + void vdpa_migration_register(VhostVdpaDevice *vdev); + + 
void vdpa_migration_unregister(VhostVdpaDevice *vdev); +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 19dc7fd36c..a08e980a1e 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -231,4 +231,13 @@ + */ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) ++ ++/* set and get device buffer */ ++#define VHOST_GET_DEV_BUFFER _IOR(VHOST_VIRTIO, 0xb0, struct vhost_vdpa_config) ++#define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) ++#define VHOST_GET_DEV_BUFFER_SIZE _IOR(VHOST_VIRTIO, 0xb3, __u32) ++ ++/* set device migration state */ ++#define VHOST_VDPA_SET_MIG_STATE _IOW(VHOST_VIRTIO, 0xb2, __u8) ++ + #endif +-- +2.27.0 + diff --git a/vhost-implement-vhost-vdpa-suspend-resume.patch b/vhost-implement-vhost-vdpa-suspend-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..620b963673bb22390b6f2d4bb36bbdfd3c060c53 --- /dev/null +++ b/vhost-implement-vhost-vdpa-suspend-resume.patch @@ -0,0 +1,80 @@ +From a7f9a67ee98a5261f7639619055034f40bccfef0 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:22:20 +0800 +Subject: [PATCH] vhost: implement vhost-vdpa suspend/resume + +vhost-vdpa implements the vhost_dev_suspend interface, +which will be called during the shutdown phase of the +live migration source virtual machine to suspend the +device but not reset the device information. + +vhost-vdpa implements the vhost_dev_resume interface. +If the live migration fails, it will be called during +the startup phase of the source virtual machine. +Enable the device but set the status, etc. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 037a9c6e4c..063e941544 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1513,6 +1513,45 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + return true; + } + ++static int vhost_vdpa_suspend_device(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ int ret; ++ ++ vhost_vdpa_svqs_stop(dev); ++ vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); ++ ++ if (dev->vq_index + dev->nvqs != dev->vq_index_end) { ++ return 0; ++ } ++ ++ ret = vhost_vdpa_call(dev, VHOST_VDPA_SUSPEND, NULL); ++ memory_listener_unregister(&v->listener); ++ return ret; ++} ++ ++static int vhost_vdpa_resume_device(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ bool ok; ++ ++ vhost_vdpa_host_notifiers_init(dev); ++ ok = vhost_vdpa_svqs_start(dev); ++ if (unlikely(!ok)) { ++ return -1; ++ } ++ for (int i = 0; i < v->dev->nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(v, v->dev->vq_index + i); ++ } ++ ++ if (dev->vq_index + dev->nvqs != dev->vq_index_end) { ++ return 0; ++ } ++ ++ memory_listener_register(&v->listener, &address_space_memory); ++ return vhost_vdpa_call(dev, VHOST_VDPA_RESUME, NULL); ++} ++ + static int vhost_vdpa_log_sync(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; +@@ -1559,4 +1598,6 @@ const VhostOps vdpa_ops = { + .vhost_log_sync = vhost_vdpa_log_sync, + .vhost_set_config_call = vhost_vdpa_set_config_call, + .vhost_reset_status = vhost_vdpa_reset_status, ++ .vhost_dev_suspend = vhost_vdpa_suspend_device, ++ .vhost_dev_resume = vhost_vdpa_resume_device, + }; +-- +2.27.0 + diff --git a/vhost-implement-vhost_vdpa_device_suspend-resume.patch b/vhost-implement-vhost_vdpa_device_suspend-resume.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f9ef199773c3fe3c136ad0fc6d5d29b67b7ef2c9 --- /dev/null +++ b/vhost-implement-vhost_vdpa_device_suspend-resume.patch @@ -0,0 +1,447 @@ +From 4c5a9a0703e227186639124f09cdf7214e40ea7d Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:27:34 +0800 +Subject: [PATCH] vhost: implement vhost_vdpa_device_suspend/resume + +Implement vhost device suspend & resume interface + +Signed-off-by: jiangdongxu +Signed-off-by: fangyi +Signed-off-by: libai +--- + hw/virtio/meson.build | 2 +- + hw/virtio/vdpa-dev-mig.c | 178 +++++++++++++++++++++++++++++++ + hw/virtio/vhost.c | 138 ++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev-mig.h | 16 +++ + include/hw/virtio/vdpa-dev.h | 1 + + include/hw/virtio/vhost.h | 3 + + migration/migration.c | 3 +- + migration/migration.h | 2 + + 8 files changed, 340 insertions(+), 3 deletions(-) + create mode 100644 hw/virtio/vdpa-dev-mig.c + create mode 100644 include/hw/virtio/vdpa-dev-mig.h + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index c0055a7832..596651d113 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -5,7 +5,7 @@ system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c') + system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) + system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +-system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) ++system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c')) + + specific_virtio_ss = ss.source_set() + specific_virtio_ss.add(files('virtio.c')) +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +new file mode 100644 +index 0000000000..1d2bed2571 +--- /dev/null ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) Huawei 
Technologies Co., Ltd. 2023. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, see . ++ */ ++ ++#include ++#include ++#include "qemu/osdep.h" ++#include "hw/virtio/vhost.h" ++#include "hw/virtio/vdpa-dev.h" ++#include "hw/virtio/virtio-bus.h" ++#include "migration/migration.h" ++#include "qemu/error-report.h" ++#include "hw/virtio/vdpa-dev-mig.h" ++ ++static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, ++ void *arg) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ int fd = v->device_fd; ++ ++ if (dev->vhost_ops->backend_type != VHOST_BACKEND_TYPE_VDPA) { ++ error_report("backend type isn't VDPA. 
Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ return ioctl(fd, request, arg); ++} ++ ++static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ int ret; ++ ++ if (!vdpa->started) { ++ return -EFAULT; ++ } ++ ++ if (!k->set_guest_notifiers) { ++ return -EFAULT; ++ } ++ ++ vdpa->started = false; ++ ++ ret = vhost_dev_suspend(&vdpa->dev, vdev, false); ++ if (ret) { ++ goto suspend_fail; ++ } ++ ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); ++ if (ret < 0) { ++ error_report("vhost guest notifier cleanup failed: %d\n", ret); ++ goto set_guest_notifiers_fail; ++ } ++ ++ vhost_dev_disable_notifiers(&vdpa->dev, vdev); ++ return ret; ++ ++set_guest_notifiers_fail: ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); ++ if (ret) { ++ error_report("vhost guest notifier restore failed: %d\n", ret); ++ } ++ ++suspend_fail: ++ vdpa->started = true; ++ return ret; ++} ++ ++static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ int i, ret; ++ ++ if (!k->set_guest_notifiers) { ++ error_report("binding does not support guest notifiers\n"); ++ return -ENOSYS; ++ } ++ ++ ret = vhost_dev_enable_notifiers(&vdpa->dev, vdev); ++ if (ret < 0) { ++ error_report("Error enabling host notifiers: %d\n", ret); ++ return ret; ++ } ++ ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); ++ if (ret < 0) { ++ error_report("Error binding guest notifier: %d\n", ret); ++ goto err_host_notifiers; ++ } ++ ++ vdpa->dev.acked_features = vdev->guest_features; ++ ++ ret = vhost_dev_resume(&vdpa->dev, vdev, false); ++ if (ret < 0) { ++ error_report("Error starting vhost: %d\n", ret); ++ goto err_guest_notifiers; ++ 
} ++ vdpa->started = true; ++ ++ /* ++ * guest_notifier_mask/pending not used yet, so just unmask ++ * everything here. virtio-pci will do the right thing by ++ * enabling/disabling irqfd. ++ */ ++ for (i = 0; i < vdpa->dev.nvqs; i++) { ++ vhost_virtqueue_mask(&vdpa->dev, vdev, i, false); ++ } ++ ++ return ret; ++ ++err_guest_notifiers: ++ k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); ++err_host_notifiers: ++ vhost_dev_disable_notifiers(&vdpa->dev, vdev); ++ return ret; ++} ++ ++static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) ++{ ++ VhostVdpaDevice *vdpa = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdpa->dev; ++ int ret; ++ MigrationState *ms = migrate_get_current(); ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++ if (!running) { ++ if (ms->state == RUN_STATE_PAUSED) { ++ ret = vhost_vdpa_device_suspend(vdpa); ++ if (ret) { ++ error_report("suspend vdpa device failed: %d\n", ret); ++ if (ms->migration_thread_running) { ++ migrate_fd_cancel(ms); ++ } ++ } ++ } ++ } else { ++ if (ms->state == RUN_STATE_RESTORE_VM) { ++ ret = vhost_vdpa_device_resume(vdpa); ++ if (ret) { ++ error_report("migration dest resume device failed, abort!\n"); ++ exit(EXIT_FAILURE); ++ } ++ } ++ ++ if (mis->state == RUN_STATE_RESTORE_VM) { ++ vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ } ++ } ++} ++ ++void vdpa_migration_register(VhostVdpaDevice *vdev) ++{ ++ vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), ++ vdpa_dev_vmstate_change, ++ DEVICE(vdev)); ++} ++ ++void vdpa_migration_unregister(VhostVdpaDevice *vdev) ++{ ++ qemu_del_vm_change_state_handler(vdev->vmstate); ++} +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 438182d850..d073a6d5a5 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -2492,3 +2492,141 @@ bool used_memslots_is_exceeded(void) + { + return used_memslots_exceeded; + } ++ ++int vhost_dev_resume(struct vhost_dev *hdev, VirtIODevice *vdev, bool 
vrings) ++{ ++ int i, r; ++ EventNotifier *e = &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ ++ /* should only be called after backend is connected */ ++ if (!hdev->vhost_ops) { ++ error_report("Missing vhost_ops! Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ vdev->vhost_started = true; ++ hdev->started = true; ++ hdev->vdev = vdev; ++ ++ if (vhost_dev_has_iommu(hdev)) { ++ memory_listener_register(&hdev->iommu_listener, vdev->dma_as); ++ } ++ ++ r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed"); ++ goto fail_mem; ++ } ++ for (i = 0; i < hdev->nvqs; ++i) { ++ r = vhost_virtqueue_start(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ if (r < 0) { ++ goto fail_vq; ++ } ++ } ++ ++ r = event_notifier_init(e, 0); ++ if (r < 0) { ++ return r; ++ } ++ event_notifier_test_and_clear(e); ++ if (!vdev->use_guest_notifier_mask) { ++ vhost_config_mask(hdev, vdev, true); ++ } ++ if (vrings) { ++ r = vhost_dev_set_vring_enable(hdev, true); ++ if (r) { ++ goto fail_vq; ++ } ++ } ++ if (hdev->vhost_ops->vhost_dev_resume) { ++ r = hdev->vhost_ops->vhost_dev_resume(hdev); ++ if (r) { ++ goto fail_start; ++ } ++ } ++ if (vhost_dev_has_iommu(hdev)) { ++ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true); ++ ++ /* ++ * Update used ring information for IOTLB to work correctly, ++ * vhost-kernel code requires for this. 
++ */ ++ for (i = 0; i < hdev->nvqs; ++i) { ++ struct vhost_virtqueue *vq = hdev->vqs + i; ++ vhost_device_iotlb_miss(hdev, vq->used_phys, true); ++ } ++ } ++ vhost_start_config_intr(hdev); ++ return 0; ++fail_start: ++ if (vrings) { ++ vhost_dev_set_vring_enable(hdev, false); ++ } ++fail_vq: ++ while (--i >= 0) { ++ vhost_virtqueue_stop(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ } ++ ++fail_mem: ++ vdev->vhost_started = false; ++ hdev->started = false; ++ return r; ++} ++ ++int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) ++{ ++ int i; ++ int ret = 0; ++ EventNotifier *e = &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ ++ /* should only be called after backend is connected */ ++ if (!hdev->vhost_ops) { ++ error_report("Missing vhost_ops! Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ event_notifier_test_and_clear(e); ++ event_notifier_test_and_clear(&vdev->config_notifier); ++ ++ if (hdev->vhost_ops->vhost_dev_suspend) { ++ ret = hdev->vhost_ops->vhost_dev_suspend(hdev); ++ if (ret) { ++ goto fail_suspend; ++ } ++ } ++ if (vrings) { ++ ret = vhost_dev_set_vring_enable(hdev, false); ++ if (ret) { ++ goto fail_suspend; ++ } ++ } ++ for (i = 0; i < hdev->nvqs; ++i) { ++ vhost_virtqueue_stop(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ } ++ ++ if (vhost_dev_has_iommu(hdev)) { ++ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); ++ memory_listener_unregister(&hdev->iommu_listener); ++ } ++ vhost_stop_config_intr(hdev); ++ vhost_log_put(hdev, true); ++ hdev->started = false; ++ vdev->vhost_started = false; ++ hdev->vdev = NULL; ++ ++ return ret; ++ ++fail_suspend: ++ event_notifier_test_and_clear(e); ++ ++ return ret; ++} +diff --git a/include/hw/virtio/vdpa-dev-mig.h b/include/hw/virtio/vdpa-dev-mig.h +new file mode 100644 +index 0000000000..89665ca747 +--- /dev/null ++++ b/include/hw/virtio/vdpa-dev-mig.h +@@ -0,0 +1,16 @@ ++/* ++ * Vhost Vdpa Device Migration Header ++ 
* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All Rights Reserved. ++ */ ++ ++#ifndef _VHOST_VDPA_MIGRATION_H ++#define _VHOST_VDPA_MIGRATION_H ++ ++#include "hw/virtio/vdpa-dev.h" ++ ++void vdpa_migration_register(VhostVdpaDevice *vdev); ++ ++void vdpa_migration_unregister(VhostVdpaDevice *vdev); ++ ++#endif /* _VHOST_VDPA_MIGRATION_H */ +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 4dbf98195c..43cbcef81b 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -38,6 +38,7 @@ struct VhostVdpaDevice { + uint16_t queue_size; + bool started; + int (*post_init)(VhostVdpaDevice *v, Error **errp); ++ VMChangeStateEntry *vmstate; + }; + + #endif +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 6ae86833e3..9ca5819deb 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -466,4 +466,7 @@ int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + */ + int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + ++int vhost_dev_resume(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); ++int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index 23d9233bbe..dce22c2da5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -99,7 +99,6 @@ static bool migration_object_check(MigrationState *ms, Error **errp); + static int migration_maybe_pause(MigrationState *s, + int *current_active_state, + int new_state); +-static void migrate_fd_cancel(MigrationState *s); + static bool close_return_path_on_source(MigrationState *s); + + static void migration_downtime_start(MigrationState *s) +@@ -1386,7 +1385,7 @@ void migrate_fd_error(MigrationState *s, const Error *error) + migrate_set_error(s, error); + } + +-static void migrate_fd_cancel(MigrationState *s) ++void migrate_fd_cancel(MigrationState *s) + { + 
int old_state ; + +diff --git a/migration/migration.h b/migration/migration.h +index 6aafa04314..2f26c9509b 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -551,4 +551,6 @@ void migration_rp_kick(MigrationState *s); + + int migration_stop_vm(RunState state); + ++void migrate_fd_cancel(MigrationState *s); ++ + #endif +-- +2.27.0 + diff --git a/vhost-introduce-bytemap-for-vhost-backend-logging.patch b/vhost-introduce-bytemap-for-vhost-backend-logging.patch new file mode 100644 index 0000000000000000000000000000000000000000..7293b3b13a637d96422a85a17e2fe52cea5cf825 --- /dev/null +++ b/vhost-introduce-bytemap-for-vhost-backend-logging.patch @@ -0,0 +1,304 @@ +From 962acd498b11ae5ccc040d76ec89990add119dec Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:09:26 +0800 +Subject: [PATCH] vhost: introduce bytemap for vhost backend logging + +As vhost backend may use bytemap for logging, when get log_size +of vhost device, check whether vhost device support VHOST_BACKEND_F_BYTEMAPLOG. +If vhost device support, use bytemap for logging. + +By the way, add log_resize func pointer check and vhost_log_sync return +value check. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost.c | 89 ++++++++++++++++++++++++++++++++++++--- + include/exec/memory.h | 9 ++++ + include/exec/ram_addr.h | 44 +++++++++++++++++++ + include/hw/virtio/vhost.h | 1 + + system/physmem.c | 11 +++++ + 5 files changed, 148 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 038ac37dd0..438182d850 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -29,6 +29,7 @@ + #include "migration/migration.h" + #include "sysemu/dma.h" + #include "trace.h" ++#include "qapi/qapi-commands-migration.h" + + /* enabled until disconnected backend stabilizes */ + #define _VHOST_DEBUG 1 +@@ -44,6 +45,11 @@ + do { } while (0) + #endif + ++static inline bool vhost_bytemap_log_support(struct vhost_dev *dev) ++{ ++ return (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG)); ++} ++ + static struct vhost_log *vhost_log; + static struct vhost_log *vhost_log_shm; + +@@ -232,12 +238,40 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + return 0; + } + ++static int vhost_sync_dirty_bytemap(struct vhost_dev *dev, ++ MemoryRegionSection *section) ++{ ++ unsigned long *bytemap = dev->log->log; ++ return memory_section_set_dirty_bytemap(section, bytemap); ++} ++ + static void vhost_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { + struct vhost_dev *dev = container_of(listener, struct vhost_dev, + memory_listener); +- vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL); ++ MigrationState *ms = migrate_get_current(); ++ ++ if (!dev->log_enabled || !dev->started) { ++ return; ++ } ++ ++ if (dev->vhost_ops->vhost_log_sync) { ++ int r = dev->vhost_ops->vhost_log_sync(dev); ++ if (r < 0) { ++ error_report("Failed to sync dirty log: 0x%x\n", r); ++ if (migration_is_running(ms->state)) { ++ qmp_migrate_cancel(NULL); ++ } ++ return; ++ } ++ } ++ ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_sync_dirty_bytemap(dev, section); ++ } else { ++ vhost_sync_dirty_bitmap(dev, section, 
0x0, ~0x0ULL); ++ } + } + + static void vhost_log_sync_range(struct vhost_dev *dev, +@@ -247,7 +281,11 @@ static void vhost_log_sync_range(struct vhost_dev *dev, + /* FIXME: this is N^2 in number of sections */ + for (i = 0; i < dev->n_mem_sections; ++i) { + MemoryRegionSection *section = &dev->mem_sections[i]; +- vhost_sync_dirty_bitmap(dev, section, first, last); ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_sync_dirty_bytemap(dev, section); ++ } else { ++ vhost_sync_dirty_bitmap(dev, section, first, last); ++ } + } + } + +@@ -255,11 +293,19 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) + { + uint64_t log_size = 0; + int i; ++ uint64_t vhost_log_chunk_size; ++ ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_log_chunk_size = VHOST_LOG_CHUNK_BYTES; ++ } else { ++ vhost_log_chunk_size = VHOST_LOG_CHUNK; ++ } ++ + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + uint64_t last = range_get_last(reg->guest_phys_addr, + reg->memory_size); +- log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1); ++ log_size = MAX(log_size, last / vhost_log_chunk_size + 1); + } + return log_size; + } +@@ -377,12 +423,21 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev) + dev->vhost_ops->vhost_requires_shm_log(dev); + } + +-static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) ++static inline int vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) + { + struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); +- uint64_t log_base = (uintptr_t)log->log; ++ uint64_t log_base; ++ int log_fd; + int r; + ++ if (!log) { ++ r = -ENOMEM; ++ goto out; ++ } ++ ++ log_base = (uint64_t)log->log; ++ log_fd = log_fd; ++ + /* inform backend of log switching, this must be done before + releasing the current log, to ensure no logging is lost */ + r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log); +@@ -390,9 +445,19 @@ static inline void vhost_dev_log_resize(struct 
vhost_dev *dev, uint64_t size) + VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); + } + ++ if (dev->vhost_ops->vhost_set_log_size) { ++ r = dev->vhost_ops->vhost_set_log_size(dev, size, dev->log); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_log_size failed"); ++ } ++ } ++ + vhost_log_put(dev, true); + dev->log = log; + dev->log_size = size; ++ ++out: ++ return r; + } + + static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, +@@ -1018,7 +1083,11 @@ static int vhost_migration_log(MemoryListener *listener, bool enable) + } + vhost_log_put(dev, false); + } else { +- vhost_dev_log_resize(dev, vhost_get_log_size(dev)); ++ r = vhost_dev_log_resize(dev, vhost_get_log_size(dev)); ++ if ( r < 0 ) { ++ return r; ++ } ++ + r = vhost_dev_set_log(dev, true); + if (r < 0) { + goto check_dev_state; +@@ -2057,6 +2126,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); + goto fail_log; + } ++ ++ if (hdev->vhost_ops->vhost_set_log_size) { ++ r = hdev->vhost_ops->vhost_set_log_size(hdev, hdev->log_size, hdev->log); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_log_size failed"); ++ goto fail_log; ++ } ++ } + } + if (vrings) { + r = vhost_dev_set_vring_enable(hdev, true); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 831f7c996d..e131c2682c 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2594,6 +2594,15 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr, + MemOp op, + MemTxAttrs attrs); + ++/** ++ * memory_section_set_dirty_bytemap: Mark a range of bytes as dirty for a memory section ++ * using a bytemap ++ * ++ * @section: the memory section being dirtied. ++ * @bytemap: bytemap that stores dirty page range information. 
++ */ ++int64_t memory_section_set_dirty_bytemap(MemoryRegionSection *section, unsigned long *bytemap); ++ + /** + * address_space_init: initializes an address space + * +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index 90676093f5..ef6988b445 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -535,5 +535,49 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, + + return num_dirty; + } ++ ++#define BYTES_PER_LONG (sizeof(unsigned long)) ++#define BYTE_WORD(nr) ((nr) / BYTES_PER_LONG) ++#define BYTES_TO_LONGS(nr) DIV_ROUND_UP(nr, BYTES_PER_LONG) ++ ++static inline int64_t _set_dirty_bytemap_atomic(unsigned long *bytemap, unsigned long cur_pfn) ++{ ++ char *byte_of_long = (char *)bytemap; ++ int i; ++ int64_t dirty_num = 0; ++ ++ for (i = 0; i < BYTES_PER_LONG; i++) { ++ if (byte_of_long[i]) { ++ cpu_physical_memory_set_dirty_range((cur_pfn + i) << TARGET_PAGE_BITS, ++ TARGET_PAGE_SIZE, ++ 1 << DIRTY_MEMORY_MIGRATION); ++ /* Per byte ops, no need to atomic_xchg */ ++ byte_of_long[i] = 0; ++ dirty_num++; ++ } ++ } ++ ++ return dirty_num; ++} ++ ++static inline int64_t cpu_physical_memory_set_dirty_bytemap(unsigned long *bytemap, ++ ram_addr_t start, ++ ram_addr_t pages) ++{ ++ unsigned long i; ++ unsigned long len = BYTES_TO_LONGS(pages); ++ unsigned long pfn = (start >> TARGET_PAGE_BITS) / ++ BYTES_PER_LONG * BYTES_PER_LONG; ++ int64_t dirty_mig_bits = 0; ++ ++ for (i = 0; i < len; i++) { ++ if (bytemap[i]) { ++ dirty_mig_bits += _set_dirty_bytemap_atomic(&bytemap[i], ++ pfn + BYTES_PER_LONG * i); ++ } ++ } ++ ++ return dirty_mig_bits; ++} + #endif + #endif +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 444ca0ad42..6ae86833e3 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -43,6 +43,7 @@ typedef unsigned long vhost_log_chunk_t; + #define VHOST_LOG_PAGE 0x1000 + #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) + #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * 
VHOST_LOG_BITS) ++#define VHOST_LOG_CHUNK_BYTES (VHOST_LOG_PAGE * sizeof(vhost_log_chunk_t)) + #define VHOST_INVALID_FEATURE_BIT (0xff) + #define VHOST_QUEUE_NUM_CONFIG_INR 0 + +diff --git a/system/physmem.c b/system/physmem.c +index f14d64819b..247c252e53 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -2602,6 +2602,17 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, + cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); + } + ++int64_t memory_section_set_dirty_bytemap(MemoryRegionSection *section, unsigned long *bytemap) ++{ ++ ram_addr_t start = section->offset_within_region + ++ memory_region_get_ram_addr(section->mr); ++ ram_addr_t pages = int128_get64(section->size) >> TARGET_PAGE_BITS; ++ ++ hwaddr idx = BYTE_WORD( ++ section->offset_within_address_space >> TARGET_PAGE_BITS); ++ return cpu_physical_memory_set_dirty_bytemap(bytemap + idx, start, pages); ++} ++ + void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) + { + /* +-- +2.27.0 + diff --git a/vhost-user-Add-support-reconnect-vhost-user-socket.patch b/vhost-user-Add-support-reconnect-vhost-user-socket.patch new file mode 100644 index 0000000000000000000000000000000000000000..eec650e70d8b06bb45a20ad0504e803153bfa805 --- /dev/null +++ b/vhost-user-Add-support-reconnect-vhost-user-socket.patch @@ -0,0 +1,168 @@ +From 0bc608ab4117818b32d2a1aaf2d4f5c2aeb54af7 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:05:47 +0800 +Subject: [PATCH] vhost-user: Add support reconnect vhost-user socket + +Add support reconnect vhost-user socket, the reconnect time +is set to be 3 seconds. 
+ +Signed-off-by: Jinhua Cao +--- + chardev/char-socket.c | 19 ++++++++++++++++++- + hw/net/vhost_net.c | 4 +++- + hw/virtio/vhost-user.c | 6 ++++++ + include/chardev/char.h | 16 ++++++++++++++++ + net/vhost-user.c | 3 +++ + 5 files changed, 46 insertions(+), 2 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 034840593d..9c60e15c8e 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -337,6 +337,22 @@ static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond) + return qio_channel_create_watch(s->ioc, cond); + } + ++static void tcp_chr_set_reconnect_time(Chardev *chr, ++ int64_t reconnect_time) ++{ ++ SocketChardev *s = SOCKET_CHARDEV(chr); ++ s->reconnect_time = reconnect_time; ++} ++ ++void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time) ++{ ++ ChardevClass *cc = CHARDEV_GET_CLASS(chr); ++ ++ if (cc->chr_set_reconnect_time) { ++ cc->chr_set_reconnect_time(chr, reconnect_time); ++ } ++} ++ + static void remove_hup_source(SocketChardev *s) + { + if (s->hup_source != NULL) { +@@ -537,7 +553,7 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len) + if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) { + qio_channel_set_blocking(s->ioc, false, NULL); + } +- if (size == 0) { ++ if (size == 0 && chr->chr_for_flag != CHR_FOR_VHOST_USER) { + /* connection closed */ + tcp_chr_disconnect(chr); + } +@@ -1543,6 +1559,7 @@ static void char_socket_class_init(ObjectClass *oc, void *data) + cc->set_msgfds = tcp_set_msgfds; + cc->chr_add_client = tcp_chr_add_client; + cc->chr_add_watch = tcp_chr_add_watch; ++ cc->chr_set_reconnect_time = tcp_chr_set_reconnect_time; + cc->chr_update_read_handler = tcp_chr_update_read_handler; + + object_class_property_add(oc, "addr", "SocketAddress", +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 1b08b02477..e48c373b14 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -459,7 +459,9 @@ int vhost_net_start(VirtIODevice *dev, 
NetClientState *ncs, + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } + +- if (peer->vring_enable) { ++ /* ovs needs to restore all states of vring */ ++ if (peer->vring_enable || ++ ncs[i].peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { + /* restore vring enable state */ + r = vhost_set_vring_enable(peer, peer->vring_enable); + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index f214df804b..05e14e1eff 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -2126,9 +2126,15 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + struct vhost_user *u; + VhostUserState *vus = (VhostUserState *) opaque; + int err; ++ Chardev *chr; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + ++ chr = qemu_chr_fe_get_driver(((VhostUserState *)opaque)->chr); ++ if (chr) { ++ chr->chr_for_flag = CHR_FOR_VHOST_USER; ++ } ++ + u = g_new0(struct vhost_user, 1); + u->user = vus; + u->dev = dev; +diff --git a/include/chardev/char.h b/include/chardev/char.h +index 01df55f9e8..f8bd469466 100644 +--- a/include/chardev/char.h ++++ b/include/chardev/char.h +@@ -14,6 +14,8 @@ + #define IAC_SB 250 + #define IAC 255 + ++#define CHR_FOR_VHOST_USER 0x32a1 ++ + /* character device */ + typedef struct CharBackend CharBackend; + +@@ -70,6 +72,7 @@ struct Chardev { + GSource *gsource; + GMainContext *gcontext; + DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST); ++ int chr_for_flag; + }; + + /** +@@ -227,6 +230,16 @@ int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all); + #define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true) + int qemu_chr_wait_connected(Chardev *chr, Error **errp); + ++/** ++ * @qemu_chr_set_reconnect_time: ++ * ++ * Set reconnect time for char disconnect. ++ * Currently, only vhost user will call it. 
++ * ++ * @reconnect_time the reconnect_time to be set ++ */ ++void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time); ++ + #define TYPE_CHARDEV "chardev" + OBJECT_DECLARE_TYPE(Chardev, ChardevClass, CHARDEV) + +@@ -306,6 +319,9 @@ struct ChardevClass { + + /* handle various events */ + void (*chr_be_event)(Chardev *s, QEMUChrEvent event); ++ ++ /* set reconnect time */ ++ void (*chr_set_reconnect_time)(Chardev *chr, int64_t reconnect_time); + }; + + Chardev *qemu_chardev_new(const char *id, const char *typename, +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 12555518e8..51fa8c678f 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -21,6 +21,8 @@ + #include "qemu/option.h" + #include "trace.h" + ++#define VHOST_USER_RECONNECT_TIME (3) ++ + typedef struct NetVhostUserState { + NetClientState nc; + CharBackend chr; /* only queue index 0 */ +@@ -292,6 +294,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) + trace_vhost_user_event(chr->label, event); + switch (event) { + case CHR_EVENT_OPENED: ++ qemu_chr_set_reconnect_time(chr, VHOST_USER_RECONNECT_TIME); + if (vhost_user_start(queues, ncs, s->vhost_user) < 0) { + qemu_chr_fe_disconnect(&s->chr); + return; +-- +2.27.0 + diff --git a/vhost-user-Print-unexpected-slave-message-types.patch b/vhost-user-Print-unexpected-slave-message-types.patch deleted file mode 100644 index 4287428e059d06ffaac516ed2c2aa83b4f5d4e98..0000000000000000000000000000000000000000 --- a/vhost-user-Print-unexpected-slave-message-types.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 6e084ff24ad73eb4f7541573c6097013f5b94959 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 7 Feb 2019 18:22:40 +0000 -Subject: [PATCH] vhost-user: Print unexpected slave message types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When we receive an unexpected message type on the slave fd, print -the type. - -Signed-off-by: Dr. 
David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert ---- - hw/virtio/vhost-user.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 4ca5b2551e..f012774210 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -1054,7 +1054,7 @@ static void slave_read(void *opaque) - fd[0]); - break; - default: -- error_report("Received unexpected msg type."); -+ error_report("Received unexpected msg type: %d.", hdr.request); - ret = -EINVAL; - } - --- -2.27.0 - diff --git a/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch b/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch new file mode 100644 index 0000000000000000000000000000000000000000..24c2670561fc19f9c6c4e856c74c7bf9799c3f75 --- /dev/null +++ b/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch @@ -0,0 +1,96 @@ +From 0154183e118169be5945cb5ebec2b79379071591 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:49:21 +0800 +Subject: [PATCH] vhost-user: Set the acked_features to vm's featrue + +Fix the problem when vm restart, the ovs restart and lead to the net +unreachable. The soluation is set the acked_features to vm's featrue +just the same as guest virtio-net mod load. 
+ +Signed-off-by: Jinhua Cao +--- + hw/net/vhost_net.c | 58 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 57 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index e8e1661646..1b08b02477 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -167,9 +167,26 @@ static int vhost_net_get_fd(NetClientState *backend) + } + } + ++static uint64_t vhost_get_mask_features(const int *feature_bits, uint64_t features) ++{ ++ const int *bit = feature_bits; ++ uint64_t out_features = 0; ++ ++ while (*bit != VHOST_INVALID_FEATURE_BIT) { ++ uint64_t bit_mask = (1ULL << *bit); ++ if (features & bit_mask) { ++ out_features |= bit_mask; ++ } ++ bit++; ++ } ++ return out_features; ++} ++ + struct vhost_net *vhost_net_init(VhostNetOptions *options) + { + int r; ++ VirtIONet *n; ++ VirtIODevice *vdev; + bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; + struct vhost_net *net = g_new0(struct vhost_net, 1); + uint64_t features = 0; +@@ -195,7 +212,46 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) + net->backend = r; + net->dev.protocol_features = 0; + } else { +- net->dev.backend_features = 0; ++ /* for ovs restart when vm start. ++ * Normal situation: ++ * 1.vm start. ++ * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. ++ * 3.guest virtio-net mod load. qemu will call virtio_net_set_features set ++ * dev.acked_features to 0x40408000. ++ * 4.feature set to ovs's vhostuser(0x40408000). ++ * 5.ovs restart. ++ * 6.vhost_user_stop will save net->dev.acked_features(0x40408000) to ++ * VhostUserState's acked_features(0x40408000). ++ * 7.restart ok. ++ * 8.vhost_net_init fun call vhost_user_get_acked_features get the save ++ * features, and set to net->dev.acked_features. ++ * Abnormal situation: ++ * 1.vm start. ++ * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. ++ * 3.ovs restart. 
++ * 4.vhost_user_stop will save net->dev.acked_features(0x40000000) to ++ * VhostUserState's acked_features(0x40000000). ++ * 5.guest virtio-net mod load. qemu will call virtio_net_set_features set ++ * dev.acked_features to 0x40408000. ++ * 6.restart ok. ++ * 7.vhost_net_init fun call vhost_user_get_acked_features get the save ++ * features(0x40000000), and set to net->dev.acked_features(0x40000000). ++ * 8.feature set to ovs's vhostuser(0x40000000). ++ * ++ * in abnormal situation, qemu set the wrong features to ovs's vhostuser, ++ * then the vm's network will be down. ++ * in abnormal situation, we found it just lost the guest feartures in ++ * acked_features, so hear we set the acked_features to vm's featrue ++ * just the same as guest virtio-net mod load. ++ */ ++ if (options->net_backend->peer) { ++ n = qemu_get_nic_opaque(options->net_backend->peer); ++ vdev = VIRTIO_DEVICE(n); ++ net->dev.backend_features = vhost_get_mask_features(vhost_net_get_feature_bits(net), ++ vdev->guest_features); ++ } else { ++ net->dev.backend_features = 0; ++ } + net->dev.protocol_features = 0; + net->backend = -1; + +-- +2.27.0 + diff --git a/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch b/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch new file mode 100644 index 0000000000000000000000000000000000000000..558464ce14870d63b86b4685410cff74235a2865 --- /dev/null +++ b/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch @@ -0,0 +1,32 @@ +From c65ff10063a6c599b88cba27fd70a72e2e0cc0ff Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 20:21:33 +0800 +Subject: [PATCH] vhost-user: add unregister_savevm when vhost-user cleanup + +commit 12cf5e9ece ("vhost-user: add vhost_set_mem_table +when vm load_setup at destination") only register savevm +handler but not unregister it, which will cause the +number of handers increase when vhost-user devices hotplug, +so this commit add unregister_savevm when vhost-user cleanup. 
+ +Fixes: 12cf5e9ece ("vhost-user: add vhost_set_mem_table when vm load_setup at destination") +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost-user.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 6739dfc98e..e589ee3572 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -2310,6 +2310,7 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) + u->region_rb_len = 0; + g_free(u); + dev->opaque = 0; ++ unregister_savevm(NULL, "vhost-user", dev); + + return 0; + } +-- +2.27.0 + diff --git a/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch b/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch new file mode 100644 index 0000000000000000000000000000000000000000..795185b2a1d522411e6b1bb5b127e56048afa7c1 --- /dev/null +++ b/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch @@ -0,0 +1,130 @@ +From 12cf5e9ece9cb0825f14ca80f6b1c5d1eb95c3e5 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:59:34 +0800 +Subject: [PATCH] vhost-user: add vhost_set_mem_table when vm load_setup at + destination + +When migrate huge vm, packages lost are 90+. + +During the load_setup of the destination vm, pass the +vm mem structure to ovs, the netcard could be enabled +when the migration finish state shifting. 
+ +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost-user.c | 24 ++++++++++++++++++++++++ + tests/qtest/vhost-user-test.c | 35 ++++++++++++++++++----------------- + 2 files changed, 42 insertions(+), 17 deletions(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index f214df804b..6739dfc98e 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -28,6 +28,7 @@ + #include "sysemu/cryptodev.h" + #include "migration/migration.h" + #include "migration/postcopy-ram.h" ++#include "migration/register.h" + #include "trace.h" + #include "exec/ramblock.h" + +@@ -2119,6 +2120,28 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, + return 0; + } + ++static int vhost_user_load_setup(QEMUFile *f, void *opaque) ++{ ++ struct vhost_dev *hdev = opaque; ++ int r; ++ ++ if (hdev->vhost_ops && hdev->vhost_ops->vhost_set_mem_table) { ++ r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); ++ if (r < 0) { ++ qemu_log("error: vhost_set_mem_table failed: %s(%d)\n", ++ strerror(errno), errno); ++ return r; ++ } else { ++ qemu_log("info: vhost_set_mem_table OK\n"); ++ } ++ } ++ return 0; ++} ++ ++SaveVMHandlers savevm_vhost_user_handlers = { ++ .load_setup = vhost_user_load_setup, ++}; ++ + static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + Error **errp) + { +@@ -2255,6 +2278,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + + u->postcopy_notifier.notify = vhost_user_postcopy_notifier; + postcopy_add_notifier(&u->postcopy_notifier); ++ register_savevm_live("vhost-user", -1, 1, &savevm_vhost_user_handlers, dev); + + return 0; + } +diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c +index d4e437265f..fadf3f0f2e 100644 +--- a/tests/qtest/vhost-user-test.c ++++ b/tests/qtest/vhost-user-test.c +@@ -799,6 +799,23 @@ static void test_read_guest_mem(void *obj, void *arg, QGuestAllocator *alloc) + read_guest_mem_server(global_qtest, server); + } + ++static void 
wait_for_rings_started(TestServer *s, size_t count) ++{ ++ gint64 end_time; ++ ++ g_mutex_lock(&s->data_mutex); ++ end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; ++ while (ctpop64(s->rings) != count) { ++ if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) { ++ /* timeout has passed */ ++ g_assert_cmpint(ctpop64(s->rings), ==, count); ++ break; ++ } ++ } ++ ++ g_mutex_unlock(&s->data_mutex); ++} ++ + static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + { + TestServer *s = arg; +@@ -869,6 +886,7 @@ static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + qtest_qmp_eventwait(to, "RESUME"); + + g_assert(wait_for_fds(dest)); ++ wait_for_rings_started(dest, 2); + read_guest_mem_server(to, dest); + + g_source_destroy(source); +@@ -880,23 +898,6 @@ static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + g_string_free(dest_cmdline, true); + } + +-static void wait_for_rings_started(TestServer *s, size_t count) +-{ +- gint64 end_time; +- +- g_mutex_lock(&s->data_mutex); +- end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; +- while (ctpop64(s->rings) != count) { +- if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) { +- /* timeout has passed */ +- g_assert_cmpint(ctpop64(s->rings), ==, count); +- break; +- } +- } +- +- g_mutex_unlock(&s->data_mutex); +-} +- + static inline void test_server_connect(TestServer *server) + { + test_server_create_chr(server, ",reconnect=1"); +-- +2.27.0 + diff --git a/vhost-user-blk-convert-to-new-virtio_delete_queue.patch b/vhost-user-blk-convert-to-new-virtio_delete_queue.patch deleted file mode 100644 index 3f419966ec034fe4eecd8e6606b6e7dc611517e1..0000000000000000000000000000000000000000 --- a/vhost-user-blk-convert-to-new-virtio_delete_queue.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 30d20e1258722431198cd2a8298c85b7af2a0c1b Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Mon, 24 Feb 2020 12:13:36 +0800 -Subject: [PATCH 5/9] 
vhost-user-blk: convert to new virtio_delete_queue - -use the new virtio_delete_queue function to cleanup. - -Signed-off-by: Pan Nengyuan -Message-Id: <20200224041336.30790-3-pannengyuan@huawei.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: AlexChen ---- - hw/block/vhost-user-blk.c | 20 ++++++++++++-------- - include/hw/virtio/vhost-user-blk.h | 4 +++- - 2 files changed, 15 insertions(+), 9 deletions(-) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index dbc0a2e..146b927 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -303,7 +303,7 @@ static int vhost_user_blk_connect(DeviceState *dev) - s->connected = true; - - s->dev.nvqs = s->num_queues; -- s->dev.vqs = s->vqs; -+ s->dev.vqs = s->vhost_vqs; - s->dev.vq_index = 0; - s->dev.backend_features = 0; - -@@ -430,13 +430,15 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, - sizeof(struct virtio_blk_config)); - -+ s->virtqs = g_new(VirtQueue *, s->num_queues); - for (i = 0; i < s->num_queues; i++) { -- virtio_add_queue(vdev, s->queue_size, -- vhost_user_blk_handle_output); -+ s->virtqs[i] = virtio_add_queue(vdev, s->queue_size, -+ vhost_user_blk_handle_output); - } - - s->inflight = g_new0(struct vhost_inflight, 1); -- s->vqs = g_new(struct vhost_virtqueue, s->num_queues); -+ s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); -+ s->watch = 0; - s->connected = false; - - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, -@@ -467,11 +469,12 @@ reconnect: - return; - - virtio_err: -- g_free(s->vqs); -+ g_free(s->vhost_vqs); - g_free(s->inflight); - for (i = 0; i < s->num_queues; i++) { -- virtio_del_queue(vdev, i); -+ virtio_delete_queue(s->virtqs[i]); - } -+ g_free(s->virtqs); - virtio_cleanup(vdev); - vhost_user_cleanup(&s->vhost_user); - } -@@ -487,12 +490,13 @@ static void 
vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) - NULL, NULL, NULL, false); - vhost_dev_cleanup(&s->dev); - vhost_dev_free_inflight(s->inflight); -- g_free(s->vqs); -+ g_free(s->vhost_vqs); - g_free(s->inflight); - - for (i = 0; i < s->num_queues; i++) { -- virtio_del_queue(vdev, i); -+ virtio_delete_queue(s->virtqs[i]); - } -+ g_free(s->virtqs); - virtio_cleanup(vdev); - vhost_user_cleanup(&s->vhost_user); - } -diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h -index ad9b742..29375dd 100644 ---- a/include/hw/virtio/vhost-user-blk.h -+++ b/include/hw/virtio/vhost-user-blk.h -@@ -37,7 +37,9 @@ typedef struct VHostUserBlk { - struct vhost_dev dev; - struct vhost_inflight *inflight; - VhostUserState vhost_user; -- struct vhost_virtqueue *vqs; -+ struct vhost_virtqueue *vhost_vqs; -+ VirtQueue **virtqs; -+ guint watch; - bool connected; - } VHostUserBlk; - --- -1.8.3.1 - diff --git a/vhost-user-blk-delay-vhost_user_blk_disconnect.patch b/vhost-user-blk-delay-vhost_user_blk_disconnect.patch deleted file mode 100644 index 422e2a17b028d83690cc620a57829260c76aab52..0000000000000000000000000000000000000000 --- a/vhost-user-blk-delay-vhost_user_blk_disconnect.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 632a841b6ba547906b475250f5c2cb46774ab4af Mon Sep 17 00:00:00 2001 -From: Dima Stepanov -Date: Thu, 28 May 2020 12:11:19 +0300 -Subject: [PATCH 14/14] vhost-user-blk: delay vhost_user_blk_disconnect - -A socket write during vhost-user communication may trigger a disconnect -event, calling vhost_user_blk_disconnect() and clearing all the -vhost_dev structures holding data that vhost-user functions expect to -remain valid to roll back initialization correctly. Delay the cleanup to -keep vhost_dev structure valid. -There are two possible states to handle: -1. RUN_STATE_PRELAUNCH: skip bh oneshot call and perform disconnect in -the caller routine. -2. 
RUN_STATE_RUNNING: delay by using bh - -BH changes are based on the similar changes for the vhost-user-net -device: - commit e7c83a885f865128ae3cf1946f8cb538b63cbfba - "vhost-user: delay vhost_user_stop" - -Signed-off-by: Dima Stepanov -Message-Id: <69b73b94dcd066065595266c852810e0863a0895.1590396396.git.dimastep@yandex-team.ru> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: Li Feng -Reviewed-by: Raphael Norwitz -Signed-off-by: Peng Liang ---- - hw/block/vhost-user-blk.c | 38 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 37 insertions(+), 1 deletion(-) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index dc66f8a5febd..6b719d1d80e1 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -346,6 +346,19 @@ static void vhost_user_blk_disconnect(DeviceState *dev) - vhost_dev_cleanup(&s->dev); - } - -+static void vhost_user_blk_event(void *opaque, int event); -+ -+static void vhost_user_blk_chr_closed_bh(void *opaque) -+{ -+ DeviceState *dev = opaque; -+ VirtIODevice *vdev = VIRTIO_DEVICE(dev); -+ VHostUserBlk *s = VHOST_USER_BLK(vdev); -+ -+ vhost_user_blk_disconnect(dev); -+ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, -+ NULL, opaque, NULL, true); -+} -+ - static void vhost_user_blk_event(void *opaque, int event) - { - DeviceState *dev = opaque; -@@ -360,7 +373,30 @@ static void vhost_user_blk_event(void *opaque, int event) - } - break; - case CHR_EVENT_CLOSED: -- vhost_user_blk_disconnect(dev); -+ /* -+ * A close event may happen during a read/write, but vhost -+ * code assumes the vhost_dev remains setup, so delay the -+ * stop & clear. There are two possible paths to hit this -+ * disconnect event: -+ * 1. When VM is in the RUN_STATE_PRELAUNCH state. The -+ * vhost_user_blk_device_realize() is a caller. -+ * 2. In tha main loop phase after VM start. -+ * -+ * For p2 the disconnect event will be delayed. 
We can't -+ * do the same for p1, because we are not running the loop -+ * at this moment. So just skip this step and perform -+ * disconnect in the caller function. -+ * -+ * TODO: maybe it is a good idea to make the same fix -+ * for other vhost-user devices. -+ */ -+ if (runstate_is_running()) { -+ AioContext *ctx = qemu_get_current_aio_context(); -+ -+ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, -+ NULL, NULL, false); -+ aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque); -+ } - break; - } - } --- -2.26.2 - diff --git a/vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch b/vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch deleted file mode 100644 index e57f5b59c972cf4c769d7b87cc2a654b72eb3b64..0000000000000000000000000000000000000000 --- a/vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch +++ /dev/null @@ -1,69 +0,0 @@ -From d8febdc4940d719dba77a17a10a8d36ad08305ab Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Mon, 24 Feb 2020 12:13:35 +0800 -Subject: [PATCH 4/9] vhost-user-blk: delete virtioqueues in unrealize to fix - memleaks - -virtio queues forgot to delete in unrealize, and aslo error path in -realize, this patch fix these memleaks, the leak stack is as follow: - -Direct leak of 114688 byte(s) in 16 object(s) allocated from: - #0 0x7f24024fdbf0 in calloc (/lib64/libasan.so.3+0xcabf0) - #1 0x7f2401642015 in g_malloc0 (/lib64/libglib-2.0.so.0+0x50015) - #2 0x55ad175a6447 in virtio_add_queue /mnt/sdb/qemu/hw/virtio/virtio.c:2327 - #3 0x55ad17570cf9 in vhost_user_blk_device_realize /mnt/sdb/qemu/hw/block/vhost-user-blk.c:419 - #4 0x55ad175a3707 in virtio_device_realize /mnt/sdb/qemu/hw/virtio/virtio.c:3509 - #5 0x55ad176ad0d1 in device_set_realized /mnt/sdb/qemu/hw/core/qdev.c:876 - #6 0x55ad1781ff9d in property_set_bool /mnt/sdb/qemu/qom/object.c:2080 - #7 0x55ad178245ae in object_property_set_qobject /mnt/sdb/qemu/qom/qom-qobject.c:26 - #8 0x55ad17821eb4 in object_property_set_bool 
/mnt/sdb/qemu/qom/object.c:1338 - #9 0x55ad177aeed7 in virtio_pci_realize /mnt/sdb/qemu/hw/virtio/virtio-pci.c:1801 - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Reviewed-by: Stefan Hajnoczi -Message-Id: <20200224041336.30790-2-pannengyuan@huawei.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: AlexChen ---- - hw/block/vhost-user-blk.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index 6b719d1..dbc0a2e 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -469,6 +469,9 @@ reconnect: - virtio_err: - g_free(s->vqs); - g_free(s->inflight); -+ for (i = 0; i < s->num_queues; i++) { -+ virtio_del_queue(vdev, i); -+ } - virtio_cleanup(vdev); - vhost_user_cleanup(&s->vhost_user); - } -@@ -477,6 +480,7 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBlk *s = VHOST_USER_BLK(dev); -+ int i; - - virtio_set_status(vdev, 0); - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, -@@ -485,6 +489,10 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) - vhost_dev_free_inflight(s->inflight); - g_free(s->vqs); - g_free(s->inflight); -+ -+ for (i = 0; i < s->num_queues; i++) { -+ virtio_del_queue(vdev, i); -+ } - virtio_cleanup(vdev); - vhost_user_cleanup(&s->vhost_user); - } --- -1.8.3.1 - diff --git a/vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch b/vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch deleted file mode 100644 index 11f9ce807bca6c991b77e007d078a73289d27e2e..0000000000000000000000000000000000000000 --- a/vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch +++ /dev/null @@ -1,51 +0,0 @@ -From acb9f3aadde7222eacf95b2d70204dd6f8351ed7 Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Tue, 15 Jun 2021 10:14:06 +0800 -Subject: [PATCH] vhost-user-gpu: fix OOB 
write in 'virgl_cmd_get_capset' - (CVE-2021-3546) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-3544 - -If 'virgl_cmd_get_capset' set 'max_size' to 0, -the 'virgl_renderer_fill_caps' will write the data after the 'resp'. -This patch avoid this by checking the returned 'max_size'. - -virtio-gpu fix: abd7f08b - - ("display: virtio-gpu-3d: check -virgl capabilities max_size") - -Fixes: CVE-2021-3546 -Reported-by: default avatarLi Qiang -Reviewed-by: default avatarPrasad J Pandit -Signed-off-by: default avatarLi Qiang -Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau -Message-Id: <20210516030403.107723-8-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann - -Signed-off-by: Jiajie Li ---- - contrib/vhost-user-gpu/virgl.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c -index 44e79ab82a..ad2834902b 100644 ---- a/contrib/vhost-user-gpu/virgl.c -+++ b/contrib/vhost-user-gpu/virgl.c -@@ -173,6 +173,10 @@ virgl_cmd_get_capset(VuGpu *g, - - virgl_renderer_get_cap_set(gc.capset_id, &max_ver, - &max_size); -+ if (!max_size) { -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } - resp = g_malloc0(sizeof(*resp) + max_size); - - resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; --- -2.27.0 - diff --git a/vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch b/vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch deleted file mode 100644 index 46353183baae4d3465048614bb198988597901a7..0000000000000000000000000000000000000000 --- a/vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 511cac8cbc60fafdae2589d674b7aeab15388eef Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Tue, 15 Jun 2021 10:11:17 +0800 -Subject: [PATCH] vhost-user-gpu: fix memory disclosure in - virgl_cmd_get_capset_info (CVE-2021-3545) -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-3544 - -Otherwise some of the 'resp' will be leaked to guest. - -Fixes: CVE-2021-3545 -Reported-by: default avatarLi Qiang -virtio-gpu fix: 42a8dadc - - ("virtio-gpu: fix information leak -in getting capset info dispatch") -Signed-off-by: default avatarLi Qiang -Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau -Message-Id: <20210516030403.107723-2-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann - -Signed-off-by: Jiajie Li ---- - contrib/vhost-user-gpu/virgl.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c -index 79556df094..44e79ab82a 100644 ---- a/contrib/vhost-user-gpu/virgl.c -+++ b/contrib/vhost-user-gpu/virgl.c -@@ -131,6 +131,7 @@ virgl_cmd_get_capset_info(VuGpu *g, - - VUGPU_FILL_CMD(info); - -+ memset(&resp, 0, sizeof(resp)); - if (info.capset_index == 0) { - resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; - virgl_renderer_get_cap_set(resp.capset_id, --- -2.27.0 - diff --git a/vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch b/vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch deleted file mode 100644 index 7c44ec4578db09283d0a6fa67c85298679472f4d..0000000000000000000000000000000000000000 --- a/vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch +++ /dev/null @@ -1,49 +0,0 @@ -From b9f6004899adb8e501e1b9ce1cb0976a2268ad60 Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Tue, 15 Jun 2021 09:56:42 +0800 -Subject: [PATCH] vhost-user-gpu: fix memory leak in vg_resource_attach_backing - (CVE-2021-3544) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-3544 - -Check whether the 'res' has already been attach_backing to avoid -memory leak. 
- -Fixes: CVE-2021-3544 -Reported-by: default avatarLi Qiang -virtio-gpu fix: 204f01b3 - - ("virtio-gpu: fix memory leak -in resource attach backing") -Signed-off-by: default avatarLi Qiang -Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau -Message-Id: <20210516030403.107723-4-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann - -Signed-off-by: Jiajie Li ---- - contrib/vhost-user-gpu/main.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c -index f69af7d17f..4f087d6000 100644 ---- a/contrib/vhost-user-gpu/main.c -+++ b/contrib/vhost-user-gpu/main.c -@@ -468,6 +468,11 @@ vg_resource_attach_backing(VuGpu *g, - return; - } - -+ if (res->iov) { -+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -+ return; -+ } -+ - ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); - if (ret != 0) { - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; --- -2.27.0 - diff --git a/vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch b/vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch deleted file mode 100644 index c0dccd7a33a11fbcc7e14ed605f8eeff080e3c9a..0000000000000000000000000000000000000000 --- a/vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 5bdbe19681e151318b749cb6b2443626bf54b82e Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Tue, 15 Jun 2021 10:05:40 +0800 -Subject: [PATCH] vhost-user-gpu: fix memory leak in 'virgl_cmd_resource_unref' - (CVE-2021-3544) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-3544 - -The 'res->iov' will be leaked if the guest trigger following sequences: - - virgl_cmd_create_resource_2d - virgl_resource_attach_backing - virgl_cmd_resource_unref - -This patch fixes this. 
- -Fixes: CVE-2021-3544 -Reported-by: default avatarLi Qiang -virtio-gpu fix: 5e8e3c4c - - ("virtio-gpu: fix resource leak -in virgl_cmd_resource_unref" -Signed-off-by: default avatarLi Qiang -Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau -Message-Id: <20210516030403.107723-6-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann - -Signed-off-by: Jiajie Li ---- - contrib/vhost-user-gpu/virgl.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c -index 43413e29df..4b8b536edf 100644 ---- a/contrib/vhost-user-gpu/virgl.c -+++ b/contrib/vhost-user-gpu/virgl.c -@@ -105,8 +105,14 @@ virgl_cmd_resource_unref(VuGpu *g, - struct virtio_gpu_ctrl_command *cmd) - { - struct virtio_gpu_resource_unref unref; -+ struct iovec *res_iovs = NULL; -+ int num_iovs = 0; - - VUGPU_FILL_CMD(unref); -+ virgl_renderer_resource_detach_iov(unref.resource_id, -+ &res_iovs, -+ &num_iovs); -+ g_free(res_iovs); - - virgl_renderer_resource_unref(unref.resource_id); - } --- -2.27.0 - diff --git a/vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch b/vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch deleted file mode 100644 index 986a49f81fa988ed176c541265571ca01d5c5c85..0000000000000000000000000000000000000000 --- a/vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 6348348ee6a76c28159c64d6392fb6ba5a0b4374 Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Tue, 15 Jun 2021 10:09:13 +0800 -Subject: [PATCH] vhost-user-gpu: fix memory leak in - 'virgl_resource_attach_backing' (CVE-2021-3544) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-3544 - -If 'virgl_renderer_resource_attach_iov' failed, the 'res_iovs' will -be leaked. 
- -Fixes: CVE-2021-3544 -Reported-by: default avatarLi Qiang -virtio-gpu fix: 33243031 - - ("virtio-gpu-3d: fix memory leak -in resource attach backing") -Signed-off-by: default avatarLi Qiang -Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau -Message-Id: <20210516030403.107723-7-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann - -Signed-off-by: Jiajie Li ---- - contrib/vhost-user-gpu/virgl.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c -index 4b8b536edf..79556df094 100644 ---- a/contrib/vhost-user-gpu/virgl.c -+++ b/contrib/vhost-user-gpu/virgl.c -@@ -282,8 +282,11 @@ virgl_resource_attach_backing(VuGpu *g, - return; - } - -- virgl_renderer_resource_attach_iov(att_rb.resource_id, -+ ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, - res_iovs, att_rb.nr_entries); -+ if (ret != 0) { -+ g_free(res_iovs); -+ } - } - - static void --- -2.27.0 - diff --git a/vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch b/vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch deleted file mode 100644 index e67cd9a6f4220e628237a6cee4eeb5f755c67196..0000000000000000000000000000000000000000 --- a/vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch +++ /dev/null @@ -1,51 +0,0 @@ -From c276538416e9238e352d0f720db57ea1020e555f Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Tue, 15 Jun 2021 10:02:08 +0800 -Subject: [PATCH] vhost-user-gpu: fix memory leak while calling - 'vg_resource_unref' (CVE-2021-3544) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-3544 - -If the guest trigger following sequences, the attach_backing will be leaked: - - vg_resource_create_2d - vg_resource_attach_backing - vg_resource_unref - -This patch fix this by freeing 'res->iov' in vg_resource_destroy. 
- -Fixes: CVE-2021-3544 -Reported-by: default avatarLi Qiang -virtio-gpu fix: 5e8e3c4c - - ("virtio-gpu: fix resource leak -in virgl_cmd_resource_unref") -Reviewed-by: default avatarPrasad J Pandit -Signed-off-by: default avatarLi Qiang -Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau -Message-Id: <20210516030403.107723-5-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann - -Signed-off-by: Jiajie Li ---- - contrib/vhost-user-gpu/main.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c -index 4f087d6000..43d9851800 100644 ---- a/contrib/vhost-user-gpu/main.c -+++ b/contrib/vhost-user-gpu/main.c -@@ -379,6 +379,7 @@ vg_resource_destroy(VuGpu *g, - } - - vugbm_buffer_destroy(&res->buffer); -+ g_free(res->iov); - pixman_image_unref(res->image); - QTAILQ_REMOVE(&g->reslist, res, next); - g_free(res); --- -2.27.0 - diff --git a/vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch b/vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch deleted file mode 100644 index 8d565004e122e2c0308162fafb5ce3294a69f873..0000000000000000000000000000000000000000 --- a/vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 58e7327879e89700630ca766974a18f9ac55897c Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Tue, 15 Jun 2021 09:53:22 +0800 -Subject: [PATCH] vhost-user-gpu: fix resource leak in 'vg_resource_create_2d' - (CVE-2021-3544) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix CVE-2021-3544 - -Call 'vugbm_buffer_destroy' in error path to avoid resource leak. 
- -Fixes: CVE-2021-3544 -Reported-by: default avatarLi Qiang -Reviewed-by: default avatarPrasad J Pandit -Signed-off-by: default avatarLi Qiang -Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau -Message-Id: <20210516030403.107723-3-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann - -Signed-off-by: Jiajie Li ---- - contrib/vhost-user-gpu/main.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c -index b45d2019b4..f69af7d17f 100644 ---- a/contrib/vhost-user-gpu/main.c -+++ b/contrib/vhost-user-gpu/main.c -@@ -328,6 +328,7 @@ vg_resource_create_2d(VuGpu *g, - g_critical("%s: resource creation failed %d %d %d", - __func__, c2d.resource_id, c2d.width, c2d.height); - g_free(res); -+ vugbm_buffer_destroy(&res->buffer); - cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; - return; - } --- -2.27.0 - diff --git a/vhost-user-quit-infinite-loop-while-used-memslots-is.patch b/vhost-user-quit-infinite-loop-while-used-memslots-is.patch new file mode 100644 index 0000000000000000000000000000000000000000..c62111f584f741e6d64f9d08f77939bf3c525814 --- /dev/null +++ b/vhost-user-quit-infinite-loop-while-used-memslots-is.patch @@ -0,0 +1,89 @@ +From 90d4333d4bbde45a10892bf9004979d239d39e28 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 19:24:30 +0800 +Subject: [PATCH] vhost-user: quit infinite loop while used memslots is more + than the backend limit + +When used memslots is more than the backend limit, +the vhost-user netcard would attach fail and quit +infinite loop. 
+ +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost.c | 10 ++++++++++ + include/hw/virtio/vhost.h | 1 + + net/vhost-user.c | 5 +++++ + 3 files changed, 16 insertions(+) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index a8adc149ad..038ac37dd0 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -56,6 +56,8 @@ static unsigned int used_shared_memslots; + static QLIST_HEAD(, vhost_dev) vhost_devices = + QLIST_HEAD_INITIALIZER(vhost_devices); + ++bool used_memslots_exceeded; ++ + unsigned int vhost_get_max_memslots(void) + { + unsigned int max = UINT_MAX; +@@ -1569,8 +1571,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + error_setg(errp, "vhost backend memory slots limit (%d) is less" + " than current number of used (%d) and reserved (%d)" + " memory slots for memory devices.", limit, used, reserved); ++ used_memslots_exceeded = true; + r = -EINVAL; + goto fail_busyloop; ++ } else { ++ used_memslots_exceeded = false; + } + + return 0; +@@ -2405,3 +2410,8 @@ fail: + + return ret; + } ++ ++bool used_memslots_is_exceeded(void) ++{ ++ return used_memslots_exceeded; ++} +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 02477788df..444ca0ad42 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -340,6 +340,7 @@ int vhost_dev_set_inflight(struct vhost_dev *dev, + struct vhost_inflight *inflight); + int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, + struct vhost_inflight *inflight); ++bool used_memslots_is_exceeded(void); + bool vhost_dev_has_iommu(struct vhost_dev *dev); + + #ifdef CONFIG_VHOST +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 51fa8c678f..86fd5056ab 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -20,6 +20,7 @@ + #include "qemu/error-report.h" + #include "qemu/option.h" + #include "trace.h" ++#include "include/hw/virtio/vhost.h" + + #define VHOST_USER_RECONNECT_TIME (3) + +@@ -373,6 +374,10 @@ static int net_vhost_user_init(NetClientState 
*peer, const char *device, + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, + net_vhost_user_event, NULL, nc0->name, NULL, + true); ++ if (used_memslots_is_exceeded()) { ++ error_report("used memslots exceeded the backend limit, quit loop"); ++ goto err; ++ } + } while (!s->started); + + assert(s->vhost_net); +-- +2.27.0 + diff --git a/vhost-user-save-features-if-the-char-dev-is-closed.patch b/vhost-user-save-features-if-the-char-dev-is-closed.patch deleted file mode 100644 index 9a0d04f4d7d98fd97e84bfba35f99d4871605b37..0000000000000000000000000000000000000000 --- a/vhost-user-save-features-if-the-char-dev-is-closed.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 7b404cae7fa2850d476c29258f03b8e77a5b4bd0 Mon Sep 17 00:00:00 2001 -From: Adrian Moreno -Date: Tue, 24 Sep 2019 18:20:44 +0200 -Subject: [PATCH] vhost-user: save features if the char dev is closed - -That way the state can be correctly restored when the device is opened -again. This might happen if the backend is restarted. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1738768 -Reported-by: Pei Zhang -Fixes: 6ab79a20af3a ("do not call vhost_net_cleanup() on running net from char user event") -Cc: ddstreet@canonical.com -Cc: Michael S. Tsirkin -Cc: qemu-stable@nongnu.org -Signed-off-by: Adrian Moreno -Message-Id: <20190924162044.11414-1-amorenoz@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit c6beefd674fff8d41b90365dfccad32e53a5abcb) -Signed-off-by: Michael Roth ---- - net/vhost-user.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/vhost-user.c b/net/vhost-user.c -index 51921de443..014199d600 100644 ---- a/net/vhost-user.c -+++ b/net/vhost-user.c -@@ -235,6 +235,10 @@ static void chr_closed_bh(void *opaque) - - s = DO_UPCAST(NetVhostUserState, nc, ncs[0]); - -+ if (s->vhost_net) { -+ s->acked_features = vhost_net_get_acked_features(s->vhost_net); -+ } -+ - qmp_set_link(name, false, &err); - - qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, --- -2.23.0 diff --git a/vhost-user-scsi-prevent-using-uninitialized-vqs.patch b/vhost-user-scsi-prevent-using-uninitialized-vqs.patch deleted file mode 100644 index d1bf2a087bf4fcbedf2a8c0fbb8b62188737d1e7..0000000000000000000000000000000000000000 --- a/vhost-user-scsi-prevent-using-uninitialized-vqs.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 4d8f2885b3f1219c3df2cf1a00dc0c55b23ee715 Mon Sep 17 00:00:00 2001 -From: Raphael Norwitz -Date: Tue, 14 Apr 2020 21:39:05 +0800 -Subject: [PATCH] vhost-user-scsi: prevent using uninitialized vqs - -Of the 3 virtqueues, seabios only sets cmd, leaving ctrl -and event without a physical address. This can cause -vhost_verify_ring_part_mapping to return ENOMEM, causing -the following logs: - -qemu-system-x86_64: Unable to map available ring for ring 0 -qemu-system-x86_64: Verify ring failure on region 0 - -The qemu commit e6cc11d64fc998c11a4dfcde8fda3fc33a74d844 -has already resolved the issue for vhost scsi devices but -the fix was never applied to vhost-user scsi devices. 
- -Signed-off-by: Raphael Norwitz -Reviewed-by: Stefan Hajnoczi -Message-id: 1560299717-177734-1-git-send-email-raphael.norwitz@nutanix.com -Message-Id: <1560299717-177734-1-git-send-email-raphael.norwitz@nutanix.com> -Signed-off-by: Stefan Hajnoczi -(cherry-picked from commit 5d4c1ed3d46d7e2010b389fe5f3376f605182ab0) ---- - hw/scsi/vhost-user-scsi.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c -index fcee67d5..affc2431 100644 ---- a/hw/scsi/vhost-user-scsi.c -+++ b/hw/scsi/vhost-user-scsi.c -@@ -91,7 +91,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) - } - - vsc->dev.nvqs = 2 + vs->conf.num_queues; -- vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs); -+ vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); - vsc->dev.vq_index = 0; - vsc->dev.backend_features = 0; - vqs = vsc->dev.vqs; --- -2.23.0 diff --git a/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch b/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch new file mode 100644 index 0000000000000000000000000000000000000000..583fc50067bd42d94ccb891dbc358b4c9470613d --- /dev/null +++ b/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch @@ -0,0 +1,49 @@ +From 3fe9a15feba924675ffcc5b797185091cfb8a007 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 14:49:53 +0800 +Subject: [PATCH] vhost-vdpa: add VHOST_BACKEND_F_BYTEMAPLOG + +support VHOST_BACKEND_F_BYTEMAPLOG to support vhost +device bytemap logging. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 9 +++++---- + include/standard-headers/linux/vhost_types.h | 2 ++ + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 819b2d811a..ce8ff7f417 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -829,10 +829,11 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + { + uint64_t features; +- uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | +- 0x1ULL << VHOST_BACKEND_F_SUSPEND; ++ uint64_t f = BIT_ULL(VHOST_BACKEND_F_IOTLB_MSG_V2) | ++ BIT_ULL(VHOST_BACKEND_F_IOTLB_BATCH) | ++ BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID) | ++ BIT_ULL(VHOST_BACKEND_F_SUSPEND) | ++ BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG); + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index fd54044936..46fc53cd83 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -192,5 +192,7 @@ struct vhost_vdpa_iova_range { + #define VHOST_BACKEND_F_DESC_ASID 0x7 + /* IOTLB don't flush memory mapping across device reset */ + #define VHOST_BACKEND_F_IOTLB_PERSIST 0x8 ++/* device can use bytemap log */ ++#define VHOST_BACKEND_F_BYTEMAPLOG 0x3f + + #endif +-- +2.27.0 + diff --git a/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch b/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch new file mode 100644 index 0000000000000000000000000000000000000000..83b2612fbff63dda4649c055176a43423092c1d6 --- /dev/null +++ b/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch @@ -0,0 +1,127 @@ +From 3bc7a4e430e01fd90b427bf74a904664eda9ece6 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:04:25 +0800 +Subject: [PATCH] vhost-vdpa: 
add migration log ops for VhostOps + +Implement vhost_set_log_size for setting buffer size for logging. +Implement vhost_set_log_fd to specify an eventfd to signal on log write. +Implement vhost_log_sync for getting dirtymap logged by vhost backend. + +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 37 +++++++++++++++++++++++++++++++ + include/hw/virtio/vhost-backend.h | 8 +++++++ + linux-headers/linux/vhost.h | 4 ++++ + 3 files changed, 49 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index ce8ff7f417..037a9c6e4c 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1355,6 +1355,30 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); + } + ++static int vhost_vdpa_set_log_fd(struct vhost_dev *dev, int fd, ++ struct vhost_log *log) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_SET_LOG_FD, &fd); ++} ++ ++static int vhost_vdpa_set_log_size(struct vhost_dev *dev, uint64_t size, ++ struct vhost_log *log) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ uint64_t logsize = size * sizeof(*(log->log)); ++ ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_SET_LOG_SIZE, &logsize); ++} ++ + static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) + { +@@ -1489,11 +1513,23 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + return true; + } + ++static int vhost_vdpa_log_sync(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_LOG_SYNC, NULL); ++} ++ + const VhostOps vdpa_ops = { + .backend_type = VHOST_BACKEND_TYPE_VDPA, + .vhost_backend_init = vhost_vdpa_init, + .vhost_backend_cleanup 
= vhost_vdpa_cleanup, + .vhost_set_log_base = vhost_vdpa_set_log_base, ++ .vhost_set_log_size = vhost_vdpa_set_log_size, ++ .vhost_set_log_fd = vhost_vdpa_set_log_fd, + .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, + .vhost_set_vring_num = vhost_vdpa_set_vring_num, + .vhost_set_vring_base = vhost_vdpa_set_vring_base, +@@ -1520,6 +1556,7 @@ const VhostOps vdpa_ops = { + .vhost_get_device_id = vhost_vdpa_get_device_id, + .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, + .vhost_force_iommu = vhost_vdpa_force_iommu, ++ .vhost_log_sync = vhost_vdpa_log_sync, + .vhost_set_config_call = vhost_vdpa_set_config_call, + .vhost_reset_status = vhost_vdpa_reset_status, + }; +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index a86d103f82..71b02e4a12 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -65,6 +65,11 @@ typedef int (*vhost_scsi_get_abi_version_op)(struct vhost_dev *dev, + int *version); + typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log); ++typedef int (*vhost_set_log_size_op)(struct vhost_dev *dev, uint64_t size, ++ struct vhost_log *log); ++typedef int (*vhost_set_log_fd_op)(struct vhost_dev *dev, int fd, ++ struct vhost_log *log); ++typedef int (*vhost_log_sync_op)(struct vhost_dev *dev); + typedef int (*vhost_set_mem_table_op)(struct vhost_dev *dev, + struct vhost_memory *mem); + typedef int (*vhost_set_vring_addr_op)(struct vhost_dev *dev, +@@ -162,6 +167,9 @@ typedef struct VhostOps { + vhost_scsi_clear_endpoint_op vhost_scsi_clear_endpoint; + vhost_scsi_get_abi_version_op vhost_scsi_get_abi_version; + vhost_set_log_base_op vhost_set_log_base; ++ vhost_set_log_size_op vhost_set_log_size; ++ vhost_set_log_fd_op vhost_set_log_fd; ++ vhost_log_sync_op vhost_log_sync; + vhost_set_mem_table_op vhost_set_mem_table; + vhost_set_vring_addr_op vhost_set_vring_addr; + vhost_set_vring_endian_op vhost_set_vring_endian; +diff --git 
a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 649560c685..19dc7fd36c 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -43,6 +43,10 @@ + * The bit is set using an atomic 32 bit operation. */ + /* Set base address for logging. */ + #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) ++/* Set buffer size for logging */ ++#define VHOST_SET_LOG_SIZE _IOW(VHOST_VIRTIO, 0x05, __u64) ++/* Logging sync */ ++#define VHOST_LOG_SYNC _IO(VHOST_VIRTIO, 0x06) + /* Specify an eventfd file descriptor to signal on log write. */ + #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + /* By default, a device gets one vhost_worker that its virtqueues share. This +-- +2.27.0 + diff --git a/virtcca-add-kvm-isolation-when-get-tmi-version.patch b/virtcca-add-kvm-isolation-when-get-tmi-version.patch new file mode 100644 index 0000000000000000000000000000000000000000..797768e12b8f1505980d2577abe4ec62b3141203 --- /dev/null +++ b/virtcca-add-kvm-isolation-when-get-tmi-version.patch @@ -0,0 +1,72 @@ +From c8eba92f97b68fad3f84dde2fb6fd4409738e626 Mon Sep 17 00:00:00 2001 +From: lihuhua +Date: Sat, 22 Mar 2025 12:01:26 +0800 +Subject: [PATCH] virtcca: add kvm isolation when get tmi version. 
+ +--- + hw/arm/boot.c | 7 ++++++- + hw/arm/virt.c | 6 +++++- + linux-headers/asm-arm64/kvm.h | 2 -- + linux-headers/linux/kvm.h | 1 + + 4 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index a3e0dbb68c..9a33601d35 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -1163,7 +1163,12 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, + const char *firmware_filename) + { + uint64_t tmi_version = 0; +- if (kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version) < 0) { ++ int ret = -1; ++ ++ if (kvm_enabled()) { ++ ret = kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version); ++ } ++ if (ret < 0) { + error_report("please check the kernel version!"); + exit(EXIT_FAILURE); + } +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 3c31d3667e..fed2f8c4d7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2057,7 +2057,11 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + vms->memmap[VIRT_PCIE_MMIO] = (MemMapEntry) { 0x10000000, 0x2edf0000 }; + vms->memmap[VIRT_KAE_DEVICE] = (MemMapEntry) { 0x3edf0000, 0x00200000 }; + uint64_t tmi_version = 0; +- if (kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version) < 0) { ++ int ret = -1; ++ if (kvm_enabled()) { ++ ret = kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version); ++ } ++ if (ret < 0) { + warn_report("can not get tmi version"); + } + if (tmi_version < MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM) { +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index d69a71cbec..552fdcb18f 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -597,6 +597,4 @@ struct kvm_cap_arm_tmm_populate_region_args { + + #endif + +-#define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001 +- + #endif /* __ARM_KVM_H__ */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index c9ec7f862a..b94c5fd90f 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -2430,5 +2430,6 @@ struct 
kvm_s390_zpci_op { + + /* get tmi version */ + #define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, uint64_t) ++#define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001 + + #endif /* __LINUX_KVM_H */ +-- +2.41.0.windows.1 + diff --git a/virtio-add-ability-to-delete-vq-through-a-pointer.patch b/virtio-add-ability-to-delete-vq-through-a-pointer.patch deleted file mode 100644 index e0989895e40fe4e0d3816966a24075d74b8797a0..0000000000000000000000000000000000000000 --- a/virtio-add-ability-to-delete-vq-through-a-pointer.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 98ae454efe48b2a465dfe9bc3c412b6375f1fbfc Mon Sep 17 00:00:00 2001 -From: "Michael S. Tsirkin" -Date: Mon, 9 Dec 2019 11:46:13 -0500 -Subject: [PATCH 1/9] virtio: add ability to delete vq through a pointer - -Devices tend to maintain vq pointers, allow deleting them trough a vq pointer. - -Signed-off-by: Michael S. Tsirkin -Reviewed-by: David Hildenbrand -Signed-off-by: AlexChen ---- - hw/virtio/virtio.c | 13 +++++++++---- - include/hw/virtio/virtio.h | 2 ++ - 2 files changed, 11 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 79c2dcf..3d027d3 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -1636,16 +1636,21 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - return &vdev->vq[i]; - } - -+void virtio_delete_queue(VirtQueue *vq) -+{ -+ vq->vring.num = 0; -+ vq->vring.num_default = 0; -+ vq->handle_output = NULL; -+ vq->handle_aio_output = NULL; -+} -+ - void virtio_del_queue(VirtIODevice *vdev, int n) - { - if (n < 0 || n >= VIRTIO_QUEUE_MAX) { - abort(); - } - -- vdev->vq[n].vring.num = 0; -- vdev->vq[n].vring.num_default = 0; -- vdev->vq[n].handle_output = NULL; -- vdev->vq[n].handle_aio_output = NULL; -+ virtio_delete_queue(&vdev->vq[n]); - } - - static void virtio_set_isr(VirtIODevice *vdev, int value) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index f9f6237..ca2fbae 100644 ---- a/include/hw/virtio/virtio.h -+++ 
b/include/hw/virtio/virtio.h -@@ -187,6 +187,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - - void virtio_del_queue(VirtIODevice *vdev, int n); - -+void virtio_delete_queue(VirtQueue *vq); -+ - void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len); - void virtqueue_flush(VirtQueue *vq, unsigned int count); --- -1.8.3.1 - diff --git a/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch b/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch deleted file mode 100644 index 3c2a3f2a424bd30ff95b8fcb53ae23c324d43153..0000000000000000000000000000000000000000 --- a/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 01be50603be4f17af4318a7a3fe58dcc6dab1b31 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 16 Aug 2019 19:15:03 +0200 -Subject: [PATCH] virtio-blk: Cancel the pending BH when the dataplane is reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When 'system_reset' is called, the main loop clear the memory -region cache before the BH has a chance to execute. Later when -the deferred function is called, some assumptions that were -made when scheduling them are no longer true when they actually -execute. 
- -This is what happens using a virtio-blk device (fresh RHEL7.8 install): - - $ (sleep 12.3; echo system_reset; sleep 12.3; echo system_reset; sleep 1; echo q) \ - | qemu-system-x86_64 -m 4G -smp 8 -boot menu=on \ - -device virtio-blk-pci,id=image1,drive=drive_image1 \ - -drive file=/var/lib/libvirt/images/rhel78.qcow2,if=none,id=drive_image1,format=qcow2,cache=none \ - -device virtio-net-pci,netdev=net0,id=nic0,mac=52:54:00:c4:e7:84 \ - -netdev tap,id=net0,script=/bin/true,downscript=/bin/true,vhost=on \ - -monitor stdio -serial null -nographic - (qemu) system_reset - (qemu) system_reset - (qemu) qemu-system-x86_64: hw/virtio/virtio.c:225: vring_get_region_caches: Assertion `caches != NULL' failed. - Aborted - - (gdb) bt - Thread 1 (Thread 0x7f109c17b680 (LWP 10939)): - #0 0x00005604083296d1 in vring_get_region_caches (vq=0x56040a24bdd0) at hw/virtio/virtio.c:227 - #1 0x000056040832972b in vring_avail_flags (vq=0x56040a24bdd0) at hw/virtio/virtio.c:235 - #2 0x000056040832d13d in virtio_should_notify (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1648 - #3 0x000056040832d1f8 in virtio_notify_irqfd (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1662 - #4 0x00005604082d213d in notify_guest_bh (opaque=0x56040a243ec0) at hw/block/dataplane/virtio-blk.c:75 - #5 0x000056040883dc35 in aio_bh_call (bh=0x56040a243f10) at util/async.c:90 - #6 0x000056040883dccd in aio_bh_poll (ctx=0x560409161980) at util/async.c:118 - #7 0x0000560408842af7 in aio_dispatch (ctx=0x560409161980) at util/aio-posix.c:460 - #8 0x000056040883e068 in aio_ctx_dispatch (source=0x560409161980, callback=0x0, user_data=0x0) at util/async.c:261 - #9 0x00007f10a8fca06d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 - #10 0x0000560408841445 in glib_pollfds_poll () at util/main-loop.c:215 - #11 0x00005604088414bf in os_host_main_loop_wait (timeout=0) at util/main-loop.c:238 - #12 0x00005604088415c4 in main_loop_wait (nonblocking=0) at util/main-loop.c:514 - #13 
0x0000560408416b1e in main_loop () at vl.c:1923 - #14 0x000056040841e0e8 in main (argc=20, argv=0x7ffc2c3f9c58, envp=0x7ffc2c3f9d00) at vl.c:4578 - -Fix this by cancelling the BH when the virtio dataplane is stopped. - -[This is version of the patch was modified as discussed with Philippe on -the mailing list thread. ---Stefan] - -Reported-by: Yihuang Yu -Suggested-by: Stefan Hajnoczi -Fixes: https://bugs.launchpad.net/qemu/+bug/1839428 -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20190816171503.24761-1-philmd@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ebb6ff25cd888a52a64a9adc3692541c6d1d9a42) -Signed-off-by: Michael Roth ---- - hw/block/dataplane/virtio-blk.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 158c78f852..5fea76df85 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -297,6 +297,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - -+ qemu_bh_cancel(s->bh); -+ notify_guest_bh(s); /* final chance to notify guest */ -+ - /* Clean up guest notifier (irq) */ - k->set_guest_notifiers(qbus->parent, nvqs, false); - --- -2.23.0 diff --git a/virtio-blk-On-restart-process-queued-requests-in-the.patch b/virtio-blk-On-restart-process-queued-requests-in-the.patch deleted file mode 100644 index 5edb6fd96f56ff66757af73d40bb90db142bbdc9..0000000000000000000000000000000000000000 --- a/virtio-blk-On-restart-process-queued-requests-in-the.patch +++ /dev/null @@ -1,191 +0,0 @@ -From 882897127955fbede44c73703ec297c8ae89775d Mon Sep 17 00:00:00 2001 -From: Sergio Lopez -Date: Thu, 21 Jan 2021 15:46:52 +0800 -Subject: [PATCH] virtio-blk: On restart, process queued requests in the proper - context - -On restart, we were scheduling a BH to process queued requests, which -would run before starting up the data plane, leading to those requests -being assigned 
and started on coroutines on the main context. - -This could cause requests to be wrongly processed in parallel from -different threads (the main thread and the iothread managing the data -plane), potentially leading to multiple issues. - -For example, stopping and resuming a VM multiple times while the guest -is generating I/O on a virtio_blk device can trigger a crash with a -stack tracing looking like this one: - -<------> - Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): - #0 0x00005567a13b99d6 in iov_memset - (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) - at util/iov.c:69 - #1 0x00005567a13bab73 in qemu_iovec_memset - (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 - #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 - #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 - #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 - #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) - at block/linux-aio.c:236 - #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 - #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) - at util/aio-posix.c:520 - #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) - at util/aio-posix.c:562 - #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) - at util/aio-posix.c:597 - #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 - #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 - #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 - #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 - #14 
0x00007ff73ec10e83 in clone () at /lib64/libc.so.6 - - Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): - #0 0x00005567a13b99d6 in iov_memset - (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) - at util/iov.c:69 - #1 0x00005567a13bab73 in qemu_iovec_memset - (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 - #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 - #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 - #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 - #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) - at block/linux-aio.c:375 - #6 0x00005567a12f4af2 in laio_co_submit - (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) - at block/linux-aio.c:394 - #7 0x00005567a12f1803 in raw_co_prw - (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) - at block/file-posix.c:1892 - #8 0x00005567a12f1941 in raw_co_pwritev - (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) - at block/file-posix.c:1925 - #9 0x00005567a12fe3e1 in bdrv_driver_pwritev - (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) - at block/io.c:1183 - #10 0x00005567a1300340 in bdrv_aligned_pwritev - (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 - #11 0x00005567a1300b29 in bdrv_co_pwritev_part - (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) - at block/io.c:2137 - #12 0x00005567a12baba1 in qcow2_co_pwritev_task - (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at 
block/qcow2.c:2444 - #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 - #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 - #15 0x00005567a13cf00c in coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 - #16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6 - #17 0x00007ff6626f1350 in () - #18 0x0000000000000000 in () -<------> - -This is also known to cause crashes with this message (assertion -failed): - - aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 -Signed-off-by: Sergio Lopez -Message-Id: <20200603093240.40489-3-slp(a)redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/block/dataplane/virtio-blk.c | 8 ++++++++ - hw/block/virtio-blk.c | 18 ++++++++++++------ - include/hw/virtio/virtio-blk.h | 2 +- - 3 files changed, 21 insertions(+), 7 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 5fea76df85..4476f97960 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -219,6 +219,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - goto fail_guest_notifiers; - } - -+ /* Process queued requests before the ones in vring */ -+ virtio_blk_process_queued_requests(vblk, false); -+ - /* Kick right away to begin processing requests already in vring */ - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); -@@ -238,6 +241,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - return 0; - - fail_guest_notifiers: -+ /* -+ * If we failed to set up the guest notifiers queued requests will be -+ * processed on the main context. 
-+ */ -+ virtio_blk_process_queued_requests(vblk, false); - vblk->dataplane_disabled = true; - s->starting = false; - vblk->dataplane_started = true; -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index cee2c673a5..ddf525b9d7 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -809,7 +809,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - virtio_blk_handle_output_do(s, vq); - } - --void virtio_blk_process_queued_requests(VirtIOBlock *s) -+void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) - { - VirtIOBlockReq *req = s->rq; - MultiReqBuffer mrb = {}; -@@ -837,7 +837,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s) - if (mrb.num_reqs) { - virtio_blk_submit_multireq(s->blk, &mrb); - } -- blk_dec_in_flight(s->conf.conf.blk); -+ if (is_bh) { -+ blk_dec_in_flight(s->conf.conf.blk); -+ } - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - -@@ -848,21 +850,25 @@ static void virtio_blk_dma_restart_bh(void *opaque) - qemu_bh_delete(s->bh); - s->bh = NULL; - -- virtio_blk_process_queued_requests(s); -+ virtio_blk_process_queued_requests(s, true); - } - - static void virtio_blk_dma_restart_cb(void *opaque, int running, - RunState state) - { - VirtIOBlock *s = opaque; -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); -+ VirtioBusState *bus = VIRTIO_BUS(qbus); - - if (!running) { - return; - } - -- if (!s->bh) { -- /* FIXME The data plane is not started yet, so these requests are -- * processed in the main thread. */ -+ /* -+ * If ioeventfd is enabled, don't schedule the BH here as queued -+ * requests will be processed while starting the data plane. 
-+ */ -+ if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { - s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), - virtio_blk_dma_restart_bh, s); - blk_inc_in_flight(s->conf.conf.blk); -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index cf8eea2f58..e77f0db3b0 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -84,6 +84,6 @@ typedef struct MultiReqBuffer { - } MultiReqBuffer; - - bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); --void virtio_blk_process_queued_requests(VirtIOBlock *s); -+void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); - - #endif --- -2.27.0 - diff --git a/virtio-blk-Refactor-the-code-that-processes-queued-r.patch b/virtio-blk-Refactor-the-code-that-processes-queued-r.patch deleted file mode 100644 index 2848fbd0aae20aceae61a1ad4b07a47d147659b4..0000000000000000000000000000000000000000 --- a/virtio-blk-Refactor-the-code-that-processes-queued-r.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 21c5ffb363930dfe6213bb677c5811fede3bcee2 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez -Date: Thu, 21 Jan 2021 15:46:51 +0800 -Subject: [PATCH] virtio-blk: Refactor the code that processes queued requests - -Move the code that processes queued requests from -virtio_blk_dma_restart_bh() to its own, non-static, function. This -will allow us to call it from the virtio_blk_data_plane_start() in a -future patch. 
- -Signed-off-by: Sergio Lopez -Message-Id: <20200603093240.40489-2-slp(a)redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 16 +++++++++++----- - include/hw/virtio/virtio-blk.h | 1 + - 2 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 703ed4c93b..cee2c673a5 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -809,15 +809,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - virtio_blk_handle_output_do(s, vq); - } - --static void virtio_blk_dma_restart_bh(void *opaque) -+void virtio_blk_process_queued_requests(VirtIOBlock *s) - { -- VirtIOBlock *s = opaque; - VirtIOBlockReq *req = s->rq; - MultiReqBuffer mrb = {}; - -- qemu_bh_delete(s->bh); -- s->bh = NULL; -- - s->rq = NULL; - - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); -@@ -845,6 +841,16 @@ static void virtio_blk_dma_restart_bh(void *opaque) - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - -+static void virtio_blk_dma_restart_bh(void *opaque) -+{ -+ VirtIOBlock *s = opaque; -+ -+ qemu_bh_delete(s->bh); -+ s->bh = NULL; -+ -+ virtio_blk_process_queued_requests(s); -+} -+ - static void virtio_blk_dma_restart_cb(void *opaque, int running, - RunState state) - { -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index cddcfbebe9..cf8eea2f58 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -84,5 +84,6 @@ typedef struct MultiReqBuffer { - } MultiReqBuffer; - - bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); -+void virtio_blk_process_queued_requests(VirtIOBlock *s); - - #endif --- -2.27.0 - diff --git a/virtio-blk-delete-vqs-on-the-error-path-in-realize.patch b/virtio-blk-delete-vqs-on-the-error-path-in-realize.patch deleted file mode 100644 index 205f663470d3aa594910bd19e2be8547d226e1a8..0000000000000000000000000000000000000000 --- 
a/virtio-blk-delete-vqs-on-the-error-path-in-realize.patch +++ /dev/null @@ -1,45 +0,0 @@ -From ec8a25fec9898f46a6a94aa4f328fe02948b3d59 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Sat, 28 Mar 2020 08:57:04 +0800 -Subject: [PATCH 12/14] virtio-blk: delete vqs on the error path in realize() - -virtio_vqs forgot to free on the error path in realize(). Fix that. - -The asan stack: -Direct leak of 14336 byte(s) in 1 object(s) allocated from: - #0 0x7f58b93fd970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) - #1 0x7f58b858249d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) - #2 0x5562cc627f49 in virtio_add_queue /mnt/sdb/qemu/hw/virtio/virtio.c:2413 - #3 0x5562cc4b524a in virtio_blk_device_realize /mnt/sdb/qemu/hw/block/virtio-blk.c:1202 - #4 0x5562cc613050 in virtio_device_realize /mnt/sdb/qemu/hw/virtio/virtio.c:3615 - #5 0x5562ccb7a568 in device_set_realized /mnt/sdb/qemu/hw/core/qdev.c:891 - #6 0x5562cd39cd45 in property_set_bool /mnt/sdb/qemu/qom/object.c:2238 - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Reviewed-by: Stefano Garzarella -Message-Id: <20200328005705.29898-2-pannengyuan@huawei.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Signed-off-by: Peng Liang ---- - hw/block/virtio-blk.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index cbb3729158fe..703ed4c93bff 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1173,6 +1173,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); - if (err != NULL) { - error_propagate(errp, err); -+ for (i = 0; i < conf->num_queues; i++) { -+ virtio_del_queue(vdev, i); -+ } - virtio_cleanup(vdev); - return; - } --- -2.26.2 - diff --git a/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch b/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d476ac2edd6745c975e35694679cbe38c8e3cd2 --- /dev/null +++ b/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch @@ -0,0 +1,38 @@ +From 7b4a9547e68147291e68258db9415ef5a20fe06b Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 11:16:26 +0800 +Subject: [PATCH] virtio: bugfix: add rcu_read_lock when vring_avail_idx is + called + +viring_avail_idx should be called within rcu_read_lock(), +or may get NULL caches in vring_get_region_caches() and +trigger assert(). + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 27ceab92be..ec09d515c2 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2801,6 +2801,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) + { + uint16_t nheads; + ++ rcu_read_lock(); + /* Check it isn't doing strange things with descriptor numbers. 
*/ + nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; + if (nheads > vdev->vq[index].vring.num) { +@@ -2811,6 +2812,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) + vring_avail_idx(&vdev->vq[index]), + vdev->vq[index].last_avail_idx, nheads); + } ++ rcu_read_unlock(); + } + + int virtio_save(VirtIODevice *vdev, QEMUFile *f) +-- +2.27.0 + diff --git a/virtio-bugfix-check-the-value-of-caches-before-acces.patch b/virtio-bugfix-check-the-value-of-caches-before-acces.patch new file mode 100644 index 0000000000000000000000000000000000000000..56d0513fecb8f477e785bed17a1d59ab51ca42f4 --- /dev/null +++ b/virtio-bugfix-check-the-value-of-caches-before-acces.patch @@ -0,0 +1,42 @@ +From f6b3e8ea39d00d25ab979f7b24842dc24e263ed8 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 14:37:52 +0800 +Subject: [PATCH] virtio: bugfix: check the value of caches before accessing it + +Vring caches may be NULL in check_vring_avail_num() if +virtio_reset() is called at the same time, such as when +the virtual machine starts. +So check it before accessing it in vring_avail_idx(). + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 1f78b74c00..d93ea62723 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2800,8 +2800,19 @@ static const VMStateDescription vmstate_virtio = { + static void check_vring_avail_num(VirtIODevice *vdev, int index) + { + uint16_t nheads; ++ VRingMemoryRegionCaches *caches; + + rcu_read_lock(); ++ caches = qatomic_rcu_read(&vdev->vq[index].vring.caches); ++ if (caches == NULL) { ++ /* ++ * caches may be NULL if virtio_reset is called at the same time, ++ * such as when the virtual machine starts. ++ */ ++ rcu_read_unlock(); ++ return; ++ } ++ + /* Check it isn't doing strange things with descriptor numbers. 
*/ + nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; + if (nheads > vdev->vq[index].vring.num) { +-- +2.27.0 + diff --git a/virtio-check-descriptor-numbers.patch b/virtio-check-descriptor-numbers.patch new file mode 100644 index 0000000000000000000000000000000000000000..f20748d899e42c610ff9653c39b99a331e919ba8 --- /dev/null +++ b/virtio-check-descriptor-numbers.patch @@ -0,0 +1,52 @@ +From b57e956ea522b487081d1c94aa2e4af6a3314d20 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 11:09:36 +0800 +Subject: [PATCH] virtio: check descriptor numbers + +Check if the vring num is normal in virtio_save(), and add LOG +the vm push the wrong viring num down through writing IO Port. + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index a9aa0c4f66..27ceab92be 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2797,6 +2797,22 @@ static const VMStateDescription vmstate_virtio = { + } + }; + ++static void check_vring_avail_num(VirtIODevice *vdev, int index) ++{ ++ uint16_t nheads; ++ ++ /* Check it isn't doing strange things with descriptor numbers. 
*/ ++ nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; ++ if (nheads > vdev->vq[index].vring.num) { ++ qemu_log("VQ %d size 0x%x Guest index 0x%x " ++ "inconsistent with Host index 0x%x: " ++ "delta 0x%x\n", ++ index, vdev->vq[index].vring.num, ++ vring_avail_idx(&vdev->vq[index]), ++ vdev->vq[index].last_avail_idx, nheads); ++ } ++} ++ + int virtio_save(VirtIODevice *vdev, QEMUFile *f) + { + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); +@@ -2827,6 +2843,8 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) + if (vdev->vq[i].vring.num == 0) + break; + ++ check_vring_avail_num(vdev, i); ++ + qemu_put_be32(f, vdev->vq[i].vring.num); + if (k->has_variable_vring_alignment) { + qemu_put_be32(f, vdev->vq[i].vring.align); +-- +2.27.0 + diff --git a/virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch b/virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch deleted file mode 100644 index aab3e86663021d2576763fff2a4f9c5985a2fb32..0000000000000000000000000000000000000000 --- a/virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 62ded4fc6b38e2642ea4d95a93d70d0f608bee65 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Tue, 25 Feb 2020 15:55:54 +0800 -Subject: [PATCH 3/9] virtio-crypto: do delete ctrl_vq in - virtio_crypto_device_unrealize - -Similar to other virtio-deivces, ctrl_vq forgot to delete in virtio_crypto_device_unrealize, this patch fix it. -This device has aleardy maintained vq pointers. Thus, we use the new virtio_delete_queue function directly to do the cleanup. 
- -The leak stack: -Direct leak of 10752 byte(s) in 3 object(s) allocated from: - #0 0x7f4c024b1970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) - #1 0x7f4c018be49d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) - #2 0x55a2f8017279 in virtio_add_queue /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio.c:2333 - #3 0x55a2f8057035 in virtio_crypto_device_realize /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio-crypto.c:814 - #4 0x55a2f8005d80 in virtio_device_realize /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio.c:3531 - #5 0x55a2f8497d1b in device_set_realized /mnt/sdb/qemu-new/qemu_test/qemu/hw/core/qdev.c:891 - #6 0x55a2f8b48595 in property_set_bool /mnt/sdb/qemu-new/qemu_test/qemu/qom/object.c:2238 - #7 0x55a2f8b54fad in object_property_set_qobject /mnt/sdb/qemu-new/qemu_test/qemu/qom/qom-qobject.c:26 - #8 0x55a2f8b4de2c in object_property_set_bool /mnt/sdb/qemu-new/qemu_test/qemu/qom/object.c:1390 - #9 0x55a2f80609c9 in virtio_crypto_pci_realize /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio-crypto-pci.c:58 - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Cc: "Gonglei (Arei)" -Message-Id: <20200225075554.10835-5-pannengyuan@huawei.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: AlexChen ---- - hw/virtio/virtio-crypto.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index 45187d3..0076b4b 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -830,12 +830,13 @@ static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp) - - max_queues = vcrypto->multiqueue ? 
vcrypto->max_queues : 1; - for (i = 0; i < max_queues; i++) { -- virtio_del_queue(vdev, i); -+ virtio_delete_queue(vcrypto->vqs[i].dataq); - q = &vcrypto->vqs[i]; - qemu_bh_delete(q->dataq_bh); - } - - g_free(vcrypto->vqs); -+ virtio_delete_queue(vcrypto->ctrl_vq); - - virtio_cleanup(vdev); - cryptodev_backend_set_used(vcrypto->cryptodev, false); --- -1.8.3.1 - diff --git a/virtio-don-t-enable-notifications-during-polling.patch b/virtio-don-t-enable-notifications-during-polling.patch deleted file mode 100644 index cb77429e9e2fa70688ed245f7cf440fb61cd38ad..0000000000000000000000000000000000000000 --- a/virtio-don-t-enable-notifications-during-polling.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 0592b1e444e8ef7f00fb04a637dba72b732b70e4 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 9 Dec 2019 21:09:57 +0000 -Subject: [PATCH] virtio: don't enable notifications during polling - -Virtqueue notifications are not necessary during polling, so we disable -them. This allows the guest driver to avoid MMIO vmexits. -Unfortunately the virtio-blk and virtio-scsi handler functions re-enable -notifications, defeating this optimization. - -Fix virtio-blk and virtio-scsi emulation so they leave notifications -disabled. The key thing to remember for correctness is that polling -always checks one last time after ending its loop, therefore it's safe -to lose the race when re-enabling notifications at the end of polling. - -There is a measurable performance improvement of 5-10% with the null-co -block driver. Real-life storage configurations will see a smaller -improvement because the MMIO vmexit overhead contributes less to -latency. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20191209210957.65087-1-stefanha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin ---- - hw/block/virtio-blk.c | 9 +++++++-- - hw/scsi/virtio-scsi.c | 9 +++++++-- - hw/virtio/virtio.c | 12 ++++++------ - include/hw/virtio/virtio.h | 1 + - 4 files changed, 21 insertions(+), 10 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 2db9804cfe..fbe2ed6779 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -766,13 +766,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - { - VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - aio_context_acquire(blk_get_aio_context(s->blk)); - blk_io_plug(s->blk); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_blk_get_request(s, vq))) { - progress = true; -@@ -783,7 +786,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 1); -+ } - } while (!virtio_queue_empty(vq)); - - if (mrb.num_reqs) { -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 8b9e5e2b49..eddb13e7c6 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -594,12 +594,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req, *next; - int ret = 0; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_scsi_pop_req(s, vq))) { - progress = true; -@@ -619,7 +622,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 
1); -+ } - } while (ret != -EINVAL && !virtio_queue_empty(vq)); - - QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 90971f4afa..daa8250332 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -390,6 +390,11 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) - rcu_read_unlock(); - } - -+bool virtio_queue_get_notification(VirtQueue *vq) -+{ -+ return vq->notification; -+} -+ - int virtio_queue_ready(VirtQueue *vq) - { - return vq->vring.avail != 0; -@@ -2572,17 +2577,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) - { - EventNotifier *n = opaque; - VirtQueue *vq = container_of(n, VirtQueue, host_notifier); -- bool progress; - - if (!vq->vring.desc || virtio_queue_empty(vq)) { - return false; - } - -- progress = virtio_queue_notify_aio_vq(vq); -- -- /* In case the handler function re-enabled notifications */ -- virtio_queue_set_notification(vq, 0); -- return progress; -+ return virtio_queue_notify_aio_vq(vq); - } - - static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index ca2fbaeb35..7394715407 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -229,6 +229,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); - - void virtio_notify_config(VirtIODevice *vdev); - -+bool virtio_queue_get_notification(VirtQueue *vq); - void virtio_queue_set_notification(VirtQueue *vq, int enable); - - int virtio_queue_ready(VirtQueue *vq); --- -2.27.0 - diff --git a/virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch b/virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch new file mode 100644 index 0000000000000000000000000000000000000000..fe1ade710d449101dbca7ca0d7e09e03c697500f --- /dev/null +++ b/virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch @@ -0,0 +1,73 @@ +From ffb0dcccbf5f6e662e7c0b6afa4fe7308d96cc06 Mon Sep 17 00:00:00 
2001 +From: dinglimin +Date: Tue, 27 Feb 2024 17:06:01 +0800 +Subject: [PATCH] virtio-gpu: Correct virgl_renderer_resource_get_info() error + check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 574b64aa6754ba491f51024c5a823a674d48a658 + +virgl_renderer_resource_get_info() returns errno and not -1 on error. +Correct the return-value check. + +Reviewed-by: Marc-André Lureau +Signed-off-by: Dmitry Osipenko +Message-Id: <20240129073921.446869-1-dmitry.osipenko@collabora.com> +Cc: qemu-stable@nongnu.org +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: dinglimin +--- + contrib/vhost-user-gpu/virgl.c | 6 +++--- + hw/display/virtio-gpu-virgl.c | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c +index d1ccdf7d06..51da0e3667 100644 +--- a/contrib/vhost-user-gpu/virgl.c ++++ b/contrib/vhost-user-gpu/virgl.c +@@ -327,7 +327,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id, + #ifdef VIRGL_RENDERER_RESOURCE_INFO_EXT_VERSION + struct virgl_renderer_resource_info_ext info_ext; + ret = virgl_renderer_resource_get_info_ext(resource_id, &info_ext); +- if (ret < 0) { ++ if (ret) { + return ret; + } + +@@ -335,7 +335,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id, + *modifiers = info_ext.modifiers; + #else + ret = virgl_renderer_resource_get_info(resource_id, info); +- if (ret < 0) { ++ if (ret) { + return ret; + } + +@@ -372,7 +372,7 @@ virgl_cmd_set_scanout(VuGpu *g, + uint64_t modifiers = 0; + ret = virgl_get_resource_info_modifiers(ss.resource_id, &info, + &modifiers); +- if (ret == -1) { ++ if (ret) { + g_critical("%s: illegal resource specified %d\n", + __func__, ss.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c +index 8bb7a2c21f..9f34d0e661 100644 +--- 
a/hw/display/virtio-gpu-virgl.c ++++ b/hw/display/virtio-gpu-virgl.c +@@ -181,7 +181,7 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g, + memset(&info, 0, sizeof(info)); + ret = virgl_renderer_resource_get_info(ss.resource_id, &info); + #endif +- if (ret == -1) { ++ if (ret) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: illegal resource specified %d\n", + __func__, ss.resource_id); +-- +2.27.0 + diff --git a/virtio-gpu-remove-needless-condition.patch b/virtio-gpu-remove-needless-condition.patch new file mode 100644 index 0000000000000000000000000000000000000000..4930a2c5129cef2ccf57167eaba31cf43aad920d --- /dev/null +++ b/virtio-gpu-remove-needless-condition.patch @@ -0,0 +1,44 @@ +From 77b2f29dce6ddedcc13488eb80add2f9023b4b89 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Wed, 13 Mar 2024 11:23:35 +0800 +Subject: [PATCH] virtio-gpu: remove needless condition +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from cab47b210598c11b76053a01316df9835b94dc09 +qemu_create_displaysurface_pixman() never returns NULL. 
+Signed-off-by: Marc-André Lureau +Signed-off-by: dinglimin +--- + hw/display/virtio-gpu.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index b016d3bac8..b02d1e3a4c 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -679,10 +679,6 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + + /* realloc the surface ptr */ + scanout->ds = qemu_create_displaysurface_pixman(rect); +- if (!scanout->ds) { +- *error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- return; +- } + #ifdef WIN32 + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, fb->offset); + #endif +@@ -1418,9 +1414,6 @@ static int virtio_gpu_post_load(void *opaque, int version_id) + return -EINVAL; + } + scanout->ds = qemu_create_displaysurface_pixman(res->image); +- if (!scanout->ds) { +- return -EINVAL; +- } + #ifdef WIN32 + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); + #endif +-- +2.27.0 + diff --git a/virtio-gracefully-handle-invalid-region-caches.patch b/virtio-gracefully-handle-invalid-region-caches.patch deleted file mode 100644 index 2793f21b0d988625b4f53624c471c403937fcedc..0000000000000000000000000000000000000000 --- a/virtio-gracefully-handle-invalid-region-caches.patch +++ /dev/null @@ -1,238 +0,0 @@ -From 63a3c25baa9c7372b80df80be4447552af6d6ba0 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 7 Feb 2020 10:46:19 +0000 -Subject: [PATCH 7/9] virtio: gracefully handle invalid region caches - -The virtqueue code sets up MemoryRegionCaches to access the virtqueue -guest RAM data structures. The code currently assumes that -VRingMemoryRegionCaches is initialized before device emulation code -accesses the virtqueue. An assertion will fail in -vring_get_region_caches() when this is not true. Device fuzzing found a -case where this assumption is false (see below). - -Virtqueue guest RAM addresses can also be changed from a vCPU thread -while an IOThread is accessing the virtqueue. 
This breaks the same -assumption but this time the caches could become invalid partway through -the virtqueue code. The code fetches the caches RCU pointer multiple -times so we will need to validate the pointer every time it is fetched. - -Add checks each time we call vring_get_region_caches() and treat invalid -caches as a nop: memory stores are ignored and memory reads return 0. - -The fuzz test failure is as follows: - - $ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \ - -drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \ - -drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \ - -display none \ - -qtest stdio - endianness - outl 0xcf8 0x80002020 - outl 0xcfc 0xe0000000 - outl 0xcf8 0x80002004 - outw 0xcfc 0x7 - write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001 - inb 0x4 - writew 0xe000001c 0x1 - write 0xe0000014 0x1 0x0d - -The following error message is produced: - - qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed. 
- -The backtrace looks like this: - - #0 0x00007ffff5520625 in raise () at /lib64/libc.so.6 - #1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6 - #2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6 - #3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6 - #4 0x00005555559073da in vring_get_region_caches (vq=) at qemu/hw/virtio/virtio.c:286 - #5 vring_get_region_caches (vq=) at qemu/hw/virtio/virtio.c:283 - #6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398 - #7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398 - #8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451 - #9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444 - #10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775 - #11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244 - #12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429 - #13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460 - #14 0x0000555555cb307e in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 - #15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 - #16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219 - #17 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #18 main_loop_wait (nonblocking=) at util/main-loop.c:518 - #19 0x00005555559b20c9 in main_loop () at vl.c:1683 - #20 0x0000555555838115 in main (argc=, argv=, envp=) at vl.c:4441 - -Reported-by: Alexander Bulekov -Cc: Michael Tsirkin -Cc: Cornelia Huck -Cc: Paolo Bonzini -Cc: qemu-stable@nongnu.org -Signed-off-by: Stefan Hajnoczi -Message-Id: 
<20200207104619.164892-1-stefanha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: AlexChen ---- - hw/virtio/virtio.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++------ - 1 file changed, 59 insertions(+), 7 deletions(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 3d027d3..90971f4 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -221,15 +221,19 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, - - static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) - { -- VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches); -- assert(caches != NULL); -- return caches; -+ return atomic_rcu_read(&vq->vring.caches); - } -+ - /* Called within rcu_read_lock(). */ - static inline uint16_t vring_avail_flags(VirtQueue *vq) - { - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - hwaddr pa = offsetof(VRingAvail, flags); -+ -+ if (!caches) { -+ return 0; -+ } -+ - return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); - } - -@@ -238,6 +242,11 @@ static inline uint16_t vring_avail_idx(VirtQueue *vq) - { - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - hwaddr pa = offsetof(VRingAvail, idx); -+ -+ if (!caches) { -+ return 0; -+ } -+ - vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); - return vq->shadow_avail_idx; - } -@@ -247,6 +256,11 @@ static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) - { - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - hwaddr pa = offsetof(VRingAvail, ring[i]); -+ -+ if (!caches) { -+ return 0; -+ } -+ - return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); - } - -@@ -262,6 +276,11 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, - { - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - hwaddr pa = offsetof(VRingUsed, ring[i]); -+ -+ if (!caches) { -+ return; -+ } -+ - 
virtio_tswap32s(vq->vdev, &uelem->id); - virtio_tswap32s(vq->vdev, &uelem->len); - address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem)); -@@ -273,6 +292,11 @@ static uint16_t vring_used_idx(VirtQueue *vq) - { - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - hwaddr pa = offsetof(VRingUsed, idx); -+ -+ if (!caches) { -+ return 0; -+ } -+ - return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); - } - -@@ -281,8 +305,12 @@ static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) - { - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - hwaddr pa = offsetof(VRingUsed, idx); -- virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); -- address_space_cache_invalidate(&caches->used, pa, sizeof(val)); -+ -+ if (caches) { -+ virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); -+ address_space_cache_invalidate(&caches->used, pa, sizeof(val)); -+ } -+ - vq->used_idx = val; - } - -@@ -292,8 +320,13 @@ static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - VirtIODevice *vdev = vq->vdev; - hwaddr pa = offsetof(VRingUsed, flags); -- uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); -+ uint16_t flags; -+ -+ if (!caches) { -+ return; -+ } - -+ flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); - virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask); - address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); - } -@@ -304,8 +337,13 @@ static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) - VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); - VirtIODevice *vdev = vq->vdev; - hwaddr pa = offsetof(VRingUsed, flags); -- uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); -+ uint16_t flags; - -+ if (!caches) { -+ return; -+ } -+ -+ flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); - virtio_stw_phys_cached(vdev, &caches->used, 
pa, flags & ~mask); - address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); - } -@@ -320,6 +358,10 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) - } - - caches = vring_get_region_caches(vq); -+ if (!caches) { -+ return; -+ } -+ - pa = offsetof(VRingUsed, ring[vq->vring.num]); - virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); - address_space_cache_invalidate(&caches->used, pa, sizeof(val)); -@@ -626,6 +668,11 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - - max = vq->vring.num; - caches = vring_get_region_caches(vq); -+ if (!caches) { -+ virtio_error(vdev, "Region cached not initialized"); -+ goto err; -+ } -+ - if (caches->desc.len < max * sizeof(VRingDesc)) { - virtio_error(vdev, "Cannot map descriptor ring"); - goto err; -@@ -894,6 +941,11 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) - i = head; - - caches = vring_get_region_caches(vq); -+ if (!caches) { -+ virtio_error(vdev, "Region caches not initialized"); -+ goto done; -+ } -+ - if (caches->desc.len < max * sizeof(VRingDesc)) { - virtio_error(vdev, "Cannot map descriptor ring"); - goto done; --- -1.8.3.1 - diff --git a/virtio-input-fix-memory-leak-on-unrealize.patch b/virtio-input-fix-memory-leak-on-unrealize.patch deleted file mode 100644 index df83453f04525eb4b9fa29ba3d03dc6fa8b31fc5..0000000000000000000000000000000000000000 --- a/virtio-input-fix-memory-leak-on-unrealize.patch +++ /dev/null @@ -1,45 +0,0 @@ -From e29f08036ff11bf220463b4327b315505e760a44 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 21 Nov 2019 13:56:49 +0400 -Subject: [PATCH 9/9] virtio-input: fix memory leak on unrealize -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Spotted by ASAN + minor stylistic change. - -Signed-off-by: Marc-André Lureau -Reviewed-by: Michael S. Tsirkin -Message-Id: <20191121095649.25453-1-marcandre.lureau@redhat.com> -Signed-off-by: Michael S. 
Tsirkin -Signed-off-by: Marc-André Lureau -Reviewed-by: Michael S. Tsirkin -Signed-off-by: AlexChen ---- - hw/input/virtio-input.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c -index 9946394..401c1de 100644 ---- a/hw/input/virtio-input.c -+++ b/hw/input/virtio-input.c -@@ -275,6 +275,7 @@ static void virtio_input_finalize(Object *obj) - - g_free(vinput->queue); - } -+ - static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) - { - VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev); -@@ -288,6 +289,8 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) - return; - } - } -+ virtio_del_queue(vdev, 0); -+ virtio_del_queue(vdev, 1); - virtio_cleanup(vdev); - } - --- -1.8.3.1 - diff --git a/virtio-irqfd-Batch-processing-of-irqfd-related-opera.patch b/virtio-irqfd-Batch-processing-of-irqfd-related-opera.patch new file mode 100644 index 0000000000000000000000000000000000000000..f5ee317e6bbe11af4db52cc994b14244f185022a --- /dev/null +++ b/virtio-irqfd-Batch-processing-of-irqfd-related-opera.patch @@ -0,0 +1,188 @@ +From d6e6ef58847bf34db9535649bf33e2a72b59495e Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 1 Apr 2025 17:28:02 +0800 +Subject: [PATCH] virtio/irqfd: Batch processing of irqfd related operations + during virtio device startup + +This patch adds batch processing for unmask operations + +Signed-off-by: libai +--- + hw/virtio/virtio-pci.c | 97 ++++++++++++++++++++++++++++++++++---- + include/hw/virtio/virtio.h | 1 + + 2 files changed, 89 insertions(+), 9 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index a677fa0736..558471307a 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -51,14 +51,86 @@ + + static KVMRouteChange virtio_pci_route_change; + +-static inline void virtio_pci_begin_route_changes(void) ++static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, ++ EventNotifier *n, ++ unsigned int 
vector); ++ ++static inline void virtio_pci_begin_route_changes(VirtIODevice *vdev) ++{ ++ if (!vdev->defer_kvm_irq_routing) { ++ virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); ++ } ++} ++ ++static inline void virtio_pci_commit_route_changes(VirtIODevice *vdev) + { ++ if (!vdev->defer_kvm_irq_routing) { ++ kvm_irqchip_commit_route_changes(&virtio_pci_route_change); ++ } ++} ++ ++static void virtio_pci_prepare_kvm_msi_virq_batch(VirtIOPCIProxy *proxy) ++{ ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ ++ if (vdev->defer_kvm_irq_routing) { ++ qemu_log("invaild defer kvm irq routing state: %d\n", vdev->defer_kvm_irq_routing); ++ return; ++ } + virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); ++ vdev->defer_kvm_irq_routing = true; + } + +-static inline void virtio_pci_commit_route_changes(void) ++static void virtio_pci_commit_kvm_msi_virq_batch(VirtIOPCIProxy *proxy) + { ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); ++ EventNotifier *n; ++ VirtQueue *vq; ++ int vector, index, ret; ++ ++ if (!vdev->defer_kvm_irq_routing) { ++ qemu_log("invaild defer kvm irq routing state: %d\n", vdev->defer_kvm_irq_routing); ++ return; ++ } ++ vdev->defer_kvm_irq_routing = false; + kvm_irqchip_commit_route_changes(&virtio_pci_route_change); ++ ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ return; ++ } ++ ++ for (vector = 0; vector < proxy->pci_dev.msix_entries_nr; vector++) { ++ if (msix_is_masked(&proxy->pci_dev, vector)) { ++ continue; ++ } ++ ++ if (vector == vdev->config_vector) { ++ n = virtio_config_get_guest_notifier(vdev); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); ++ if (ret) { ++ qemu_log("config irqfd use failed: %d\n", ret); ++ } ++ continue; ++ } ++ ++ vq = virtio_vector_first_queue(vdev, vector); ++ ++ while (vq) { ++ index = virtio_get_queue_index(vq); ++ if (!virtio_queue_get_num(vdev, index)) { ++ break; ++ } 
++ if (index < proxy->nvqs_with_notifiers) { ++ n = virtio_queue_get_guest_notifier(vq); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); ++ if (ret < 0) { ++ qemu_log("Error: irqfd use failed: %d\n", ret); ++ } ++ } ++ vq = virtio_vector_next_queue(vq); ++ } ++ } + } + + static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, +@@ -959,15 +1031,17 @@ static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + kvm_create_shadow_device(&proxy->pci_dev); + } + #endif +- +- virtio_pci_begin_route_changes(); + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; + } ++ } ++ ++ virtio_pci_begin_route_changes(vdev); ++ for (queue_no = 0; queue_no < nvqs; queue_no++) { + ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } +- virtio_pci_commit_route_changes(); ++ virtio_pci_commit_route_changes(vdev); + + #ifdef __aarch64__ + if (!strcmp(vdev->name, "virtio-net") && ret != 0) { +@@ -1044,13 +1118,13 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + if (proxy->vector_irqfd) { + irqfd = &proxy->vector_irqfd[vector]; + if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { +- KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); +- ret = kvm_irqchip_update_msi_route(&c, irqfd->virq, msg, ++ virtio_pci_begin_route_changes(vdev); ++ ret = kvm_irqchip_update_msi_route(&virtio_pci_route_change, irqfd->virq, msg, + &proxy->pci_dev); + if (ret < 0) { + return ret; + } +- kvm_irqchip_commit_route_changes(&c); ++ virtio_pci_commit_route_changes(vdev); + } + } + +@@ -1065,7 +1139,9 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + event_notifier_set(n); + } + } else { +- ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); ++ if (!vdev->defer_kvm_irq_routing) { ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); ++ } + } + return ret; + } +@@ -1322,6 +1398,8 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool 
assign) + if ((with_irqfd || + (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && + assign) { ++ ++ virtio_pci_prepare_kvm_msi_virq_batch(proxy); + if (with_irqfd) { + proxy->vector_irqfd = + g_malloc0(sizeof(*proxy->vector_irqfd) * +@@ -1339,6 +1417,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, + virtio_pci_vector_mask, + virtio_pci_vector_poll); ++ virtio_pci_commit_kvm_msi_virq_batch(proxy); + if (r < 0) { + goto notifiers_error; + } +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 78db2bde98..672f7445dd 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -147,6 +147,7 @@ struct VirtIODevice + bool use_started; + bool started; + bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ ++ bool defer_kvm_irq_routing; + bool disable_legacy_check; + bool vhost_started; + VMChangeStateEntry *vmstate; +-- +2.41.0.windows.1 + diff --git a/virtio-net-Avoid-indirection_table_mask-overflow.patch b/virtio-net-Avoid-indirection_table_mask-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ac7af0b5f23d59e24e3b23f40c70ecfd0a99f2d --- /dev/null +++ b/virtio-net-Avoid-indirection_table_mask-overflow.patch @@ -0,0 +1,48 @@ +From cc875acdbf0ab210ce467f27c621fe7dc2159110 Mon Sep 17 00:00:00 2001 +From: zhangchujun +Date: Wed, 30 Oct 2024 10:57:05 +0800 +Subject: [PATCH] virtio-net: Avoid indirection_table_mask overflow + +We computes indirections_len by adding 1 to indirection_table_mask, but +it may overflow indirection_table_mask is UINT16_MAX. Check if +indirection_table_mask is small enough before adding 1. 
+ +Fixes: 5907902 ("virtio-net: implement RSS configuration command") +Signed-off-by: Akihiko Odaki +Signed-off-by: Jason Wang +Signed-off-by: zhangchujun +--- + hw/net/virtio-net.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 432c433540..d5008b65ec 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1400,17 +1400,17 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, + n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); + n->rss_data.indirections_len = + virtio_lduw_p(vdev, &cfg.indirection_table_mask); +- n->rss_data.indirections_len++; + if (!do_rss) { +- n->rss_data.indirections_len = 1; ++ n->rss_data.indirections_len = 0; + } +- if (!is_power_of_2(n->rss_data.indirections_len)) { +- err_msg = "Invalid size of indirection table"; ++ if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) { ++ err_msg = "Too large indirection table"; + err_value = n->rss_data.indirections_len; + goto error; + } +- if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { +- err_msg = "Too large indirection table"; ++ n->rss_data.indirections_len++; ++ if (!is_power_of_2(n->rss_data.indirections_len)) { ++ err_msg = "Invalid size of indirection table"; + err_value = n->rss_data.indirections_len; + goto error; + } +-- +2.41.0.windows.1 + diff --git a/virtio-net-Ensure-queue-index-fits-with-RSS-CVE-2024.patch b/virtio-net-Ensure-queue-index-fits-with-RSS-CVE-2024.patch new file mode 100644 index 0000000000000000000000000000000000000000..abaa891643d2609030f45a184026532deba373c4 --- /dev/null +++ b/virtio-net-Ensure-queue-index-fits-with-RSS-CVE-2024.patch @@ -0,0 +1,36 @@ +From a8bc17bf7f94f684ba518c56e56b41974c50305e Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki +Date: Mon, 1 Jul 2024 20:58:04 +0900 +Subject: [PATCH] virtio-net: Ensure queue index fits with RSS (CVE-2024-6505) + +Ensure the queue index points to a valid queue when software RSS 
+enabled. The new calculation matches with the behavior of Linux's TAP +device with the RSS eBPF program. + +Fixes: 4474e37a5b3a ("virtio-net: implement RX RSS processing") +Reported-by: Zhibin Hu +Cc: qemu-stable@nongnu.org +Signed-off-by: Akihiko Odaki +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 91c1504544..432c433540 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1931,7 +1931,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { + int index = virtio_net_process_rss(nc, buf, size); + if (index >= 0) { +- NetClientState *nc2 = qemu_get_subqueue(n->nic, index); ++ NetClientState *nc2 = ++ qemu_get_subqueue(n->nic, index % n->curr_queue_pairs); + return virtio_net_receive_rcu(nc2, buf, size, true); + } + } +-- +2.41.0.windows.1 + diff --git a/virtio-net-Fix-network-stall-at-the-host-side-waitin.patch b/virtio-net-Fix-network-stall-at-the-host-side-waitin.patch new file mode 100644 index 0000000000000000000000000000000000000000..b3745947f2d926a9ae23fdc8eab1f203872cc064 --- /dev/null +++ b/virtio-net-Fix-network-stall-at-the-host-side-waitin.patch @@ -0,0 +1,339 @@ +From a481451a811877640a57ccbef2b33b39567f2802 Mon Sep 17 00:00:00 2001 +From: thomas +Date: Fri, 12 Jul 2024 11:10:53 +0800 +Subject: [PATCH] virtio-net: Fix network stall at the host side waiting for + kick + +commit f937309fbdbb48c354220a3e7110c202ae4aa7fa upstream. + +Patch 06b12970174 ("virtio-net: fix network stall under load") +added double-check to test whether the available buffer size +can satisfy the request or not, in case the guest has added +some buffers to the avail ring simultaneously after the first +check. 
It will be lucky if the available buffer size becomes +okay after the double-check, then the host can send the packet +to the guest. If the buffer size still can't satisfy the request, +even if the guest has added some buffers, virtio-net would +stall at the host side forever. + +The patch enables notification and checks whether the guest has +added some buffers since last check of available buffers when +the available buffers are insufficient. If no buffer is added, +return false, else recheck the available buffers in the loop. +If the available buffers are sufficient, disable notification +and return true. + +Changes: +1. Change the return type of virtqueue_get_avail_bytes() from void + to int, it returns an opaque that represents the shadow_avail_idx + of the virtqueue on success, else -1 on error. +2. Add a new API: virtio_queue_enable_notification_and_check(), + it takes an opaque as input arg which is returned from + virtqueue_get_avail_bytes(). It enables notification firstly, + then checks whether the guest has added some buffers since + last check of available buffers or not by virtio_queue_poll(), + return true if yes. + +The patch also reverts patch "06b12970174". + +The case below can reproduce the stall. + + Guest 0 + +--------+ + | iperf | + ---------------> | server | + Host | +--------+ + +--------+ | ... + | iperf |---- + | client |---- Guest n + +--------+ | +--------+ + | | iperf | + ---------------> | server | + +--------+ + +Boot many guests from qemu with virtio network: + qemu ... 
-netdev tap,id=net_x \ + -device virtio-net-pci-non-transitional,\ + iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x + +Each guest acts as iperf server with commands below: + iperf3 -s -D -i 10 -p 8001 + iperf3 -s -D -i 10 -p 8002 + +The host as iperf client: + iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 40000 + iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 40000 + +After some time, the host loses connection to the guest, +the guest can send packet to the host, but can't receive +packet from the host. + +It's more likely to happen if SWIOTLB is enabled in the guest, +allocating and freeing bounce buffer takes some CPU ticks, +copying from/to bounce buffer takes more CPU ticks, compared +with that there is no bounce buffer in the guest. +Once the rate of producing packets from the host approximates +the rate of receiving packets in the guest, the guest would +loop in NAPI. + + receive packets --- + | | + v | + free buf virtnet_poll + | | + v | + add buf to avail ring --- + | + | need kick the host? + | NAPI continues + v + receive packets --- + | | + v | + free buf virtnet_poll + | | + v | + add buf to avail ring --- + | + v + ... ... + +On the other hand, the host fetches free buf from avail +ring, if the buf in the avail ring is not enough, the +host notifies the guest the event by writing the avail +idx read from avail ring to the event idx of used ring, +then the host goes to sleep, waiting for the kick signal +from the guest. + +Once the guest finds the host is waiting for kick signal +(in virtqueue_kick_prepare_split()), it kicks the host. + +The host may stall forever at the sequences below: + + Host Guest + ------------ ----------- + fetch buf, send packet receive packet --- + ... ... | + fetch buf, send packet add buf | + ... add buf virtnet_poll + buf not enough avail idx-> add buf | + read avail idx add buf | + add buf --- + receive packet --- + write event idx ... | + wait for kick add buf virtnet_poll + ... 
| + --- + no more packet, exit NAPI + +In the first loop of NAPI above, indicated in the range of +virtnet_poll above, the host is sending packets while the +guest is receiving packets and adding buffers. + step 1: The buf is not enough, for example, a big packet + needs 5 buf, but the available buf count is 3. + The host read current avail idx. + step 2: The guest adds some buf, then checks whether the + host is waiting for kick signal, not at this time. + The used ring is not empty, the guest continues + the second loop of NAPI. + step 3: The host writes the avail idx read from avail + ring to used ring as event idx via + virtio_queue_set_notification(q->rx_vq, 1). + step 4: At the end of the second loop of NAPI, recheck + whether kick is needed, as the event idx in the + used ring written by the host is beyond the + range of kick condition, the guest will not + send kick signal to the host. + +Fixes: 06b12970174 ("virtio-net: fix network stall under load") +Cc: qemu-stable@nongnu.org +Signed-off-by: Wencheng Yang +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 28 ++++++++++------- + hw/virtio/virtio.c | 64 +++++++++++++++++++++++++++++++++++--- + include/hw/virtio/virtio.h | 19 +++++++++-- + 3 files changed, 92 insertions(+), 19 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index c9c83fe297..7184c9c526 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1662,24 +1662,28 @@ static bool virtio_net_can_receive(NetClientState *nc) + + static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) + { ++ int opaque; ++ unsigned int in_bytes; + VirtIONet *n = q->n; +- if (virtio_queue_empty(q->rx_vq) || +- (n->mergeable_rx_bufs && +- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { +- virtio_queue_set_notification(q->rx_vq, 1); +- +- /* To avoid a race condition where the guest has made some buffers +- * available after the above check but before notification was +- * enabled, check for available buffers again. +- */ +- if (virtio_queue_empty(q->rx_vq) || +- (n->mergeable_rx_bufs && +- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { ++ ++ while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) { ++ opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL, ++ bufsize, 0); ++ /* Buffer is enough, disable notifiaction */ ++ if (bufsize <= in_bytes) { ++ break; ++ } ++ ++ if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) { ++ /* Guest has added some buffers, try again */ ++ continue; ++ } else { + return 0; + } + } + + virtio_queue_set_notification(q->rx_vq, 0); ++ + return 1; + } + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 8c3b6b87aa..4f5b241fd3 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -743,6 +743,60 @@ int virtio_queue_empty(VirtQueue *vq) + } + } + ++static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx) ++{ ++ if (unlikely(!vq->vring.avail)) { ++ return false; ++ } ++ ++ return (uint16_t)shadow_idx != vring_avail_idx(vq); ++} ++ ++static 
bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx) ++{ ++ VRingPackedDesc desc; ++ VRingMemoryRegionCaches *caches; ++ ++ if (unlikely(!vq->vring.desc)) { ++ return false; ++ } ++ ++ caches = vring_get_region_caches(vq); ++ if (!caches) { ++ return false; ++ } ++ ++ vring_packed_desc_read(vq->vdev, &desc, &caches->desc, ++ shadow_idx, true); ++ ++ return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter); ++} ++ ++static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx) ++{ ++ if (virtio_device_disabled(vq->vdev)) { ++ return false; ++ } ++ ++ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { ++ return virtio_queue_packed_poll(vq, shadow_idx); ++ } else { ++ return virtio_queue_split_poll(vq, shadow_idx); ++ } ++} ++ ++bool virtio_queue_enable_notification_and_check(VirtQueue *vq, ++ int opaque) ++{ ++ virtio_queue_set_notification(vq, 1); ++ ++ if (opaque >= 0) { ++ return virtio_queue_poll(vq, (unsigned)opaque); ++ } else { ++ return false; ++ } ++} ++ + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) + { +@@ -1322,9 +1376,9 @@ err: + goto done; + } + +-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, +- unsigned int *out_bytes, +- unsigned max_in_bytes, unsigned max_out_bytes) ++int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, ++ unsigned int *out_bytes, unsigned max_in_bytes, ++ unsigned max_out_bytes) + { + uint16_t desc_size; + VRingMemoryRegionCaches *caches; +@@ -1357,7 +1411,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + caches); + } + +- return; ++ return (int)vq->shadow_avail_idx; + err: + if (in_bytes) { + *in_bytes = 0; +@@ -1365,6 +1419,8 @@ err: + if (out_bytes) { + *out_bytes = 0; + } ++ ++ return -1; + } + + int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 60494aed62..78db2bde98 100644 +--- 
a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -273,9 +273,13 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, + VirtQueueElement *elem); + int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, + unsigned int out_bytes); +-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, +- unsigned int *out_bytes, +- unsigned max_in_bytes, unsigned max_out_bytes); ++/** ++ * Return <0 on error or an opaque >=0 to pass to ++ * virtio_queue_enable_notification_and_check on success. ++ */ ++int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, ++ unsigned int *out_bytes, unsigned max_in_bytes, ++ unsigned max_out_bytes); + + void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq); + void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); +@@ -309,6 +313,15 @@ int virtio_queue_ready(VirtQueue *vq); + + int virtio_queue_empty(VirtQueue *vq); + ++/** ++ * Enable notification and check whether guest has added some ++ * buffers since last call to virtqueue_get_avail_bytes. ++ * ++ * @opaque: value returned from virtqueue_get_avail_bytes ++ */ ++bool virtio_queue_enable_notification_and_check(VirtQueue *vq, ++ int opaque); ++ + /* Host binding interface. */ + + uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr); +-- +2.41.0.windows.1 + diff --git a/virtio-net-Fix-num_buffers-for-version-1.patch b/virtio-net-Fix-num_buffers-for-version-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..764af99b0e0e82abc40283aa9984400490b09529 --- /dev/null +++ b/virtio-net-Fix-num_buffers-for-version-1.patch @@ -0,0 +1,41 @@ +From a9f926fc672b06739f6feed770187d705d7d3e6c Mon Sep 17 00:00:00 2001 +From: lijunwei +Date: Tue, 1 Jul 2025 17:55:18 +0800 +Subject: [PATCH] virtio-net: Fix num_buffers for version 1 + +The specification says the device MUST set num_buffers to 1 if +VIRTIO_NET_F_MRG_RXBUF has not been negotiated. 
+ +Fixes: df91055db5c9 ("virtio-net: enable virtio 1.0") +Signed-off-by: Akihiko Odaki +Message-Id: <20250108-buffers-v1-1-a0c85ff31aeb@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit c17ad4b11bd268a35506cd976884562df6ca69d7) +(Mjt: adjust for 8.2.x) +Signed-off-by: Michael Tokarev +--- + hw/net/virtio-net.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 7184c9c526..25044385dc 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1996,7 +1996,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + sg, elem->in_num, + offsetof(typeof(mhdr), num_buffers), + sizeof(mhdr.num_buffers)); +- } ++ }else { ++ mhdr.num_buffers = cpu_to_le16(1); ++ } + + receive_header(n, sg, elem->in_num, buf, size); + if (n->rss_data.populate_hash) { +-- +2.33.0 + diff --git a/virtio-net-Use-virtual-time-for-RSC-timers.patch b/virtio-net-Use-virtual-time-for-RSC-timers.patch new file mode 100644 index 0000000000000000000000000000000000000000..abb744c48df1c84241d674738fdea8f9ba6895cc --- /dev/null +++ b/virtio-net-Use-virtual-time-for-RSC-timers.patch @@ -0,0 +1,58 @@ +From 8f6c35e3acb54208564fcb773cf79809d7412cf5 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 20 Aug 2024 09:48:42 +0800 +Subject: [PATCH] virtio-net: Use virtual time for RSC timers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 44bc14fa1e78f01bfddcb265fc41c29204ebbfd8 + +Receive coalescing is visible to the target machine, so its timers +should use virtual time like other timers in virtio-net, to be +compatible with record-replay. + +Signed-off-by: Nicholas Piggin +Message-Id: <20240813050638.446172-10-npiggin@gmail.com> +Acked-by: Michael S. 
Tsirkin +Signed-off-by: Alex Bennée +Message-Id: <20240813202329.1237572-18-alex.bennee@linaro.org> +Signed-off-by: qihao_yewu +--- + hw/net/virtio-net.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index c0a54f2d61..91c1504544 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2141,7 +2141,7 @@ static void virtio_net_rsc_purge(void *opq) + chain->stat.timer++; + if (!QTAILQ_EMPTY(&chain->buffers)) { + timer_mod(chain->drain_timer, +- qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); ++ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); + } + } + +@@ -2377,7 +2377,7 @@ static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, + chain->stat.empty_cache++; + virtio_net_rsc_cache_buf(chain, nc, buf, size); + timer_mod(chain->drain_timer, +- qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); ++ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); + return size; + } + +@@ -2615,7 +2615,7 @@ static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, + chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; + chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + } +- chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, ++ chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + virtio_net_rsc_purge, chain); + memset(&chain->stat, 0, sizeof(chain->stat)); + +-- +2.41.0.windows.1 + diff --git a/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch b/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch new file mode 100644 index 0000000000000000000000000000000000000000..e33cf68d855ec417f92778a3c5cea4b5da32e4d7 --- /dev/null +++ b/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch @@ -0,0 +1,38 @@ +From 3cd74fd83d58aa88f9a006980c73844d6b79d1fb Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 10:31:38 +0800 +Subject: [PATCH] virtio-net: bugfix: do not delete netdev before virtio net + +For the vhost-user 
net-card, it is allow to delete its +network backend while the virtio-net device still exists. +However, when the status of the device changes in guest, +QEMU will check whether the network backend exists, otherwise +it will crash. +So do not allowed to delete the network backend directly +without delete virtio-net device. + +Signed-off-by: Jinhua Cao +--- + net/net.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/net.c b/net/net.c +index 0520bc1681..bcd3d7e04c 100644 +--- a/net/net.c ++++ b/net/net.c +@@ -1322,6 +1322,12 @@ void qmp_netdev_del(const char *id, Error **errp) + return; + } + ++ if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER && nc->peer) { ++ error_setg(errp, "Device '%s' is a netdev for vhostuser," ++ "please delete the peer front-end device (virtio-net) first.", id); ++ return; ++ } ++ + qemu_del_net_client(nc); + + /* +-- +2.27.0 + diff --git a/virtio-net-correctly-copy-vnet-header-when-flushing-.patch b/virtio-net-correctly-copy-vnet-header-when-flushing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b381e1024885a043e43e688f319cd4b44cc34a4c --- /dev/null +++ b/virtio-net-correctly-copy-vnet-header-when-flushing-.patch @@ -0,0 +1,72 @@ +From 912641a75955a75f37ab8695a0753b1571762717 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 2 Jan 2024 11:29:01 +0800 +Subject: [PATCH] virtio-net: correctly copy vnet header when flushing TX + (CVE-2023-6693) + +When HASH_REPORT is negotiated, the guest_hdr_len might be larger than +the size of the mergeable rx buffer header. Using +virtio_net_hdr_mrg_rxbuf during the header swap might lead a stack +overflow in this case. Fixing this by using virtio_net_hdr_v1_hash +instead. 
+ +Reported-by: Xiao Lei +Cc: Yuri Benditovich +Cc: qemu-stable@nongnu.org +Cc: Mauro Matteo Cascella +Fixes: CVE-2023-6693 +Fixes: e22f0603fb2f ("virtio-net: reference implementation of hash report") +Reviewed-by: Michael Tokarev +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 80c56f0cfc..73024babd4 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -674,6 +674,11 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + + n->mergeable_rx_bufs = mergeable_rx_bufs; + ++ /* ++ * Note: when extending the vnet header, please make sure to ++ * change the vnet header copying logic in virtio_net_flush_tx() ++ * as well. ++ */ + if (version_1) { + n->guest_hdr_len = hash_report ? + sizeof(struct virtio_net_hdr_v1_hash) : +@@ -2693,7 +2698,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + ssize_t ret; + unsigned int out_num; + struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; +- struct virtio_net_hdr_mrg_rxbuf mhdr; ++ struct virtio_net_hdr_v1_hash vhdr; + + elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); + if (!elem) { +@@ -2710,7 +2715,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + } + + if (n->has_vnet_hdr) { +- if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < ++ if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) < + n->guest_hdr_len) { + virtio_error(vdev, "virtio-net header incorrect"); + virtqueue_detach_element(q->tx_vq, elem, 0); +@@ -2718,8 +2723,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + return -EINVAL; + } + if (n->needs_vnet_hdr_swap) { +- virtio_net_hdr_swap(vdev, (void *) &mhdr); +- sg2[0].iov_base = &mhdr; ++ virtio_net_hdr_swap(vdev, (void *) &vhdr); ++ sg2[0].iov_base = &vhdr; + sg2[0].iov_len = n->guest_hdr_len; + out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, + out_sg, out_num, 
+-- +2.27.0 + diff --git a/virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/virtio-net-delete-also-control-queue-when-TX-RX-dele.patch deleted file mode 100644 index f955fbb1a7f6b0ae8e646a366d3c6401cf699788..0000000000000000000000000000000000000000 --- a/virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 358e2bfe2e1a65b1e926163d7d1ffaefd601d874 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:31 +0000 -Subject: [PATCH] virtio-net: delete also control queue when TX/RX deleted - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-5-jusual@redhat.com> -Patchwork-id: 93983 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: Yuri Benditovich - -https://bugzilla.redhat.com/show_bug.cgi?id=1708480 -If the control queue is not deleted together with TX/RX, it -later will be ignored in freeing cache resources and hot -unplug will not be completed. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Yuri Benditovich -Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 6adb0fe252..63f1bae99c 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -2803,7 +2803,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) - for (i = 0; i < max_queues; i++) { - virtio_net_del_queue(n, i); - } -- -+ /* delete also control vq */ -+ virtio_del_queue(vdev, max_queues * 2); - qemu_announce_timer_del(&n->announce_timer, false); - g_free(n->vqs); - qemu_del_nic(n->nic); --- -2.27.0 - diff --git a/virtio-net-drop-too-short-packets-early.patch b/virtio-net-drop-too-short-packets-early.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffe4c0bfd72869e2700ce2d5a2331207c269c155 --- /dev/null +++ b/virtio-net-drop-too-short-packets-early.patch @@ -0,0 +1,91 @@ +From 5651eb5cfd3a49506be4be97f8def3fed713c641 Mon Sep 17 00:00:00 2001 +From: Alexey Dobriyan +Date: Tue, 30 Apr 2024 13:53:33 +0300 +Subject: [PATCH] virtio-net: drop too short packets early + +Reproducer from https://gitlab.com/qemu-project/qemu/-/issues/1451 +creates small packet (1 segment, len = 10 == n->guest_hdr_len), +then destroys queue. + +"if (n->host_hdr_len != n->guest_hdr_len)" is triggered, if body creates +zero length/zero segment packet as there is nothing after guest header. + +qemu_sendv_packet_async() tries to send it. + +slirp discards it because it is smaller than Ethernet header, +but returns 0 because tx hooks are supposed to return total length of data. + +0 is propagated upwards and is interpreted as "packet has been sent" +which is terrible because queue is being destroyed, nobody is waiting for TX +to complete and assert it triggered. + +Fix is discard such empty packets instead of sending them. 
+ +Length 1 packets will go via different codepath: + + virtqueue_push(q->tx_vq, elem, 0); + virtio_notify(vdev, q->tx_vq); + g_free(elem); + +and aren't problematic. + +Signed-off-by: Alexey Dobriyan +Signed-off-by: Jason Wang +(cherry picked from commit 2c3e4e2de699cd4d9f6c71f30a22d8f125cd6164) +Signed-off-by: zhujun2 +--- + hw/net/virtio-net.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 432c433540..b17137a686 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2732,18 +2732,14 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + out_sg = elem->out_sg; + if (out_num < 1) { + virtio_error(vdev, "virtio-net header not in first element"); +- virtqueue_detach_element(q->tx_vq, elem, 0); +- g_free(elem); +- return -EINVAL; ++ goto detach; + } + + if (n->has_vnet_hdr) { + if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) < + n->guest_hdr_len) { + virtio_error(vdev, "virtio-net header incorrect"); +- virtqueue_detach_element(q->tx_vq, elem, 0); +- g_free(elem); +- return -EINVAL; ++ goto detach; + } + if (n->needs_vnet_hdr_swap) { + virtio_net_hdr_swap(vdev, (void *) &vhdr); +@@ -2774,6 +2770,11 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + n->guest_hdr_len, -1); + out_num = sg_num; + out_sg = sg; ++ ++ if (out_num < 1) { ++ virtio_error(vdev, "virtio-net nothing to send"); ++ goto detach; ++ } + } + + ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), +@@ -2794,6 +2795,11 @@ drop: + } + } + return num_packets; ++ ++detach: ++ virtqueue_detach_element(q->tx_vq, elem, 0); ++ g_free(elem); ++ return -EINVAL; + } + + static void virtio_net_tx_timer(void *opaque); +-- +2.41.0.windows.1 + diff --git a/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch b/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch new file mode 100644 index 
0000000000000000000000000000000000000000..fb492879329a7e56b5fee12ccd67e36f15798987 --- /dev/null +++ b/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch @@ -0,0 +1,52 @@ +From 4321c9f8b85c6a4c1549399aa11e351b66bd1879 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 10:48:27 +0800 +Subject: [PATCH] virtio-net: fix max vring buf size when set ring num + +Set the max vring buf size of virtio-net devices to 4096 + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 9 +++++++-- + include/hw/virtio/virtio.h | 1 + + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d93ea62723..267c1e6fd0 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2196,12 +2196,17 @@ void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, + + void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) + { ++ int vq_max_size = VIRTQUEUE_MAX_SIZE; ++ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ vq_max_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ + /* Don't allow guest to flip queue between existent and + * nonexistent states, or to set it to an invalid size. 
+ */ + if (!!num != !!vdev->vq[n].vring.num || +- num > VIRTQUEUE_MAX_SIZE || +- num < 0) { ++ num > vq_max_size || num < 0) { + return; + } + vdev->vq[n].vring.num = num; +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 7c35bb841b..e612441357 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -60,6 +60,7 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, + typedef struct VirtQueue VirtQueue; + + #define VIRTQUEUE_MAX_SIZE 1024 ++#define VIRTIO_NET_VQ_MAX_SIZE (4096) + + typedef struct VirtQueueElement + { +-- +2.27.0 + diff --git a/virtio-net-prevent-offloads-reset-on-migration.patch b/virtio-net-prevent-offloads-reset-on-migration.patch deleted file mode 100644 index ab8fbe26115279359c6a3928e93bf134ca88a2cb..0000000000000000000000000000000000000000 --- a/virtio-net-prevent-offloads-reset-on-migration.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 4887acf574a573137660aa98d9d422ece0a41a5a Mon Sep 17 00:00:00 2001 -From: Mikhail Sennikovsky -Date: Fri, 11 Oct 2019 15:58:04 +0200 -Subject: [PATCH] virtio-net: prevent offloads reset on migration - -Currently offloads disabled by guest via the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET -command are not preserved on VM migration. -Instead all offloads reported by guest features (via VIRTIO_PCI_GUEST_FEATURES) -get enabled. 
-What happens is: first the VirtIONet::curr_guest_offloads gets restored and offloads -are getting set correctly: - - #0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=0, tso6=0, ecn=0, ufo=0) at net/net.c:474 - #1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720 - #2 virtio_net_post_load_device (opaque=0x555557701ca0, version_id=11) at hw/net/virtio-net.c:2334 - #3 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577c80 , opaque=0x555557701ca0, version_id=11) - at migration/vmstate.c:168 - #4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2197 - #5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036 - #6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 , opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143 - #7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829 - #8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211 - #9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395 - #10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467 - #11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449 - -However later on the features are getting restored, and offloads get reset to -everything supported by features: - - #0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at net/net.c:474 - #1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720 - #2 virtio_net_set_features (vdev=0x555557701ca0, features=5104441767) at hw/net/virtio-net.c:773 - #3 virtio_set_features_nocheck (vdev=0x555557701ca0, val=5104441767) at hw/virtio/virtio.c:2052 - #4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2220 - #5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, 
field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036 - #6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 , opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143 - #7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829 - #8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211 - #9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395 - #10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467 - #11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449 - -Fix this by preserving the state in saved_guest_offloads field and -pushing out offload initialization to the new post load hook. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Mikhail Sennikovsky -Signed-off-by: Jason Wang -(cherry picked from commit 7788c3f2e21e35902d45809b236791383bbb613e) -Signed-off-by: Michael Roth ---- - hw/net/virtio-net.c | 27 ++++++++++++++++++++++++--- - include/hw/virtio/virtio-net.h | 2 ++ - 2 files changed, 26 insertions(+), 3 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index b9e1cd71cf..6adb0fe252 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -2330,9 +2330,13 @@ static int virtio_net_post_load_device(void *opaque, int version_id) - n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); - } - -- if (peer_has_vnet_hdr(n)) { -- virtio_net_apply_guest_offloads(n); -- } -+ /* -+ * curr_guest_offloads will be later overwritten by the -+ * virtio_set_features_nocheck call done from the virtio_load. -+ * Here we make sure it is preserved and restored accordingly -+ * in the virtio_net_post_load_virtio callback. 
-+ */ -+ n->saved_guest_offloads = n->curr_guest_offloads; - - virtio_net_set_queues(n); - -@@ -2367,6 +2371,22 @@ static int virtio_net_post_load_device(void *opaque, int version_id) - return 0; - } - -+static int virtio_net_post_load_virtio(VirtIODevice *vdev) -+{ -+ VirtIONet *n = VIRTIO_NET(vdev); -+ /* -+ * The actual needed state is now in saved_guest_offloads, -+ * see virtio_net_post_load_device for detail. -+ * Restore it back and apply the desired offloads. -+ */ -+ n->curr_guest_offloads = n->saved_guest_offloads; -+ if (peer_has_vnet_hdr(n)) { -+ virtio_net_apply_guest_offloads(n); -+ } -+ -+ return 0; -+} -+ - /* tx_waiting field of a VirtIONetQueue */ - static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { - .name = "virtio-net-queue-tx_waiting", -@@ -2909,6 +2929,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) - vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; - vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; - vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); -+ vdc->post_load = virtio_net_post_load_virtio; - vdc->vmsd = &vmstate_virtio_net_device; - } - -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index b96f0c643f..07a9319f4b 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -182,6 +182,8 @@ struct VirtIONet { - char *netclient_name; - char *netclient_type; - uint64_t curr_guest_offloads; -+ /* used on saved state restore phase to preserve the curr_guest_offloads */ -+ uint64_t saved_guest_offloads; - AnnounceTimer announce_timer; - bool needs_vnet_hdr_swap; - bool mtu_bypass_backend; --- -2.23.0 diff --git a/virtio-net-set-the-max-of-queue-size-to-4096.patch b/virtio-net-set-the-max-of-queue-size-to-4096.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e3e067ffaaf7ebad67639c9db209050a29f943c --- /dev/null +++ b/virtio-net-set-the-max-of-queue-size-to-4096.patch @@ -0,0 +1,58 @@ +From 
58fe483bf5824db177843675629ed955051078fd Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Sat, 12 Feb 2022 17:22:38 +0800 +Subject: [PATCH] virtio-net: set the max of queue size to 4096 + +set the max of virtio-net queue size to 4096. Now the +queue_size of virtio-net is set by rx_queue_size and +tx_queue_size + +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 5 +++-- + hw/virtio/virtio.c | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 7f69a4b842..0ae2ddc002 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -710,6 +710,7 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) + + switch(peer->info->type) { + case NET_CLIENT_DRIVER_VHOST_USER: ++ return VIRTIO_NET_VQ_MAX_SIZE; + case NET_CLIENT_DRIVER_VHOST_VDPA: + return VIRTQUEUE_MAX_SIZE; + default: +@@ -3638,12 +3639,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + * help from us (using virtio 1 and up). + */ + if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || +- n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || ++ n->net_conf.rx_queue_size > VIRTIO_NET_VQ_MAX_SIZE || + !is_power_of_2(n->net_conf.rx_queue_size)) { + error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " + "must be a power of 2 between %d and %d.", + n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, +- VIRTQUEUE_MAX_SIZE); ++ VIRTIO_NET_VQ_MAX_SIZE); + virtio_cleanup(vdev); + return; + } +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 267c1e6fd0..d00effe4d5 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2338,7 +2338,7 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + break; + } + +- if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) { ++ if (i == VIRTIO_QUEUE_MAX) { + qemu_log("unacceptable queue_size (%d) or num (%d)\n", + queue_size, i); + abort(); +-- +2.27.0 + diff --git a/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch 
b/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch new file mode 100644 index 0000000000000000000000000000000000000000..9817a2280cf9b883781794f0a4cc442c01bca77d --- /dev/null +++ b/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch @@ -0,0 +1,110 @@ +From c2221815b79be9847c4729709809779b4b0550a7 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 17:28:49 +0800 +Subject: [PATCH] virtio-net: update the default and max of rx/tx_queue_size + +Set the max of tx_queue_size to 4096 even if the backends +are not vhost-user. + +Set the default of rx/tx_queue_size to 2048 if the backends +are vhost-user, otherwise to 4096. + +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 43 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 32 insertions(+), 11 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 0ae2ddc002..523d01746d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -50,12 +50,11 @@ + #define VIRTIO_NET_VM_VERSION 11 + + /* previously fixed value */ +-#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 +-#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 ++#define VIRTIO_NET_VHOST_USER_DEFAULT_SIZE 2048 + + /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ +-#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE +-#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE ++#define VIRTIO_NET_RX_QUEUE_MIN_SIZE 256 ++#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256 + + #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ + +@@ -696,6 +695,28 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + } + } + ++static void virtio_net_set_default_queue_size(VirtIONet *n) ++{ ++ NetClientState *peer = n->nic_conf.peers.ncs[0]; ++ ++ /* Default value is 0 if not set */ ++ if (n->net_conf.rx_queue_size == 0) { ++ if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { ++ n->net_conf.rx_queue_size = 
VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; ++ } else { ++ n->net_conf.rx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ } ++ ++ if (n->net_conf.tx_queue_size == 0) { ++ if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { ++ n->net_conf.tx_queue_size = VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; ++ } else { ++ n->net_conf.tx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ } ++} ++ + static int virtio_net_max_tx_queue_size(VirtIONet *n) + { + NetClientState *peer = n->nic_conf.peers.ncs[0]; +@@ -705,16 +726,16 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) + * size. + */ + if (!peer) { +- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + } + + switch(peer->info->type) { + case NET_CLIENT_DRIVER_VHOST_USER: + return VIRTIO_NET_VQ_MAX_SIZE; + case NET_CLIENT_DRIVER_VHOST_VDPA: +- return VIRTQUEUE_MAX_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + default: +- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + }; + } + +@@ -3633,6 +3654,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + virtio_net_set_config_size(n, n->host_features); + virtio_init(vdev, VIRTIO_ID_NET, n->config_size); + ++ virtio_net_set_default_queue_size(n); ++ + /* + * We set a lower limit on RX queue size to what it always was. 
+ * Guests that want a smaller ring can always resize it without +@@ -3934,10 +3957,8 @@ static Property virtio_net_properties[] = { + TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), +- DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, +- VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), +- DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, +- VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), ++ DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 0), ++ DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 0), + DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), + DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, + true), +-- +2.27.0 + diff --git a/virtio-new-post_load-hook.patch b/virtio-new-post_load-hook.patch deleted file mode 100644 index 974f286c6730c66cc3cb0a64b046bea341dd262b..0000000000000000000000000000000000000000 --- a/virtio-new-post_load-hook.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 8010d3fce008dd13f155bc0babfe236ea44a2712 Mon Sep 17 00:00:00 2001 -From: "Michael S. Tsirkin" -Date: Fri, 11 Oct 2019 15:58:03 +0200 -Subject: [PATCH] virtio: new post_load hook - -Post load hook in virtio vmsd is called early while device is processed, -and when VirtIODevice core isn't fully initialized. Most device -specific code isn't ready to deal with a device in such state, and -behaves weirdly. - -Add a new post_load hook in a device class instead. Devices should use -this unless they specifically want to verify the migration stream as -it's processed, e.g. for bounds checking. - -Cc: qemu-stable@nongnu.org -Suggested-by: "Dr. David Alan Gilbert" -Cc: Mikhail Sennikovsky -Signed-off-by: Michael S. 
Tsirkin -Signed-off-by: Jason Wang -(cherry picked from commit 1dd713837cac8ec5a97d3b8492d72ce5ac94803c) -Signed-off-by: Michael Roth ---- - hw/virtio/virtio.c | 7 +++++++ - include/hw/virtio/virtio.h | 6 ++++++ - 2 files changed, 13 insertions(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index a94ea18a9c..7c3822c3a0 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2287,6 +2287,13 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) - } - rcu_read_unlock(); - -+ if (vdc->post_load) { -+ ret = vdc->post_load(vdev); -+ if (ret) { -+ return ret; -+ } -+ } -+ - return 0; - } - -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index b189788cb2..f9f62370e9 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -158,6 +158,12 @@ typedef struct VirtioDeviceClass { - */ - void (*save)(VirtIODevice *vdev, QEMUFile *f); - int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id); -+ /* Post load hook in vmsd is called early while device is processed, and -+ * when VirtIODevice isn't fully initialized. Devices should use this instead, -+ * unless they specifically want to verify the migration stream as it's -+ * processed, e.g. for bounds checking. 
-+ */ -+ int (*post_load)(VirtIODevice *vdev); - const VMStateDescription *vmsd; - } VirtioDeviceClass; - --- -2.23.0 diff --git a/virtio-pci-Batch-processing-of-IRQFD-mapping-for-mul.patch b/virtio-pci-Batch-processing-of-IRQFD-mapping-for-mul.patch new file mode 100644 index 0000000000000000000000000000000000000000..3d11e48ac2ff8ea41d796effac111d4b320a3a53 --- /dev/null +++ b/virtio-pci-Batch-processing-of-IRQFD-mapping-for-mul.patch @@ -0,0 +1,68 @@ +From c709e84c483af5466b9bf1d289a70813942da7e0 Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 1 Apr 2025 15:33:54 +0800 +Subject: [PATCH] virtio-pci:Batch processing of IRQFD mapping for multi queue + Virtio devices + +The virtio device with multiple queues currently calls ioctl every time it establishes an irqfd route for vq. Since the devices will not actually run until all queue irqfds are completed, these irqfd routes can be temporarily stored and submitted to kvm through ioctl at once to reduce the number of ioctl attempts and optimize the startup speed of virtio devices. 
+ +Signed-off-by: libai +--- + hw/virtio/virtio-pci.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 06b125ec62..7cd15f70e3 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -49,6 +49,18 @@ + * configuration space */ + #define VIRTIO_PCI_CONFIG_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev)) + ++static KVMRouteChange virtio_pci_route_change; ++ ++static inline void virtio_pci_begin_route_changes(void) ++{ ++ virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); ++} ++ ++static inline void virtio_pci_commit_route_changes(void) ++{ ++ kvm_irqchip_commit_route_changes(&virtio_pci_route_change); ++} ++ + static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, + VirtIOPCIProxy *dev); + static void virtio_pci_reset(DeviceState *qdev); +@@ -815,12 +827,10 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, + int ret; + + if (irqfd->users == 0) { +- KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); +- ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); ++ ret = kvm_irqchip_add_msi_route(&virtio_pci_route_change, vector, &proxy->pci_dev); + if (ret < 0) { + return ret; + } +- kvm_irqchip_commit_route_changes(&c); + irqfd->virq = ret; + } + irqfd->users++; +@@ -950,12 +960,14 @@ static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + } + #endif + ++ virtio_pci_begin_route_changes(); + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; + } + ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } ++ virtio_pci_commit_route_changes(); + + #ifdef __aarch64__ + if (!strcmp(vdev->name, "virtio-net") && ret != 0) { +-- +2.41.0.windows.1 + diff --git a/virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch b/virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch new file mode 100644 index 
0000000000000000000000000000000000000000..bc84f24743a3329dc072acf35b335cbdccec5434 --- /dev/null +++ b/virtio-pci-Fix-the-use-of-an-uninitialized-irqfd.patch @@ -0,0 +1,80 @@ +From 9cd544b83ccd37b9dd7977717a245437533830cd Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Tue, 6 Aug 2024 17:37:12 +0800 +Subject: [PATCH] virtio-pci: Fix the use of an uninitialized irqfd + +The crash was reported in MAC OS and NixOS, here is the link for this bug +https://gitlab.com/qemu-project/qemu/-/issues/2334 +https://gitlab.com/qemu-project/qemu/-/issues/2321 + +In this bug, they are using the virtio_input device. The guest notifier was +not supported for this device, The function virtio_pci_set_guest_notifiers() +was not called, and the vector_irqfd was not initialized. + +So the fix is adding the check for vector_irqfd in virtio_pci_get_notifier() + +The function virtio_pci_get_notifier() can be used in various devices. +It could also be called when VIRTIO_CONFIG_S_DRIVER_OK is not set. In this situation, +the vector_irqfd being NULL is acceptable. We can allow the device continue to boot + +If the vector_irqfd still hasn't been initialized after VIRTIO_CONFIG_S_DRIVER_OK +is set, it means that the function set_guest_notifiers was not called before the +driver started. This indicates that the device is not using the notifier. +At this point, we will let the check fail. + +This fix is verified in vyatta,MacOS,NixOS,fedora system. + +The bt tree for this bug is: +Thread 6 "CPU 0/KVM" received signal SIGSEGV, Segmentation fault. +[Switching to Thread 0x7c817be006c0 (LWP 1269146)] +kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817 +817 if (irqfd->users == 0) { +(gdb) thread apply all bt +... 
+Thread 6 (Thread 0x7c817be006c0 (LWP 1269146) "CPU 0/KVM"): +0 kvm_virtio_pci_vq_vector_use () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:817 +1 kvm_virtio_pci_vector_use_one () at ../qemu-9.0.0/hw/virtio/virtio-pci.c:893 +2 0x00005983657045e2 in memory_region_write_accessor () at ../qemu-9.0.0/system/memory.c:497 +3 0x0000598365704ba6 in access_with_adjusted_size () at ../qemu-9.0.0/system/memory.c:573 +4 0x0000598365705059 in memory_region_dispatch_write () at ../qemu-9.0.0/system/memory.c:1528 +5 0x00005983659b8e1f in flatview_write_continue_step.isra.0 () at ../qemu-9.0.0/system/physmem.c:2713 +6 0x000059836570ba7d in flatview_write_continue () at ../qemu-9.0.0/system/physmem.c:2743 +7 flatview_write () at ../qemu-9.0.0/system/physmem.c:2774 +8 0x000059836570bb76 in address_space_write () at ../qemu-9.0.0/system/physmem.c:2894 +9 0x0000598365763afe in address_space_rw () at ../qemu-9.0.0/system/physmem.c:2904 +10 kvm_cpu_exec () at ../qemu-9.0.0/accel/kvm/kvm-all.c:2917 +11 0x000059836576656e in kvm_vcpu_thread_fn () at ../qemu-9.0.0/accel/kvm/kvm-accel-ops.c:50 +12 0x0000598365926ca8 in qemu_thread_start () at ../qemu-9.0.0/util/qemu-thread-posix.c:541 +13 0x00007c8185bcd1cf in ??? () at /usr/lib/libc.so.6 +14 0x00007c8185c4e504 in clone () at /usr/lib/libc.so.6 + +Fixes: 2ce6cff94d ("virtio-pci: fix use of a released vector") +Cc: qemu-stable@nongnu.org +Signed-off-by: Cindy Lu +Message-Id: <20240806093715.65105-1-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit a8e63ff289d137197ad7a701a587cc432872d798) +Signed-off-by: zhujun2 +--- + hw/virtio/virtio-pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 3ad7487411..06b125ec62 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -860,6 +860,9 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtQueue *vq; + ++ if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) ++ return -1; ++ + if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { + *n = virtio_config_get_guest_notifier(vdev); + *vector = vdev->config_vector; +-- +2.41.0.windows.1 + diff --git a/virtio-pci-fix-queue_enable-write.patch b/virtio-pci-fix-queue_enable-write.patch deleted file mode 100644 index 481b41bbf11f4ebb94ae8fd746b13ad4ac41555d..0000000000000000000000000000000000000000 --- a/virtio-pci-fix-queue_enable-write.patch +++ /dev/null @@ -1,58 +0,0 @@ -From aebd6a1512e03ba51f6824fcdbaa09f67e9ff5e2 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Wed, 10 Jun 2020 13:43:51 +0800 -Subject: [PATCH 11/11] virtio-pci: fix queue_enable write - -Spec said: The driver uses this to selectively prevent the device from -executing requests from this virtqueue. 1 - enabled; 0 - disabled. - -Though write 0 to queue_enable is forbidden by the spec, we should not -assume that the value is 1. - -Fix this by ignore the write value other than 1. - -Signed-off-by: Jason Wang -Message-Id: <20200610054351.15811-1-jasowang@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Stefano Garzarella -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Signed-off-by: BiaoXiang Ye ---- - hw/virtio/virtio-pci.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index b4b0ed26..4b8845a6 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -1259,16 +1259,20 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, - virtio_queue_set_vector(vdev, vdev->queue_sel, val); - break; - case VIRTIO_PCI_COMMON_Q_ENABLE: -- virtio_queue_set_num(vdev, vdev->queue_sel, -- proxy->vqs[vdev->queue_sel].num); -- virtio_queue_set_rings(vdev, vdev->queue_sel, -+ if (val == 1) { -+ virtio_queue_set_num(vdev, vdev->queue_sel, -+ proxy->vqs[vdev->queue_sel].num); -+ virtio_queue_set_rings(vdev, vdev->queue_sel, - ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | - proxy->vqs[vdev->queue_sel].desc[0], - ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | - proxy->vqs[vdev->queue_sel].avail[0], - ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | - proxy->vqs[vdev->queue_sel].used[0]); -- proxy->vqs[vdev->queue_sel].enabled = 1; -+ proxy->vqs[vdev->queue_sel].enabled = 1; -+ } else { -+ virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val); -+ } - break; - case VIRTIO_PCI_COMMON_Q_DESCLO: - proxy->vqs[vdev->queue_sel].desc[0] = val; --- -2.27.0.dirty - diff --git a/virtio-pci-fix-use-of-a-released-vector.patch b/virtio-pci-fix-use-of-a-released-vector.patch new file mode 100644 index 0000000000000000000000000000000000000000..00af28832a00fa29f1d3e14ac7a4a5d0af7ba1b1 --- /dev/null +++ b/virtio-pci-fix-use-of-a-released-vector.patch @@ -0,0 +1,156 @@ +From 11e71bc99d8811644ddf1a854e556170bb8f5db3 Mon Sep 17 00:00:00 2001 +From: Gao Jiazhen +Date: Thu, 12 Sep 2024 16:01:04 +0800 +Subject: [PATCH] virtio-pci: fix use of a released vector +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry picked from commit 2ce6cff94df2650c460f809e5ad263f1d22507c0 + 
+During the booting process of the non-standard image, the behavior of the +called function in qemu is as follows: + +1. vhost_net_stop() was triggered by guest image. This will call the function +virtio_pci_set_guest_notifiers() with assgin= false, +virtio_pci_set_guest_notifiers() will release the irqfd for vector 0 + +2. virtio_reset() was triggered, this will set configure vector to VIRTIO_NO_VECTOR + +3.vhost_net_start() was called (at this time, the configure vector is +still VIRTIO_NO_VECTOR) and then call virtio_pci_set_guest_notifiers() with +assgin=true, so the irqfd for vector 0 is still not "init" during this process + +4. The system continues to boot and sets the vector back to 0. After that +msix_fire_vector_notifier() was triggered to unmask the vector 0 and meet the crash + +To fix the issue, we need to support changing the vector after VIRTIO_CONFIG_S_DRIVER_OK is set. + +(gdb) bt +0 __pthread_kill_implementation (threadid=, signo=signo@entry=6, no_tid=no_tid@entry=0) + at pthread_kill.c:44 +1 0x00007fc87148ec53 in __pthread_kill_internal (signo=6, threadid=) at pthread_kill.c:78 +2 0x00007fc87143e956 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 +3 0x00007fc8714287f4 in __GI_abort () at abort.c:79 +4 0x00007fc87142871b in __assert_fail_base + (fmt=0x7fc8715bbde0 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=) at assert.c:92 +5 0x00007fc871437536 in __GI___assert_fail + (assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=0x5606413f06f0 <__PRETTY_FUNCTION__.19> "kvm_irqchip_commit_routes") at assert.c:101 +6 0x0000560640f884b5 in kvm_irqchip_commit_routes (s=0x560642cae1f0) at ../accel/kvm/kvm-all.c:1837 +7 0x0000560640c98f8e in virtio_pci_one_vector_unmask + (proxy=0x560643c65f00, queue_no=4294967295, vector=0, msg=..., n=0x560643c6e4c8) + at ../hw/virtio/virtio-pci.c:1005 +8 
0x0000560640c99201 in virtio_pci_vector_unmask (dev=0x560643c65f00, vector=0, msg=...) + at ../hw/virtio/virtio-pci.c:1070 +9 0x0000560640bc402e in msix_fire_vector_notifier (dev=0x560643c65f00, vector=0, is_masked=false) + at ../hw/pci/msix.c:120 +10 0x0000560640bc40f1 in msix_handle_mask_update (dev=0x560643c65f00, vector=0, was_masked=true) + at ../hw/pci/msix.c:140 +11 0x0000560640bc4503 in msix_table_mmio_write (opaque=0x560643c65f00, addr=12, val=0, size=4) + at ../hw/pci/msix.c:231 +12 0x0000560640f26d83 in memory_region_write_accessor + (mr=0x560643c66540, addr=12, value=0x7fc86b7bc628, size=4, shift=0, mask=4294967295, attrs=...) + at ../system/memory.c:497 +13 0x0000560640f270a6 in access_with_adjusted_size + + (addr=12, value=0x7fc86b7bc628, size=4, access_size_min=1, access_size_max=4, access_fn=0x560640f26c8d , mr=0x560643c66540, attrs=...) at ../system/memory.c:573 +14 0x0000560640f2a2b5 in memory_region_dispatch_write (mr=0x560643c66540, addr=12, data=0, op=MO_32, attrs=...) 
+ at ../system/memory.c:1521 +15 0x0000560640f37bac in flatview_write_continue + (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., ptr=0x7fc871e9c028, len=4, addr1=12, l=4, mr=0x560643c66540) + at ../system/physmem.c:2714 +16 0x0000560640f37d0f in flatview_write + (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) at ../system/physmem.c:2756 +17 0x0000560640f380bf in address_space_write + (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) + at ../system/physmem.c:2863 +18 0x0000560640f3812c in address_space_rw + (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4, is_write=true) at ../system/physmem.c:2873 +--Type for more, q to quit, c to continue without paging-- +19 0x0000560640f8aa55 in kvm_cpu_exec (cpu=0x560642f205e0) at ../accel/kvm/kvm-all.c:2915 +20 0x0000560640f8d731 in kvm_vcpu_thread_fn (arg=0x560642f205e0) at ../accel/kvm/kvm-accel-ops.c:51 +21 0x00005606411949f4 in qemu_thread_start (args=0x560642f292b0) at ../util/qemu-thread-posix.c:541 +22 0x00007fc87148cdcd in start_thread (arg=) at pthread_create.c:442 +23 0x00007fc871512630 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 +(gdb) + +MST: coding style and typo fixups + +Fixes: f9a09ca ("vhost: add support for configure interrupt") +Cc: qemu-stable@nongnu.org +Signed-off-by: Cindy Lu +Message-ID: <2321ade5f601367efe7380c04e3f61379c59b48f.1713173550.git.mst@redhat.com> +Cc: Lei Yang +Cc: Jason Wang +Signed-off-by: Michael S. 
Tsirkin +Tested-by: Cindy Lu +Signed-off-by: Gao Jiazhen +--- + hw/virtio/virtio-pci.c | 37 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 35 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index f8adb0520a..3ad7487411 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1456,6 +1456,38 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, + return offset; + } + ++static void virtio_pci_set_vector(VirtIODevice *vdev, ++ VirtIOPCIProxy *proxy, ++ int queue_no, uint16_t old_vector, ++ uint16_t new_vector) ++{ ++ bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && ++ msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); ++ ++ if (new_vector == old_vector) { ++ return; ++ } ++ ++ /* ++ * If the device uses irqfd and the vector changes after DRIVER_OK is ++ * set, we need to release the old vector and set up the new one. ++ * Otherwise just need to set the new vector on the device. ++ */ ++ if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) { ++ kvm_virtio_pci_vector_release_one(proxy, queue_no); ++ } ++ /* Set the new vector on the device. */ ++ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { ++ vdev->config_vector = new_vector; ++ } else { ++ virtio_queue_set_vector(vdev, queue_no, new_vector); ++ } ++ /* If the new vector changed need to set it up. 
*/ ++ if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) { ++ kvm_virtio_pci_vector_use_one(proxy, queue_no); ++ } ++} ++ + int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, + uint8_t bar, uint64_t offset, uint64_t length, + uint8_t id) +@@ -1602,7 +1634,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + } else { + val = VIRTIO_NO_VECTOR; + } +- vdev->config_vector = val; ++ virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX, ++ vdev->config_vector, val); + break; + case VIRTIO_PCI_COMMON_STATUS: + if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { +@@ -1642,7 +1675,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + } else { + val = VIRTIO_NO_VECTOR; + } +- virtio_queue_set_vector(vdev, vdev->queue_sel, val); ++ virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val); + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: + if (val == 1) { +-- +2.41.0.windows.1 + diff --git a/virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch b/virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch deleted file mode 100644 index d8ed58faa8f5c6517d131ced73209bc41122158e..0000000000000000000000000000000000000000 --- a/virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 637606d18c7208e21d8ab4f318cccde64ae58c76 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Tue, 25 Feb 2020 15:55:53 +0800 -Subject: [PATCH 2/9] virtio-pmem: do delete rq_vq in virtio_pmem_unrealize -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Similar to other virtio-devices, rq_vq forgot to delete in -virtio_pmem_unrealize, this patch fix it. This device has already -maintained a vq pointer, thus we use the new virtio_delete_queue -function directly to do the cleanup. - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Message-Id: <20200225075554.10835-4-pannengyuan@huawei.com> -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Signed-off-by: AlexChen ---- - hw/virtio/virtio-pmem.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c -index 17c196d..c680b0a 100644 ---- a/hw/virtio/virtio-pmem.c -+++ b/hw/virtio/virtio-pmem.c -@@ -127,6 +127,7 @@ static void virtio_pmem_unrealize(DeviceState *dev, Error **errp) - VirtIOPMEM *pmem = VIRTIO_PMEM(dev); - - host_memory_backend_set_mapped(pmem->memdev, false); -+ virtio_delete_queue(pmem->rq_vq); - virtio_cleanup(vdev); - } - --- -1.8.3.1 - diff --git a/virtio-print-the-guest-virtio_net-features-that-host.patch b/virtio-print-the-guest-virtio_net-features-that-host.patch new file mode 100644 index 0000000000000000000000000000000000000000..15157bb695f275763d5a9cd3eeb8538d86c4a109 --- /dev/null +++ b/virtio-print-the-guest-virtio_net-features-that-host.patch @@ -0,0 +1,112 @@ +From b24730e9abe34898483fa62b24c26abb9d98570c Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 14:16:17 +0800 +Subject: [PATCH] virtio: print the guest virtio_net features that host does + not support + +print the guest virtio_net features that host does not support + +For example: +Please check host config, because host does not support required feature bits 0x1983 +virtio_net_feature: csum, guest_csum, guest_tso4, guest_tso6, host_tso4, host_tso6 +Features 0xef99a3 unsupported. 
Allowed features: 0x40ff8024 + +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 41 ++++++++++++++++++++++++++++++++++++++ + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 1 + + 3 files changed, 49 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 80c56f0cfc..7f69a4b842 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3952,6 +3952,46 @@ static Property virtio_net_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++static void virtio_net_print_features(uint64_t features) ++{ ++ Property *props = virtio_net_properties; ++ int feature_cnt = 0; ++ ++ if (!features) { ++ return; ++ } ++ printf("virtio_net_feature: "); ++ ++ for (; features && props->name; props++) { ++ /* The bitnr of property may be default(0) besides 'csum' property. */ ++ if (props->bitnr == 0 && strcmp(props->name, "csum")) { ++ continue; ++ } ++ ++ /* Features only support 64bit. */ ++ if (props->bitnr > 63) { ++ continue; ++ } ++ ++ if (virtio_has_feature(features, props->bitnr)) { ++ virtio_clear_feature(&features, props->bitnr); ++ if (feature_cnt != 0) { ++ printf(", "); ++ } ++ printf("%s", props->name); ++ feature_cnt++; ++ } ++ } ++ ++ if (features) { ++ if (feature_cnt != 0) { ++ printf(", "); ++ } ++ printf("unkown bits 0x%." 
PRIx64, features); ++ } ++ printf("\n"); ++} ++ + static void virtio_net_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3966,6 +4006,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) + vdc->set_config = virtio_net_set_config; + vdc->get_features = virtio_net_get_features; + vdc->set_features = virtio_net_set_features; ++ vdc->print_features = virtio_net_print_features; + vdc->bad_features = virtio_net_bad_features; + vdc->reset = virtio_net_reset; + vdc->queue_reset = virtio_net_queue_reset; +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index ec09d515c2..1f78b74c00 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2905,6 +2905,13 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) + { + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + bool bad = (val & ~(vdev->host_features)) != 0; ++ uint64_t feat = val & ~(vdev->host_features); ++ ++ if (bad && k->print_features) { ++ qemu_log("error: Please check host config, "\ ++ "because host does not support required feature bits 0x%" PRIx64 "\n", feat); ++ k->print_features(feat); ++ } + + val &= vdev->host_features; + if (k->set_features) { +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c8f72850bc..7c35bb841b 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -182,6 +182,7 @@ struct VirtioDeviceClass { + int (*validate_features)(VirtIODevice *vdev); + void (*get_config)(VirtIODevice *vdev, uint8_t *config); + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); ++ void (*print_features)(uint64_t features); + void (*reset)(VirtIODevice *vdev); + void (*set_status)(VirtIODevice *vdev, uint8_t val); + /* Device must validate queue_index. 
*/ +-- +2.27.0 + diff --git a/virtio-processes-indirect-descriptors-even-if-the-re.patch b/virtio-processes-indirect-descriptors-even-if-the-re.patch new file mode 100644 index 0000000000000000000000000000000000000000..f0023b9dca59ad8641595f8903e23e4efabae88b --- /dev/null +++ b/virtio-processes-indirect-descriptors-even-if-the-re.patch @@ -0,0 +1,60 @@ +From 62cedbd18455e0b800c9ab0a47eef599c5309eaa Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Sat, 14 Jun 2025 16:40:39 +0800 +Subject: [PATCH] virtio processes indirect descriptors even if the respected + feature VIRTIO_RING_F_INDIRECT_DESC was not negotiated. If qemu is used with + reduced set of features to emulate the hardware device that does not support + indirect descriptors, the will probably trigger problematic flows on the + hardware setup but do not reveal the mistake on qemu. Add LOG_GUEST_ERROR + for such case. This will issue logs with '-d guest_errors' in the command + line + +Signed-off-by: Yuri Benditovich +Message-Id: <20250515063237.808293-1-yuri.benditovich@daynix.com> +Signed-off-by: Yuri Benditovich +Signed-off-by: dinglimin +--- + hw/virtio/virtio.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 4f5b241fd3..f57b6c955e 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -204,6 +204,15 @@ static const char *virtio_id_to_name(uint16_t device_id) + return name; + } + ++static void virtio_check_indirect_feature(VirtIODevice *vdev) ++{ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Device %s: indirect_desc was not negotiated!\n", ++ vdev->name); ++ } ++} ++ + /* Called within call_rcu(). 
*/ + static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) + { +@@ -1614,6 +1623,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) + goto done; + } + ++ virtio_check_indirect_feature(vdev); ++ + /* loop over the indirect descriptor table */ + len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, + desc.addr, desc.len, false); +@@ -1744,6 +1755,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) + goto done; + } + ++ virtio_check_indirect_feature(vdev); ++ + /* loop over the indirect descriptor table */ + len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, + desc.addr, desc.len, false); +-- +2.33.0 + diff --git a/virtio-remove-virtio_tswap16s-call-in-vring_packed_e.patch b/virtio-remove-virtio_tswap16s-call-in-vring_packed_e.patch new file mode 100644 index 0000000000000000000000000000000000000000..c3613175eb1be9ef2f6d118f9c1aa99c8cc711e5 --- /dev/null +++ b/virtio-remove-virtio_tswap16s-call-in-vring_packed_e.patch @@ -0,0 +1,52 @@ +From 400e9dbe4dae8efc110a2363590ce35fd11d7d29 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Mon, 1 Jul 2024 09:52:08 +0200 +Subject: [PATCH] virtio: remove virtio_tswap16s() call in + vring_packed_event_read() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Commit d152cdd6f6 ("virtio: use virtio accessor to access packed event") +switched using of address_space_read_cached() to virito_lduw_phys_cached() +to access packed descriptor event. + +When we used address_space_read_cached(), we needed to call +virtio_tswap16s() to handle the endianess of the field, but +virito_lduw_phys_cached() already handles it internally, so we no longer +need to call virtio_tswap16s() (as the commit had done for `off_wrap`, +but forgot for `flags`). 
+ +Fixes: d152cdd6f6 ("virtio: use virtio accessor to access packed event") +Cc: jasowang@redhat.com +Cc: qemu-stable@nongnu.org +Reported-by: Xoykie +Link: https://lore.kernel.org/qemu-devel/CAFU8RB_pjr77zMLsM0Unf9xPNxfr_--Tjr49F_eX32ZBc5o2zQ@mail.gmail.com +Signed-off-by: Stefano Garzarella +Message-Id: <20240701075208.19634-1-sgarzare@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Peter Maydell +Reviewed-by: Eugenio Pérez +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 7aa6492401e95fb296dec7cda81e67d91f6037d7) +Signed-off-by: zhujun2 +--- + hw/virtio/virtio.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 202aae868e..8c3b6b87aa 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -322,7 +322,6 @@ static void vring_packed_event_read(VirtIODevice *vdev, + /* Make sure flags is seen before off_wrap */ + smp_rmb(); + e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); +- virtio_tswap16s(vdev, &e->flags); + } + + static void vring_packed_off_wrap_write(VirtIODevice *vdev, +-- +2.41.0.windows.1 + diff --git a/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch b/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..f78f3d7eac329e114005a901c5a1ce37ceb77dba --- /dev/null +++ b/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch @@ -0,0 +1,37 @@ +From 4e5de00fb124d82f9c4ce2ac433ed3d691783c01 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Wed, 9 Feb 2022 19:58:21 +0800 +Subject: [PATCH] virtio-scsi: bugfix: fix qemu crash for hotplug scsi disk + with dataplane + +The vm will trigger a disk sweep operation after plugging +a controller who's io type is iothread. If attach a scsi +disk immediately, the sg_inqury request in vm will trigger +the assert in virtio_scsi_ctx_check(), which is called by +virtio_scsi_handle_cmd_req_prepare(). 
+ +Add judgment in virtio_scsi_handle_cmd_req_prepare() and +return IO Error directly if the device has not been +initialized. + +Signed-off-by: Jinhua Cao +--- + hw/scsi/virtio-scsi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 9c751bf296..bc7feb404a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -781,7 +781,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) + req->req.cmd.tag, req->req.cmd.cdb[0]); + + d = virtio_scsi_device_get(s, req->req.cmd.lun); +- if (!d) { ++ if (!d || !d->qdev.realized) { + req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + virtio_scsi_complete_cmd_req(req); + return -ENOENT; +-- +2.27.0 + diff --git a/virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch b/virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch deleted file mode 100644 index 02069901b096cd09b0f30dbef9d55e3fe6dc920d..0000000000000000000000000000000000000000 --- a/virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 0d93f5455489274201b1054d987b12f8e8a6206e Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Mon, 9 Mar 2020 10:17:38 +0800 -Subject: [PATCH 11/14] virtio-serial-bus: Plug memory leak on realize() error - paths - -We neglect to free port->bh on the error paths. Fix that. 
-Reproducer: - {'execute': 'device_add', 'arguments': {'id': 'virtio_serial_pci0', 'driver': 'virtio-serial-pci', 'bus': 'pci.0', 'addr': '0x5'}, 'id': 'yVkZcGgV'} - {'execute': 'device_add', 'arguments': {'id': 'port1', 'driver': 'virtserialport', 'name': 'port1', 'chardev': 'channel1', 'bus': 'virtio_serial_pci0.0', 'nr': 1}, 'id': '3dXdUgJA'} - {'execute': 'device_add', 'arguments': {'id': 'port2', 'driver': 'virtserialport', 'name': 'port2', 'chardev': 'channel2', 'bus': 'virtio_serial_pci0.0', 'nr': 1}, 'id': 'qLzcCkob'} - {'execute': 'device_add', 'arguments': {'id': 'port2', 'driver': 'virtserialport', 'name': 'port2', 'chardev': 'channel2', 'bus': 'virtio_serial_pci0.0', 'nr': 2}, 'id': 'qLzcCkob'} - -The leak stack: -Direct leak of 40 byte(s) in 1 object(s) allocated from: - #0 0x7f04a8008ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8) - #1 0x7f04a73cf1d5 in g_malloc (/lib64/libglib-2.0.so.0+0x531d5) - #2 0x56273eaee484 in aio_bh_new /mnt/sdb/backup/qemu/util/async.c:125 - #3 0x56273eafe9a8 in qemu_bh_new /mnt/sdb/backup/qemu/util/main-loop.c:532 - #4 0x56273d52e62e in virtser_port_device_realize /mnt/sdb/backup/qemu/hw/char/virtio-serial-bus.c:946 - #5 0x56273dcc5040 in device_set_realized /mnt/sdb/backup/qemu/hw/core/qdev.c:891 - #6 0x56273e5ebbce in property_set_bool /mnt/sdb/backup/qemu/qom/object.c:2238 - #7 0x56273e5e5a9c in object_property_set /mnt/sdb/backup/qemu/qom/object.c:1324 - #8 0x56273e5ef5f8 in object_property_set_qobject /mnt/sdb/backup/qemu/qom/qom-qobject.c:26 - #9 0x56273e5e5e6a in object_property_set_bool /mnt/sdb/backup/qemu/qom/object.c:1390 - #10 0x56273daa40de in qdev_device_add /mnt/sdb/backup/qemu/qdev-monitor.c:680 - #11 0x56273daa53e9 in qmp_device_add /mnt/sdb/backup/qemu/qdev-monitor.c:805 - -Fixes: 199646d81522509ac2dba6d28c31e8c7d807bc93 -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Reviewed-by: Markus Armbruster -Reviewed-by: Amit Shah -Message-Id: <20200309021738.30072-1-pannengyuan@huawei.com> 
-Reviewed-by: Laurent Vivier -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: Peng Liang ---- - hw/char/virtio-serial-bus.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c -index f7a54f261b21..2d23dae6d2b7 100644 ---- a/hw/char/virtio-serial-bus.c -+++ b/hw/char/virtio-serial-bus.c -@@ -940,7 +940,6 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) - Error *err = NULL; - - port->vser = bus->vser; -- port->bh = qemu_bh_new(flush_queued_data_bh, port); - - assert(vsc->have_data); - -@@ -989,6 +988,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) - return; - } - -+ port->bh = qemu_bh_new(flush_queued_data_bh, port); - port->elem = NULL; - } - --- -2.26.2 - diff --git a/virtio-snd-add-max-size-bounds-check-in-input-cb-CVE.patch b/virtio-snd-add-max-size-bounds-check-in-input-cb-CVE.patch new file mode 100644 index 0000000000000000000000000000000000000000..f68bc0c0b232cb1c30b9cd67d32316a1b998de86 --- /dev/null +++ b/virtio-snd-add-max-size-bounds-check-in-input-cb-CVE.patch @@ -0,0 +1,61 @@ +From 56a588dad8d085a89b24fe2103bd623d4260e02d Mon Sep 17 00:00:00 2001 +From: Manos Pitsidianakis +Date: Mon, 8 Jul 2024 10:09:49 +0300 +Subject: [PATCH] virtio-snd: add max size bounds check in input + cb(CVE-2024-7730) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 98e77e3dd8dd6e7aa9a7dffa60f49c8c8a49d4e3 + +When reading input audio in the virtio-snd input callback, +virtio_snd_pcm_in_cb(), we do not check whether the iov can actually fit +the data buffer. This is because we use the buffer->size field as a +total-so-far accumulator instead of byte-size-left like in TX buffers. + +This triggers an out of bounds write if the size of the virtio queue +element is equal to virtio_snd_pcm_status, which makes the available +space for audio data zero. 
This commit adds a check for reaching the +maximum buffer size before attempting any writes. + +Reported-by: Zheyu Ma +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2427 +Signed-off-by: Manos Pitsidianakis +Message-Id: +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + hw/audio/virtio-snd.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c +index 137fa77a01..15986af41e 100644 +--- a/hw/audio/virtio-snd.c ++++ b/hw/audio/virtio-snd.c +@@ -1274,7 +1274,7 @@ static void virtio_snd_pcm_in_cb(void *data, int available) + { + VirtIOSoundPCMStream *stream = data; + VirtIOSoundPCMBuffer *buffer; +- size_t size; ++ size_t size, max_size; + + WITH_QEMU_LOCK_GUARD(&stream->queue_mutex) { + while (!QSIMPLEQ_EMPTY(&stream->queue)) { +@@ -1288,7 +1288,12 @@ static void virtio_snd_pcm_in_cb(void *data, int available) + continue; + } + ++ max_size = iov_size(buffer->elem->in_sg, buffer->elem->in_num); + for (;;) { ++ if (buffer->size >= max_size) { ++ return_rx_buffer(stream, buffer); ++ break; ++ } + size = AUD_read(stream->voice.in, + buffer->data + buffer->size, + MIN(available, (stream->params.period_bytes - +-- +2.41.0.windows.1 + diff --git a/virtio_blk-Add-support-for-retry-on-errors.patch b/virtio_blk-Add-support-for-retry-on-errors.patch index e7d8efd7ed99ae35f5309bee2e90bedfcc1f5e86..696bde5b607d4ca3ffe912930afcf5e854c0491f 100644 --- a/virtio_blk-Add-support-for-retry-on-errors.patch +++ b/virtio_blk-Add-support-for-retry-on-errors.patch @@ -1,5 +1,5 @@ -From f3158cc327d435939d87ecee23485d082ebf3ba2 Mon Sep 17 00:00:00 2001 -From: Jiahui Cen +From 0da112402efe63e09fdd6ed43aa026d5b625988f Mon Sep 17 00:00:00 2001 +From: yexiao Date: Thu, 21 Jan 2021 15:46:53 +0800 Subject: [PATCH] virtio_blk: Add support for retry on errors @@ -8,15 +8,16 @@ queued requests to implement retry_request_cb. 
Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang +Signed-off-by: Alex Chen --- - hw/block/virtio-blk.c | 21 ++++++++++++++++++--- - 1 file changed, 18 insertions(+), 3 deletions(-) + hw/block/virtio-blk.c | 47 ++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index ddf525b9d7..2db9804cfe 100644 +index a1f8e15522..1ebc9188c0 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c -@@ -101,6 +101,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, +@@ -90,6 +90,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, block_acct_failed(blk_get_stats(s->blk), &req->acct); } virtio_blk_free_request(req); @@ -27,7 +28,7 @@ index ddf525b9d7..2db9804cfe 100644 } blk_error_action(s->blk, action, is_read, error); -@@ -142,6 +146,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) +@@ -131,6 +135,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) } } @@ -35,7 +36,7 @@ index ddf525b9d7..2db9804cfe 100644 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); -@@ -161,6 +166,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) +@@ -150,6 +155,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) } } @@ -43,7 +44,7 @@ index ddf525b9d7..2db9804cfe 100644 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); -@@ -183,6 +189,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) +@@ -172,6 +178,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) } } @@ -51,10 +52,10 @@ index ddf525b9d7..2db9804cfe 100644 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); if (is_write_zeroes) { block_acct_done(blk_get_stats(s->blk), &req->acct); -@@ -811,12 +818,12 @@ static void 
virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - - void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) +@@ -1183,12 +1190,12 @@ static void virtio_blk_dma_restart_bh(void *opaque) { + VirtIOBlock *s = opaque; + - VirtIOBlockReq *req = s->rq; + VirtIOBlockReq *req; MultiReqBuffer mrb = {}; @@ -67,19 +68,47 @@ index ddf525b9d7..2db9804cfe 100644 while (req) { VirtIOBlockReq *next = req->next; if (virtio_blk_handle_request(req, &mrb)) { -@@ -1101,8 +1108,16 @@ static void virtio_blk_resize(void *opaque) - virtio_notify_config(vdev); +@@ -1541,10 +1548,44 @@ static void virtio_blk_drained_end(void *opaque) + } } +static void virtio_blk_retry_request(void *opaque) +{ + VirtIOBlock *s = VIRTIO_BLK(opaque); + -+ virtio_blk_process_queued_requests(s, false); ++ VirtIOBlockReq *req; ++ MultiReqBuffer mrb = {}; ++ ++ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); ++ req = s->rq; ++ s->rq = NULL; ++ while (req) { ++ VirtIOBlockReq *next = req->next; ++ if (virtio_blk_handle_request(req, &mrb)) { ++ /* Device is now broken and won't do any processing until it gets ++ * reset. Already queued requests will be lost: let's purge them. 
++ */ ++ while (req) { ++ next = req->next; ++ virtqueue_detach_element(req->vq, &req->elem, 0); ++ virtio_blk_free_request(req); ++ req = next; ++ } ++ break; ++ } ++ req = next; ++ } ++ ++ if (mrb.num_reqs) { ++ virtio_blk_submit_multireq(s, &mrb); ++ } ++ aio_context_release(blk_get_aio_context(s->conf.conf.blk)); +} + static const BlockDevOps virtio_block_ops = { - .resize_cb = virtio_blk_resize, + .resize_cb = virtio_blk_resize, + .drained_begin = virtio_blk_drained_begin, + .drained_end = virtio_blk_drained_end, + .retry_request_cb = virtio_blk_retry_request, }; diff --git a/vl-Don-t-mismatch-g_strsplit-g_free.patch b/vl-Don-t-mismatch-g_strsplit-g_free.patch deleted file mode 100644 index dc1f4cc484e8b27af68fa7c533ce338fdfbcf7ad..0000000000000000000000000000000000000000 --- a/vl-Don-t-mismatch-g_strsplit-g_free.patch +++ /dev/null @@ -1,56 +0,0 @@ -From cad4a99e8cab2fe581fb2c6c1421f5547b451e96 Mon Sep 17 00:00:00 2001 -From: Pan Nengyuan -Date: Fri, 10 Jan 2020 17:17:09 +0800 -Subject: [PATCH] vl: Don't mismatch g_strsplit()/g_free() - -It's a mismatch between g_strsplit and g_free, it will cause a memory leak as follow: - -[root@localhost]# ./aarch64-softmmu/qemu-system-aarch64 -accel help -Accelerators supported in QEMU binary: -tcg -kvm -================================================================= -==1207900==ERROR: LeakSanitizer: detected memory leaks - -Direct leak of 8 byte(s) in 2 object(s) allocated from: - #0 0xfffd700231cb in __interceptor_malloc (/lib64/libasan.so.4+0xd31cb) - #1 0xfffd6ec57163 in g_malloc (/lib64/libglib-2.0.so.0+0x57163) - #2 0xfffd6ec724d7 in g_strndup (/lib64/libglib-2.0.so.0+0x724d7) - #3 0xfffd6ec73d3f in g_strsplit (/lib64/libglib-2.0.so.0+0x73d3f) - #4 0xaaab66be5077 in main /mnt/sdc/qemu-master/qemu-4.2.0-rc0/vl.c:3517 - #5 0xfffd6e140b9f in __libc_start_main (/lib64/libc.so.6+0x20b9f) - #6 0xaaab66bf0f53 (./build/aarch64-softmmu/qemu-system-aarch64+0x8a0f53) - -Direct leak of 2 byte(s) in 2 object(s) allocated from: 
- #0 0xfffd700231cb in __interceptor_malloc (/lib64/libasan.so.4+0xd31cb) - #1 0xfffd6ec57163 in g_malloc (/lib64/libglib-2.0.so.0+0x57163) - #2 0xfffd6ec7243b in g_strdup (/lib64/libglib-2.0.so.0+0x7243b) - #3 0xfffd6ec73e6f in g_strsplit (/lib64/libglib-2.0.so.0+0x73e6f) - #4 0xaaab66be5077 in main /mnt/sdc/qemu-master/qemu-4.2.0-rc0/vl.c:3517 - #5 0xfffd6e140b9f in __libc_start_main (/lib64/libc.so.6+0x20b9f) - #6 0xaaab66bf0f53 (./build/aarch64-softmmu/qemu-system-aarch64+0x8a0f53) - -Reported-by: Euler Robot -Signed-off-by: Pan Nengyuan -Message-Id: <20200110091710.53424-2-pannengyuan@huawei.com> -Signed-off-by: Paolo Bonzini ---- - vl.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/vl.c b/vl.c -index b426b32134..cec0bfdb44 100644 ---- a/vl.c -+++ b/vl.c -@@ -3532,7 +3532,7 @@ int main(int argc, char **argv, char **envp) - gchar **optname = g_strsplit(typename, - ACCEL_CLASS_SUFFIX, 0); - printf("%s\n", optname[0]); -- g_free(optname); -+ g_strfreev(optname); - } - g_free(typename); - } --- -2.27.0 - diff --git a/vl-fix-type-is-NULL-in-vga-help.patch b/vl-fix-type-is-NULL-in-vga-help.patch new file mode 100644 index 0000000000000000000000000000000000000000..9215ef8b9eafc8b61703f40176245c871c902441 --- /dev/null +++ b/vl-fix-type-is-NULL-in-vga-help.patch @@ -0,0 +1,49 @@ +From ef42d79d805e430e24df57d46c156f9a7e3e1bed Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 25 Jul 2024 14:11:12 +0800 +Subject: [PATCH] vl: fix "type is NULL" in -vga help +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from a99dc9cd611cbaf10edee6260272e299626d0871 + +Don't pass NULL to module_object_class_by_name(), when the interface is +unavailable. 
+ +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240715114420.2062870-1-marcandre.lureau@redhat.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + system/vl.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/system/vl.c b/system/vl.c +index 165c3cae8a..8e3357c578 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -994,9 +994,16 @@ static bool vga_interface_available(VGAInterfaceType t) + const VGAInterfaceInfo *ti = &vga_interfaces[t]; + + assert(t < VGA_TYPE_MAX); +- return !ti->class_names[0] || +- module_object_class_by_name(ti->class_names[0]) || +- module_object_class_by_name(ti->class_names[1]); ++ ++ if (!ti->class_names[0] || module_object_class_by_name(ti->class_names[0])) { ++ return true; ++ } ++ ++ if (ti->class_names[1] && module_object_class_by_name(ti->class_names[1])) { ++ return true; ++ } ++ ++ return false; + } + + static const char * +-- +2.41.0.windows.1 + diff --git a/vmstate-add-qom-interface-to-get-id.patch b/vmstate-add-qom-interface-to-get-id.patch deleted file mode 100644 index 53a004405a907109dfb0bbc9354a3b0ef979846f..0000000000000000000000000000000000000000 --- a/vmstate-add-qom-interface-to-get-id.patch +++ /dev/null @@ -1,210 +0,0 @@ -From d771fca664e40c7d7ec5dfa2c656a282bff705b7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 28 Aug 2019 16:00:19 +0400 -Subject: [PATCH] vmstate: add qom interface to get id -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add an interface to get the instance id, instead of depending on -Device and qdev_get_dev_path(). - -Signed-off-by: Marc-André Lureau -Reviewed-by: Daniel P. Berrangé -Acked-by: Dr. 
David Alan Gilbert ---- - MAINTAINERS | 2 ++ - hw/core/Makefile.objs | 1 + - hw/core/qdev.c | 14 +++++++++++++ - hw/core/vmstate-if.c | 23 +++++++++++++++++++++ - include/hw/vmstate-if.h | 40 ++++++++++++++++++++++++++++++++++++ - include/migration/register.h | 2 ++ - include/migration/vmstate.h | 2 ++ - tests/Makefile.include | 1 + - 8 files changed, 85 insertions(+) - create mode 100644 hw/core/vmstate-if.c - create mode 100644 include/hw/vmstate-if.h - -diff --git a/MAINTAINERS b/MAINTAINERS -index d6de200453..e2d74d7ec3 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -2135,6 +2135,8 @@ Migration - M: Juan Quintela - M: Dr. David Alan Gilbert - S: Maintained -+F: hw/core/vmstate-if.c -+F: include/hw/vmstate-if.h - F: include/migration/ - F: migration/ - F: scripts/vmstate-static-checker.py -diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs -index f8481d959f..54c51583d8 100644 ---- a/hw/core/Makefile.objs -+++ b/hw/core/Makefile.objs -@@ -8,6 +8,7 @@ common-obj-y += irq.o - common-obj-y += hotplug.o - common-obj-$(CONFIG_SOFTMMU) += nmi.o - common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o -+common-obj-y += vmstate-if.o - - common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o - common-obj-$(CONFIG_XILINX_AXI) += stream.o -diff --git a/hw/core/qdev.c b/hw/core/qdev.c -index 4b32f2f46d..13931b1117 100644 ---- a/hw/core/qdev.c -+++ b/hw/core/qdev.c -@@ -1048,9 +1048,18 @@ static void device_unparent(Object *obj) - } - } - -+static char * -+device_vmstate_if_get_id(VMStateIf *obj) -+{ -+ DeviceState *dev = DEVICE(obj); -+ -+ return qdev_get_dev_path(dev); -+} -+ - static void device_class_init(ObjectClass *class, void *data) - { - DeviceClass *dc = DEVICE_CLASS(class); -+ VMStateIfClass *vc = VMSTATE_IF_CLASS(class); - - class->unparent = device_unparent; - -@@ -1062,6 +1071,7 @@ static void device_class_init(ObjectClass *class, void *data) - */ - dc->hotpluggable = true; - dc->user_creatable = true; -+ vc->get_id = device_vmstate_if_get_id; - } - - void 
device_class_set_parent_reset(DeviceClass *dc, -@@ -1119,6 +1129,10 @@ static const TypeInfo device_type_info = { - .class_init = device_class_init, - .abstract = true, - .class_size = sizeof(DeviceClass), -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_VMSTATE_IF }, -+ { } -+ } - }; - - static void qdev_register_types(void) -diff --git a/hw/core/vmstate-if.c b/hw/core/vmstate-if.c -new file mode 100644 -index 0000000000..bf453620fe ---- /dev/null -+++ b/hw/core/vmstate-if.c -@@ -0,0 +1,23 @@ -+/* -+ * VMState interface -+ * -+ * Copyright (c) 2009-2019 Red Hat Inc -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include "hw/vmstate-if.h" -+ -+static const TypeInfo vmstate_if_info = { -+ .name = TYPE_VMSTATE_IF, -+ .parent = TYPE_INTERFACE, -+ .class_size = sizeof(VMStateIfClass), -+}; -+ -+static void vmstate_register_types(void) -+{ -+ type_register_static(&vmstate_if_info); -+} -+ -+type_init(vmstate_register_types); -diff --git a/include/hw/vmstate-if.h b/include/hw/vmstate-if.h -new file mode 100644 -index 0000000000..8ff7f0f292 ---- /dev/null -+++ b/include/hw/vmstate-if.h -@@ -0,0 +1,40 @@ -+/* -+ * VMState interface -+ * -+ * Copyright (c) 2009-2019 Red Hat Inc -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. 
-+ */ -+ -+#ifndef VMSTATE_IF_H -+#define VMSTATE_IF_H -+ -+#include "qom/object.h" -+ -+#define TYPE_VMSTATE_IF "vmstate-if" -+ -+#define VMSTATE_IF_CLASS(klass) \ -+ OBJECT_CLASS_CHECK(VMStateIfClass, (klass), TYPE_VMSTATE_IF) -+#define VMSTATE_IF_GET_CLASS(obj) \ -+ OBJECT_GET_CLASS(VMStateIfClass, (obj), TYPE_VMSTATE_IF) -+#define VMSTATE_IF(obj) \ -+ INTERFACE_CHECK(VMStateIf, (obj), TYPE_VMSTATE_IF) -+ -+typedef struct VMStateIf VMStateIf; -+ -+typedef struct VMStateIfClass { -+ InterfaceClass parent_class; -+ -+ char * (*get_id)(VMStateIf *obj); -+} VMStateIfClass; -+ -+static inline char *vmstate_if_get_id(VMStateIf *vmif) -+{ -+ if (!vmif) { -+ return NULL; -+ } -+ -+ return VMSTATE_IF_GET_CLASS(vmif)->get_id(vmif); -+} -+ -+#endif /* VMSTATE_IF_H */ -diff --git a/include/migration/register.h b/include/migration/register.h -index f3ba10b6ef..158130c8c4 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -14,6 +14,8 @@ - #ifndef MIGRATION_REGISTER_H - #define MIGRATION_REGISTER_H - -+#include "hw/vmstate-if.h" -+ - typedef struct SaveVMHandlers { - /* This runs inside the iothread lock. 
*/ - SaveStateHandler *save_state; -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index 8abd2e3b80..8cc1e19fd9 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -27,6 +27,8 @@ - #ifndef QEMU_VMSTATE_H - #define QEMU_VMSTATE_H - -+#include "hw/vmstate-if.h" -+ - typedef struct VMStateInfo VMStateInfo; - typedef struct VMStateDescription VMStateDescription; - typedef struct VMStateField VMStateField; -diff --git a/tests/Makefile.include b/tests/Makefile.include -index 3be60ab999..1c7772a230 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -566,6 +566,7 @@ tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ - hw/core/irq.o \ - hw/core/fw-path-provider.o \ - hw/core/reset.o \ -+ hw/core/vmstate-if.o \ - $(test-qapi-obj-y) - tests/test-vmstate$(EXESUF): tests/test-vmstate.o \ - migration/vmstate.o migration/vmstate-types.o migration/qemu-file.o \ --- -2.27.0 - diff --git a/vmxcap-add-support-for-VMX-FRED-controls.patch b/vmxcap-add-support-for-VMX-FRED-controls.patch new file mode 100644 index 0000000000000000000000000000000000000000..347356b3b5f3b9cfb86007044c5f17be1a98ca19 --- /dev/null +++ b/vmxcap-add-support-for-VMX-FRED-controls.patch @@ -0,0 +1,66 @@ +From 3aa85bc2d9265305dde99cde12d716ffa9bcef4b Mon Sep 17 00:00:00 2001 +From: Xin Li +Date: Wed, 8 Nov 2023 23:20:10 -0800 +Subject: [PATCH] vmxcap: add support for VMX FRED controls + +commit 2e641870170e28df28c5d9914e76ea7cab141516 upstream. + +Report secondary vm-exit controls and the VMX controls used to +save/load FRED MSRs. 
+ +Intel-SIG: commit 2e641870170e vmxcap: add support for VMX FRED controls + +Tested-by: Shan Kang +Signed-off-by: Xin Li +Message-ID: <20231109072012.8078-5-xin3.li@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + scripts/kvm/vmxcap | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap +index 3fb4d5b342..44898d73c2 100755 +--- a/scripts/kvm/vmxcap ++++ b/scripts/kvm/vmxcap +@@ -24,6 +24,7 @@ MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F + MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 + MSR_IA32_VMX_VMFUNC = 0x491 + MSR_IA32_VMX_PROCBASED_CTLS3 = 0x492 ++MSR_IA32_VMX_EXIT_CTLS2 = 0x493 + + class msr(object): + def __init__(self): +@@ -219,11 +220,21 @@ controls = [ + 23: 'Clear IA32_BNDCFGS', + 24: 'Conceal VM exits from PT', + 25: 'Clear IA32_RTIT_CTL', ++ 31: 'Activate secondary VM-exit controls', + }, + cap_msr = MSR_IA32_VMX_EXIT_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS, + ), + ++ Allowed1Control( ++ name = 'secondary VM-Exit controls', ++ bits = { ++ 0: 'Save IA32 FRED MSRs', ++ 1: 'Load IA32 FRED MSRs', ++ }, ++ cap_msr = MSR_IA32_VMX_EXIT_CTLS2, ++ ), ++ + Control( + name = 'VM-Entry controls', + bits = { +@@ -237,6 +248,7 @@ controls = [ + 16: 'Load IA32_BNDCFGS', + 17: 'Conceal VM entries from PT', + 18: 'Load IA32_RTIT_CTL', ++ 23: 'Load IA32 FRED MSRs', + }, + cap_msr = MSR_IA32_VMX_ENTRY_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS, +-- +2.41.0.windows.1 + diff --git a/vmxcap-correct-the-name-of-the-variables.patch b/vmxcap-correct-the-name-of-the-variables.patch deleted file mode 100644 index 3a402dfa1e6908d301ff51e2499af5b3443e3014..0000000000000000000000000000000000000000 --- a/vmxcap-correct-the-name-of-the-variables.patch +++ /dev/null @@ -1,44 +0,0 @@ -From de8779d10794312d1eb56dda5936df7ad6e3c87f Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 1 Jul 2019 16:51:24 +0200 -Subject: [PATCH] vmxcap: correct the name of the variables - -The low bits are 1 if the 
control must be one, the high bits -are 1 if the control can be one. Correct the variable names -as they are very confusing. - -Signed-off-by: Paolo Bonzini ---- - scripts/kvm/vmxcap | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap -index 99a8146aaa..2db683215d 100755 ---- a/scripts/kvm/vmxcap -+++ b/scripts/kvm/vmxcap -@@ -51,15 +51,15 @@ class Control(object): - return (val & 0xffffffff, val >> 32) - def show(self): - print(self.name) -- mbz, mb1 = self.read2(self.cap_msr) -- tmbz, tmb1 = 0, 0 -+ mb1, cb1 = self.read2(self.cap_msr) -+ tmb1, tcb1 = 0, 0 - if self.true_cap_msr: -- tmbz, tmb1 = self.read2(self.true_cap_msr) -+ tmb1, tcb1 = self.read2(self.true_cap_msr) - for bit in sorted(self.bits.keys()): -- zero = not (mbz & (1 << bit)) -- one = mb1 & (1 << bit) -- true_zero = not (tmbz & (1 << bit)) -- true_one = tmb1 & (1 << bit) -+ zero = not (mb1 & (1 << bit)) -+ one = cb1 & (1 << bit) -+ true_zero = not (tmb1 & (1 << bit)) -+ true_one = tcb1 & (1 << bit) - s= '?' - if (self.true_cap_msr and true_zero and true_one - and one and not zero): --- -2.27.0 - diff --git a/vnc-fix-memory-leak-when-vnc-disconnect.patch b/vnc-fix-memory-leak-when-vnc-disconnect.patch deleted file mode 100644 index 3eddde4aafc1a213623db042b0cc94a7942e9574..0000000000000000000000000000000000000000 --- a/vnc-fix-memory-leak-when-vnc-disconnect.patch +++ /dev/null @@ -1,1014 +0,0 @@ -From 6bf21f3d83e95bcc4ba35a7a07cc6655e8b010b0 Mon Sep 17 00:00:00 2001 -From: Li Qiang -Date: Sat, 31 Aug 2019 08:39:22 -0700 -Subject: [PATCH] vnc: fix memory leak when vnc disconnect - -Currently when qemu receives a vnc connect, it creates a 'VncState' to -represent this connection. In 'vnc_worker_thread_loop' it creates a -local 'VncState'. The connection 'VcnState' and local 'VncState' exchange -data in 'vnc_async_encoding_start' and 'vnc_async_encoding_end'. 
-In 'zrle_compress_data' it calls 'deflateInit2' to allocate the libz library -opaque data. The 'VncState' used in 'zrle_compress_data' is the local -'VncState'. In 'vnc_zrle_clear' it calls 'deflateEnd' to free the libz -library opaque data. The 'VncState' used in 'vnc_zrle_clear' is the connection -'VncState'. In currently implementation there will be a memory leak when the -vnc disconnect. Following is the asan output backtrack: - -Direct leak of 29760 byte(s) in 5 object(s) allocated from: - 0 0xffffa67ef3c3 in __interceptor_calloc (/lib64/libasan.so.4+0xd33c3) - 1 0xffffa65071cb in g_malloc0 (/lib64/libglib-2.0.so.0+0x571cb) - 2 0xffffa5e968f7 in deflateInit2_ (/lib64/libz.so.1+0x78f7) - 3 0xaaaacec58613 in zrle_compress_data ui/vnc-enc-zrle.c:87 - 4 0xaaaacec58613 in zrle_send_framebuffer_update ui/vnc-enc-zrle.c:344 - 5 0xaaaacec34e77 in vnc_send_framebuffer_update ui/vnc.c:919 - 6 0xaaaacec5e023 in vnc_worker_thread_loop ui/vnc-jobs.c:271 - 7 0xaaaacec5e5e7 in vnc_worker_thread ui/vnc-jobs.c:340 - 8 0xaaaacee4d3c3 in qemu_thread_start util/qemu-thread-posix.c:502 - 9 0xffffa544e8bb in start_thread (/lib64/libpthread.so.0+0x78bb) - 10 0xffffa53965cb in thread_start (/lib64/libc.so.6+0xd55cb) - -This is because the opaque allocated in 'deflateInit2' is not freed in -'deflateEnd'. The reason is that the 'deflateEnd' calls 'deflateStateCheck' -and in the latter will check whether 's->strm != strm'(libz's data structure). -This check will be true so in 'deflateEnd' it just return 'Z_STREAM_ERROR' and -not free the data allocated in 'deflateInit2'. - -The reason this happens is that the 'VncState' contains the whole 'VncZrle', -so when calling 'deflateInit2', the 's->strm' will be the local address. -So 's->strm != strm' will be true. - -To fix this issue, we need to make 'zrle' of 'VncState' to be a pointer. -Then the connection 'VncState' and local 'VncState' exchange mechanism will -work as expection. 
The 'tight' of 'VncState' has the same issue, let's also turn -it to a pointer. - -Reported-by: Ying Fang -Signed-off-by: Li Qiang -Message-id: 20190831153922.121308-1-liq3ea@163.com -Signed-off-by: Gerd Hoffmann ---- - ui/vnc-enc-tight.c | 219 +++++++++++++++++++++++++------------------------- - ui/vnc-enc-zlib.c | 11 +-- - ui/vnc-enc-zrle.c | 68 ++++++++-------- - ui/vnc-enc-zrle.inc.c | 2 +- - ui/vnc.c | 28 ++++--- - ui/vnc.h | 4 +- - 6 files changed, 170 insertions(+), 162 deletions(-) - -diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c -index 9084c22..1e08518 100644 ---- a/ui/vnc-enc-tight.c -+++ b/ui/vnc-enc-tight.c -@@ -116,7 +116,7 @@ static int send_png_rect(VncState *vs, int x, int y, int w, int h, - - static bool tight_can_send_png_rect(VncState *vs, int w, int h) - { -- if (vs->tight.type != VNC_ENCODING_TIGHT_PNG) { -+ if (vs->tight->type != VNC_ENCODING_TIGHT_PNG) { - return false; - } - -@@ -144,7 +144,7 @@ tight_detect_smooth_image24(VncState *vs, int w, int h) - int pixels = 0; - int pix, left[3]; - unsigned int errors; -- unsigned char *buf = vs->tight.tight.buffer; -+ unsigned char *buf = vs->tight->tight.buffer; - - /* - * If client is big-endian, color samples begin from the second -@@ -215,7 +215,7 @@ tight_detect_smooth_image24(VncState *vs, int w, int h) - int pixels = 0; \ - int sample, sum, left[3]; \ - unsigned int errors; \ -- unsigned char *buf = vs->tight.tight.buffer; \ -+ unsigned char *buf = vs->tight->tight.buffer; \ - \ - endian = 0; /* FIXME */ \ - \ -@@ -296,8 +296,8 @@ static int - tight_detect_smooth_image(VncState *vs, int w, int h) - { - unsigned int errors; -- int compression = vs->tight.compression; -- int quality = vs->tight.quality; -+ int compression = vs->tight->compression; -+ int quality = vs->tight->quality; - - if (!vs->vd->lossy) { - return 0; -@@ -309,7 +309,7 @@ tight_detect_smooth_image(VncState *vs, int w, int h) - return 0; - } - -- if (vs->tight.quality != (uint8_t)-1) { -+ if (vs->tight->quality != 
(uint8_t)-1) { - if (w * h < VNC_TIGHT_JPEG_MIN_RECT_SIZE) { - return 0; - } -@@ -320,9 +320,9 @@ tight_detect_smooth_image(VncState *vs, int w, int h) - } - - if (vs->client_pf.bytes_per_pixel == 4) { -- if (vs->tight.pixel24) { -+ if (vs->tight->pixel24) { - errors = tight_detect_smooth_image24(vs, w, h); -- if (vs->tight.quality != (uint8_t)-1) { -+ if (vs->tight->quality != (uint8_t)-1) { - return (errors < tight_conf[quality].jpeg_threshold24); - } - return (errors < tight_conf[compression].gradient_threshold24); -@@ -352,7 +352,7 @@ tight_detect_smooth_image(VncState *vs, int w, int h) - uint##bpp##_t c0, c1, ci; \ - int i, n0, n1; \ - \ -- data = (uint##bpp##_t *)vs->tight.tight.buffer; \ -+ data = (uint##bpp##_t *)vs->tight->tight.buffer; \ - \ - c0 = data[0]; \ - i = 1; \ -@@ -423,9 +423,9 @@ static int tight_fill_palette(VncState *vs, int x, int y, - { - int max; - -- max = count / tight_conf[vs->tight.compression].idx_max_colors_divisor; -+ max = count / tight_conf[vs->tight->compression].idx_max_colors_divisor; - if (max < 2 && -- count >= tight_conf[vs->tight.compression].mono_min_rect_size) { -+ count >= tight_conf[vs->tight->compression].mono_min_rect_size) { - max = 2; - } - if (max >= 256) { -@@ -558,7 +558,7 @@ tight_filter_gradient24(VncState *vs, uint8_t *buf, int w, int h) - int x, y, c; - - buf32 = (uint32_t *)buf; -- memset(vs->tight.gradient.buffer, 0, w * 3 * sizeof(int)); -+ memset(vs->tight->gradient.buffer, 0, w * 3 * sizeof(int)); - - if (1 /* FIXME */) { - shift[0] = vs->client_pf.rshift; -@@ -575,7 +575,7 @@ tight_filter_gradient24(VncState *vs, uint8_t *buf, int w, int h) - upper[c] = 0; - here[c] = 0; - } -- prev = (int *)vs->tight.gradient.buffer; -+ prev = (int *)vs->tight->gradient.buffer; - for (x = 0; x < w; x++) { - pix32 = *buf32++; - for (c = 0; c < 3; c++) { -@@ -615,7 +615,7 @@ tight_filter_gradient24(VncState *vs, uint8_t *buf, int w, int h) - int prediction; \ - int x, y, c; \ - \ -- memset (vs->tight.gradient.buffer, 0, 
w * 3 * sizeof(int)); \ -+ memset(vs->tight->gradient.buffer, 0, w * 3 * sizeof(int)); \ - \ - endian = 0; /* FIXME */ \ - \ -@@ -631,7 +631,7 @@ tight_filter_gradient24(VncState *vs, uint8_t *buf, int w, int h) - upper[c] = 0; \ - here[c] = 0; \ - } \ -- prev = (int *)vs->tight.gradient.buffer; \ -+ prev = (int *)vs->tight->gradient.buffer; \ - for (x = 0; x < w; x++) { \ - pix = *buf; \ - if (endian) { \ -@@ -785,7 +785,7 @@ static void extend_solid_area(VncState *vs, int x, int y, int w, int h, - static int tight_init_stream(VncState *vs, int stream_id, - int level, int strategy) - { -- z_streamp zstream = &vs->tight.stream[stream_id]; -+ z_streamp zstream = &vs->tight->stream[stream_id]; - - if (zstream->opaque == NULL) { - int err; -@@ -803,15 +803,15 @@ static int tight_init_stream(VncState *vs, int stream_id, - return -1; - } - -- vs->tight.levels[stream_id] = level; -+ vs->tight->levels[stream_id] = level; - zstream->opaque = vs; - } - -- if (vs->tight.levels[stream_id] != level) { -+ if (vs->tight->levels[stream_id] != level) { - if (deflateParams(zstream, level, strategy) != Z_OK) { - return -1; - } -- vs->tight.levels[stream_id] = level; -+ vs->tight->levels[stream_id] = level; - } - return 0; - } -@@ -839,11 +839,11 @@ static void tight_send_compact_size(VncState *vs, size_t len) - static int tight_compress_data(VncState *vs, int stream_id, size_t bytes, - int level, int strategy) - { -- z_streamp zstream = &vs->tight.stream[stream_id]; -+ z_streamp zstream = &vs->tight->stream[stream_id]; - int previous_out; - - if (bytes < VNC_TIGHT_MIN_TO_COMPRESS) { -- vnc_write(vs, vs->tight.tight.buffer, vs->tight.tight.offset); -+ vnc_write(vs, vs->tight->tight.buffer, vs->tight->tight.offset); - return bytes; - } - -@@ -852,13 +852,13 @@ static int tight_compress_data(VncState *vs, int stream_id, size_t bytes, - } - - /* reserve memory in output buffer */ -- buffer_reserve(&vs->tight.zlib, bytes + 64); -+ buffer_reserve(&vs->tight->zlib, bytes + 64); - - /* set 
pointers */ -- zstream->next_in = vs->tight.tight.buffer; -- zstream->avail_in = vs->tight.tight.offset; -- zstream->next_out = vs->tight.zlib.buffer + vs->tight.zlib.offset; -- zstream->avail_out = vs->tight.zlib.capacity - vs->tight.zlib.offset; -+ zstream->next_in = vs->tight->tight.buffer; -+ zstream->avail_in = vs->tight->tight.offset; -+ zstream->next_out = vs->tight->zlib.buffer + vs->tight->zlib.offset; -+ zstream->avail_out = vs->tight->zlib.capacity - vs->tight->zlib.offset; - previous_out = zstream->avail_out; - zstream->data_type = Z_BINARY; - -@@ -868,14 +868,14 @@ static int tight_compress_data(VncState *vs, int stream_id, size_t bytes, - return -1; - } - -- vs->tight.zlib.offset = vs->tight.zlib.capacity - zstream->avail_out; -+ vs->tight->zlib.offset = vs->tight->zlib.capacity - zstream->avail_out; - /* ...how much data has actually been produced by deflate() */ - bytes = previous_out - zstream->avail_out; - - tight_send_compact_size(vs, bytes); -- vnc_write(vs, vs->tight.zlib.buffer, bytes); -+ vnc_write(vs, vs->tight->zlib.buffer, bytes); - -- buffer_reset(&vs->tight.zlib); -+ buffer_reset(&vs->tight->zlib); - - return bytes; - } -@@ -927,16 +927,17 @@ static int send_full_color_rect(VncState *vs, int x, int y, int w, int h) - - vnc_write_u8(vs, stream << 4); /* no flushing, no filter */ - -- if (vs->tight.pixel24) { -- tight_pack24(vs, vs->tight.tight.buffer, w * h, &vs->tight.tight.offset); -+ if (vs->tight->pixel24) { -+ tight_pack24(vs, vs->tight->tight.buffer, w * h, -+ &vs->tight->tight.offset); - bytes = 3; - } else { - bytes = vs->client_pf.bytes_per_pixel; - } - - bytes = tight_compress_data(vs, stream, w * h * bytes, -- tight_conf[vs->tight.compression].raw_zlib_level, -- Z_DEFAULT_STRATEGY); -+ tight_conf[vs->tight->compression].raw_zlib_level, -+ Z_DEFAULT_STRATEGY); - - return (bytes >= 0); - } -@@ -947,14 +948,14 @@ static int send_solid_rect(VncState *vs) - - vnc_write_u8(vs, VNC_TIGHT_FILL << 4); /* no flushing, no filter */ - -- 
if (vs->tight.pixel24) { -- tight_pack24(vs, vs->tight.tight.buffer, 1, &vs->tight.tight.offset); -+ if (vs->tight->pixel24) { -+ tight_pack24(vs, vs->tight->tight.buffer, 1, &vs->tight->tight.offset); - bytes = 3; - } else { - bytes = vs->client_pf.bytes_per_pixel; - } - -- vnc_write(vs, vs->tight.tight.buffer, bytes); -+ vnc_write(vs, vs->tight->tight.buffer, bytes); - return 1; - } - -@@ -963,7 +964,7 @@ static int send_mono_rect(VncState *vs, int x, int y, - { - ssize_t bytes; - int stream = 1; -- int level = tight_conf[vs->tight.compression].mono_zlib_level; -+ int level = tight_conf[vs->tight->compression].mono_zlib_level; - - #ifdef CONFIG_VNC_PNG - if (tight_can_send_png_rect(vs, w, h)) { -@@ -991,26 +992,26 @@ static int send_mono_rect(VncState *vs, int x, int y, - uint32_t buf[2] = {bg, fg}; - size_t ret = sizeof (buf); - -- if (vs->tight.pixel24) { -+ if (vs->tight->pixel24) { - tight_pack24(vs, (unsigned char*)buf, 2, &ret); - } - vnc_write(vs, buf, ret); - -- tight_encode_mono_rect32(vs->tight.tight.buffer, w, h, bg, fg); -+ tight_encode_mono_rect32(vs->tight->tight.buffer, w, h, bg, fg); - break; - } - case 2: - vnc_write(vs, &bg, 2); - vnc_write(vs, &fg, 2); -- tight_encode_mono_rect16(vs->tight.tight.buffer, w, h, bg, fg); -+ tight_encode_mono_rect16(vs->tight->tight.buffer, w, h, bg, fg); - break; - default: - vnc_write_u8(vs, bg); - vnc_write_u8(vs, fg); -- tight_encode_mono_rect8(vs->tight.tight.buffer, w, h, bg, fg); -+ tight_encode_mono_rect8(vs->tight->tight.buffer, w, h, bg, fg); - break; - } -- vs->tight.tight.offset = bytes; -+ vs->tight->tight.offset = bytes; - - bytes = tight_compress_data(vs, stream, bytes, level, Z_DEFAULT_STRATEGY); - return (bytes >= 0); -@@ -1040,7 +1041,7 @@ static void write_palette(int idx, uint32_t color, void *opaque) - static bool send_gradient_rect(VncState *vs, int x, int y, int w, int h) - { - int stream = 3; -- int level = tight_conf[vs->tight.compression].gradient_zlib_level; -+ int level = 
tight_conf[vs->tight->compression].gradient_zlib_level; - ssize_t bytes; - - if (vs->client_pf.bytes_per_pixel == 1) { -@@ -1050,23 +1051,23 @@ static bool send_gradient_rect(VncState *vs, int x, int y, int w, int h) - vnc_write_u8(vs, (stream | VNC_TIGHT_EXPLICIT_FILTER) << 4); - vnc_write_u8(vs, VNC_TIGHT_FILTER_GRADIENT); - -- buffer_reserve(&vs->tight.gradient, w * 3 * sizeof (int)); -+ buffer_reserve(&vs->tight->gradient, w * 3 * sizeof(int)); - -- if (vs->tight.pixel24) { -- tight_filter_gradient24(vs, vs->tight.tight.buffer, w, h); -+ if (vs->tight->pixel24) { -+ tight_filter_gradient24(vs, vs->tight->tight.buffer, w, h); - bytes = 3; - } else if (vs->client_pf.bytes_per_pixel == 4) { -- tight_filter_gradient32(vs, (uint32_t *)vs->tight.tight.buffer, w, h); -+ tight_filter_gradient32(vs, (uint32_t *)vs->tight->tight.buffer, w, h); - bytes = 4; - } else { -- tight_filter_gradient16(vs, (uint16_t *)vs->tight.tight.buffer, w, h); -+ tight_filter_gradient16(vs, (uint16_t *)vs->tight->tight.buffer, w, h); - bytes = 2; - } - -- buffer_reset(&vs->tight.gradient); -+ buffer_reset(&vs->tight->gradient); - - bytes = w * h * bytes; -- vs->tight.tight.offset = bytes; -+ vs->tight->tight.offset = bytes; - - bytes = tight_compress_data(vs, stream, bytes, - level, Z_FILTERED); -@@ -1077,7 +1078,7 @@ static int send_palette_rect(VncState *vs, int x, int y, - int w, int h, VncPalette *palette) - { - int stream = 2; -- int level = tight_conf[vs->tight.compression].idx_zlib_level; -+ int level = tight_conf[vs->tight->compression].idx_zlib_level; - int colors; - ssize_t bytes; - -@@ -1104,12 +1105,12 @@ static int send_palette_rect(VncState *vs, int x, int y, - palette_iter(palette, write_palette, &priv); - vnc_write(vs, header, sizeof(header)); - -- if (vs->tight.pixel24) { -+ if (vs->tight->pixel24) { - tight_pack24(vs, vs->output.buffer + old_offset, colors, &offset); - vs->output.offset = old_offset + offset; - } - -- tight_encode_indexed_rect32(vs->tight.tight.buffer, w * 
h, palette); -+ tight_encode_indexed_rect32(vs->tight->tight.buffer, w * h, palette); - break; - } - case 2: -@@ -1119,7 +1120,7 @@ static int send_palette_rect(VncState *vs, int x, int y, - - palette_iter(palette, write_palette, &priv); - vnc_write(vs, header, sizeof(header)); -- tight_encode_indexed_rect16(vs->tight.tight.buffer, w * h, palette); -+ tight_encode_indexed_rect16(vs->tight->tight.buffer, w * h, palette); - break; - } - default: -@@ -1127,7 +1128,7 @@ static int send_palette_rect(VncState *vs, int x, int y, - break; - } - bytes = w * h; -- vs->tight.tight.offset = bytes; -+ vs->tight->tight.offset = bytes; - - bytes = tight_compress_data(vs, stream, bytes, - level, Z_DEFAULT_STRATEGY); -@@ -1146,7 +1147,7 @@ static int send_palette_rect(VncState *vs, int x, int y, - static void jpeg_init_destination(j_compress_ptr cinfo) - { - VncState *vs = cinfo->client_data; -- Buffer *buffer = &vs->tight.jpeg; -+ Buffer *buffer = &vs->tight->jpeg; - - cinfo->dest->next_output_byte = (JOCTET *)buffer->buffer + buffer->offset; - cinfo->dest->free_in_buffer = (size_t)(buffer->capacity - buffer->offset); -@@ -1156,7 +1157,7 @@ static void jpeg_init_destination(j_compress_ptr cinfo) - static boolean jpeg_empty_output_buffer(j_compress_ptr cinfo) - { - VncState *vs = cinfo->client_data; -- Buffer *buffer = &vs->tight.jpeg; -+ Buffer *buffer = &vs->tight->jpeg; - - buffer->offset = buffer->capacity; - buffer_reserve(buffer, 2048); -@@ -1168,7 +1169,7 @@ static boolean jpeg_empty_output_buffer(j_compress_ptr cinfo) - static void jpeg_term_destination(j_compress_ptr cinfo) - { - VncState *vs = cinfo->client_data; -- Buffer *buffer = &vs->tight.jpeg; -+ Buffer *buffer = &vs->tight->jpeg; - - buffer->offset = buffer->capacity - cinfo->dest->free_in_buffer; - } -@@ -1187,7 +1188,7 @@ static int send_jpeg_rect(VncState *vs, int x, int y, int w, int h, int quality) - return send_full_color_rect(vs, x, y, w, h); - } - -- buffer_reserve(&vs->tight.jpeg, 2048); -+ 
buffer_reserve(&vs->tight->jpeg, 2048); - - cinfo.err = jpeg_std_error(&jerr); - jpeg_create_compress(&cinfo); -@@ -1222,9 +1223,9 @@ static int send_jpeg_rect(VncState *vs, int x, int y, int w, int h, int quality) - - vnc_write_u8(vs, VNC_TIGHT_JPEG << 4); - -- tight_send_compact_size(vs, vs->tight.jpeg.offset); -- vnc_write(vs, vs->tight.jpeg.buffer, vs->tight.jpeg.offset); -- buffer_reset(&vs->tight.jpeg); -+ tight_send_compact_size(vs, vs->tight->jpeg.offset); -+ vnc_write(vs, vs->tight->jpeg.buffer, vs->tight->jpeg.offset); -+ buffer_reset(&vs->tight->jpeg); - - return 1; - } -@@ -1240,7 +1241,7 @@ static void write_png_palette(int idx, uint32_t pix, void *opaque) - VncState *vs = priv->vs; - png_colorp color = &priv->png_palette[idx]; - -- if (vs->tight.pixel24) -+ if (vs->tight->pixel24) - { - color->red = (pix >> vs->client_pf.rshift) & vs->client_pf.rmax; - color->green = (pix >> vs->client_pf.gshift) & vs->client_pf.gmax; -@@ -1267,10 +1268,10 @@ static void png_write_data(png_structp png_ptr, png_bytep data, - { - VncState *vs = png_get_io_ptr(png_ptr); - -- buffer_reserve(&vs->tight.png, vs->tight.png.offset + length); -- memcpy(vs->tight.png.buffer + vs->tight.png.offset, data, length); -+ buffer_reserve(&vs->tight->png, vs->tight->png.offset + length); -+ memcpy(vs->tight->png.buffer + vs->tight->png.offset, data, length); - -- vs->tight.png.offset += length; -+ vs->tight->png.offset += length; - } - - static void png_flush_data(png_structp png_ptr) -@@ -1295,8 +1296,8 @@ static int send_png_rect(VncState *vs, int x, int y, int w, int h, - png_infop info_ptr; - png_colorp png_palette = NULL; - pixman_image_t *linebuf; -- int level = tight_png_conf[vs->tight.compression].png_zlib_level; -- int filters = tight_png_conf[vs->tight.compression].png_filters; -+ int level = tight_png_conf[vs->tight->compression].png_zlib_level; -+ int filters = tight_png_conf[vs->tight->compression].png_filters; - uint8_t *buf; - int dy; - -@@ -1340,21 +1341,23 @@ static int 
send_png_rect(VncState *vs, int x, int y, int w, int h, - png_set_PLTE(png_ptr, info_ptr, png_palette, palette_size(palette)); - - if (vs->client_pf.bytes_per_pixel == 4) { -- tight_encode_indexed_rect32(vs->tight.tight.buffer, w * h, palette); -+ tight_encode_indexed_rect32(vs->tight->tight.buffer, w * h, -+ palette); - } else { -- tight_encode_indexed_rect16(vs->tight.tight.buffer, w * h, palette); -+ tight_encode_indexed_rect16(vs->tight->tight.buffer, w * h, -+ palette); - } - } - - png_write_info(png_ptr, info_ptr); - -- buffer_reserve(&vs->tight.png, 2048); -+ buffer_reserve(&vs->tight->png, 2048); - linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, w); - buf = (uint8_t *)pixman_image_get_data(linebuf); - for (dy = 0; dy < h; dy++) - { - if (color_type == PNG_COLOR_TYPE_PALETTE) { -- memcpy(buf, vs->tight.tight.buffer + (dy * w), w); -+ memcpy(buf, vs->tight->tight.buffer + (dy * w), w); - } else { - qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, x, y + dy); - } -@@ -1372,27 +1375,27 @@ static int send_png_rect(VncState *vs, int x, int y, int w, int h, - - vnc_write_u8(vs, VNC_TIGHT_PNG << 4); - -- tight_send_compact_size(vs, vs->tight.png.offset); -- vnc_write(vs, vs->tight.png.buffer, vs->tight.png.offset); -- buffer_reset(&vs->tight.png); -+ tight_send_compact_size(vs, vs->tight->png.offset); -+ vnc_write(vs, vs->tight->png.buffer, vs->tight->png.offset); -+ buffer_reset(&vs->tight->png); - return 1; - } - #endif /* CONFIG_VNC_PNG */ - - static void vnc_tight_start(VncState *vs) - { -- buffer_reset(&vs->tight.tight); -+ buffer_reset(&vs->tight->tight); - - // make the output buffer be the zlib buffer, so we can compress it later -- vs->tight.tmp = vs->output; -- vs->output = vs->tight.tight; -+ vs->tight->tmp = vs->output; -+ vs->output = vs->tight->tight; - } - - static void vnc_tight_stop(VncState *vs) - { - // switch back to normal output/zlib buffers -- vs->tight.tight = vs->output; -- vs->output = vs->tight.tmp; -+ vs->tight->tight = 
vs->output; -+ vs->output = vs->tight->tmp; - } - - static int send_sub_rect_nojpeg(VncState *vs, int x, int y, int w, int h, -@@ -1426,9 +1429,9 @@ static int send_sub_rect_jpeg(VncState *vs, int x, int y, int w, int h, - int ret; - - if (colors == 0) { -- if (force || (tight_jpeg_conf[vs->tight.quality].jpeg_full && -+ if (force || (tight_jpeg_conf[vs->tight->quality].jpeg_full && - tight_detect_smooth_image(vs, w, h))) { -- int quality = tight_conf[vs->tight.quality].jpeg_quality; -+ int quality = tight_conf[vs->tight->quality].jpeg_quality; - - ret = send_jpeg_rect(vs, x, y, w, h, quality); - } else { -@@ -1440,9 +1443,9 @@ static int send_sub_rect_jpeg(VncState *vs, int x, int y, int w, int h, - ret = send_mono_rect(vs, x, y, w, h, bg, fg); - } else if (colors <= 256) { - if (force || (colors > 96 && -- tight_jpeg_conf[vs->tight.quality].jpeg_idx && -+ tight_jpeg_conf[vs->tight->quality].jpeg_idx && - tight_detect_smooth_image(vs, w, h))) { -- int quality = tight_conf[vs->tight.quality].jpeg_quality; -+ int quality = tight_conf[vs->tight->quality].jpeg_quality; - - ret = send_jpeg_rect(vs, x, y, w, h, quality); - } else { -@@ -1480,20 +1483,20 @@ static int send_sub_rect(VncState *vs, int x, int y, int w, int h) - qemu_thread_atexit_add(&vnc_tight_cleanup_notifier); - } - -- vnc_framebuffer_update(vs, x, y, w, h, vs->tight.type); -+ vnc_framebuffer_update(vs, x, y, w, h, vs->tight->type); - - vnc_tight_start(vs); - vnc_raw_send_framebuffer_update(vs, x, y, w, h); - vnc_tight_stop(vs); - - #ifdef CONFIG_VNC_JPEG -- if (!vs->vd->non_adaptive && vs->tight.quality != (uint8_t)-1) { -+ if (!vs->vd->non_adaptive && vs->tight->quality != (uint8_t)-1) { - double freq = vnc_update_freq(vs, x, y, w, h); - -- if (freq < tight_jpeg_conf[vs->tight.quality].jpeg_freq_min) { -+ if (freq < tight_jpeg_conf[vs->tight->quality].jpeg_freq_min) { - allow_jpeg = false; - } -- if (freq >= tight_jpeg_conf[vs->tight.quality].jpeg_freq_threshold) { -+ if (freq >= 
tight_jpeg_conf[vs->tight->quality].jpeg_freq_threshold) { - force_jpeg = true; - vnc_sent_lossy_rect(vs, x, y, w, h); - } -@@ -1503,7 +1506,7 @@ static int send_sub_rect(VncState *vs, int x, int y, int w, int h) - colors = tight_fill_palette(vs, x, y, w * h, &bg, &fg, color_count_palette); - - #ifdef CONFIG_VNC_JPEG -- if (allow_jpeg && vs->tight.quality != (uint8_t)-1) { -+ if (allow_jpeg && vs->tight->quality != (uint8_t)-1) { - ret = send_sub_rect_jpeg(vs, x, y, w, h, bg, fg, colors, - color_count_palette, force_jpeg); - } else { -@@ -1520,7 +1523,7 @@ static int send_sub_rect(VncState *vs, int x, int y, int w, int h) - - static int send_sub_rect_solid(VncState *vs, int x, int y, int w, int h) - { -- vnc_framebuffer_update(vs, x, y, w, h, vs->tight.type); -+ vnc_framebuffer_update(vs, x, y, w, h, vs->tight->type); - - vnc_tight_start(vs); - vnc_raw_send_framebuffer_update(vs, x, y, w, h); -@@ -1538,8 +1541,8 @@ static int send_rect_simple(VncState *vs, int x, int y, int w, int h, - int rw, rh; - int n = 0; - -- max_size = tight_conf[vs->tight.compression].max_rect_size; -- max_width = tight_conf[vs->tight.compression].max_rect_width; -+ max_size = tight_conf[vs->tight->compression].max_rect_size; -+ max_width = tight_conf[vs->tight->compression].max_rect_width; - - if (split && (w > max_width || w * h > max_size)) { - max_sub_width = (w > max_width) ? 
max_width : w; -@@ -1648,16 +1651,16 @@ static int tight_send_framebuffer_update(VncState *vs, int x, int y, - - if (vs->client_pf.bytes_per_pixel == 4 && vs->client_pf.rmax == 0xFF && - vs->client_pf.bmax == 0xFF && vs->client_pf.gmax == 0xFF) { -- vs->tight.pixel24 = true; -+ vs->tight->pixel24 = true; - } else { -- vs->tight.pixel24 = false; -+ vs->tight->pixel24 = false; - } - - #ifdef CONFIG_VNC_JPEG -- if (vs->tight.quality != (uint8_t)-1) { -+ if (vs->tight->quality != (uint8_t)-1) { - double freq = vnc_update_freq(vs, x, y, w, h); - -- if (freq > tight_jpeg_conf[vs->tight.quality].jpeg_freq_threshold) { -+ if (freq > tight_jpeg_conf[vs->tight->quality].jpeg_freq_threshold) { - return send_rect_simple(vs, x, y, w, h, false); - } - } -@@ -1669,8 +1672,8 @@ static int tight_send_framebuffer_update(VncState *vs, int x, int y, - - /* Calculate maximum number of rows in one non-solid rectangle. */ - -- max_rows = tight_conf[vs->tight.compression].max_rect_size; -- max_rows /= MIN(tight_conf[vs->tight.compression].max_rect_width, w); -+ max_rows = tight_conf[vs->tight->compression].max_rect_size; -+ max_rows /= MIN(tight_conf[vs->tight->compression].max_rect_width, w); - - return find_large_solid_color_rect(vs, x, y, w, h, max_rows); - } -@@ -1678,33 +1681,33 @@ static int tight_send_framebuffer_update(VncState *vs, int x, int y, - int vnc_tight_send_framebuffer_update(VncState *vs, int x, int y, - int w, int h) - { -- vs->tight.type = VNC_ENCODING_TIGHT; -+ vs->tight->type = VNC_ENCODING_TIGHT; - return tight_send_framebuffer_update(vs, x, y, w, h); - } - - int vnc_tight_png_send_framebuffer_update(VncState *vs, int x, int y, - int w, int h) - { -- vs->tight.type = VNC_ENCODING_TIGHT_PNG; -+ vs->tight->type = VNC_ENCODING_TIGHT_PNG; - return tight_send_framebuffer_update(vs, x, y, w, h); - } - - void vnc_tight_clear(VncState *vs) - { - int i; -- for (i=0; itight.stream); i++) { -- if (vs->tight.stream[i].opaque) { -- deflateEnd(&vs->tight.stream[i]); -+ for (i = 
0; i < ARRAY_SIZE(vs->tight->stream); i++) { -+ if (vs->tight->stream[i].opaque) { -+ deflateEnd(&vs->tight->stream[i]); - } - } - -- buffer_free(&vs->tight.tight); -- buffer_free(&vs->tight.zlib); -- buffer_free(&vs->tight.gradient); -+ buffer_free(&vs->tight->tight); -+ buffer_free(&vs->tight->zlib); -+ buffer_free(&vs->tight->gradient); - #ifdef CONFIG_VNC_JPEG -- buffer_free(&vs->tight.jpeg); -+ buffer_free(&vs->tight->jpeg); - #endif - #ifdef CONFIG_VNC_PNG -- buffer_free(&vs->tight.png); -+ buffer_free(&vs->tight->png); - #endif - } -diff --git a/ui/vnc-enc-zlib.c b/ui/vnc-enc-zlib.c -index 33e9df2..900ae5b 100644 ---- a/ui/vnc-enc-zlib.c -+++ b/ui/vnc-enc-zlib.c -@@ -76,7 +76,8 @@ static int vnc_zlib_stop(VncState *vs) - zstream->zalloc = vnc_zlib_zalloc; - zstream->zfree = vnc_zlib_zfree; - -- err = deflateInit2(zstream, vs->tight.compression, Z_DEFLATED, MAX_WBITS, -+ err = deflateInit2(zstream, vs->tight->compression, Z_DEFLATED, -+ MAX_WBITS, - MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY); - - if (err != Z_OK) { -@@ -84,16 +85,16 @@ static int vnc_zlib_stop(VncState *vs) - return -1; - } - -- vs->zlib.level = vs->tight.compression; -+ vs->zlib.level = vs->tight->compression; - zstream->opaque = vs; - } - -- if (vs->tight.compression != vs->zlib.level) { -- if (deflateParams(zstream, vs->tight.compression, -+ if (vs->tight->compression != vs->zlib.level) { -+ if (deflateParams(zstream, vs->tight->compression, - Z_DEFAULT_STRATEGY) != Z_OK) { - return -1; - } -- vs->zlib.level = vs->tight.compression; -+ vs->zlib.level = vs->tight->compression; - } - - // reserve memory in output buffer -diff --git a/ui/vnc-enc-zrle.c b/ui/vnc-enc-zrle.c -index 7493a84..17fd28a 100644 ---- a/ui/vnc-enc-zrle.c -+++ b/ui/vnc-enc-zrle.c -@@ -37,18 +37,18 @@ static const int bits_per_packed_pixel[] = { - - static void vnc_zrle_start(VncState *vs) - { -- buffer_reset(&vs->zrle.zrle); -+ buffer_reset(&vs->zrle->zrle); - - /* make the output buffer be the zlib buffer, so we can compress it 
later */ -- vs->zrle.tmp = vs->output; -- vs->output = vs->zrle.zrle; -+ vs->zrle->tmp = vs->output; -+ vs->output = vs->zrle->zrle; - } - - static void vnc_zrle_stop(VncState *vs) - { - /* switch back to normal output/zlib buffers */ -- vs->zrle.zrle = vs->output; -- vs->output = vs->zrle.tmp; -+ vs->zrle->zrle = vs->output; -+ vs->output = vs->zrle->tmp; - } - - static void *zrle_convert_fb(VncState *vs, int x, int y, int w, int h, -@@ -56,24 +56,24 @@ static void *zrle_convert_fb(VncState *vs, int x, int y, int w, int h, - { - Buffer tmp; - -- buffer_reset(&vs->zrle.fb); -- buffer_reserve(&vs->zrle.fb, w * h * bpp + bpp); -+ buffer_reset(&vs->zrle->fb); -+ buffer_reserve(&vs->zrle->fb, w * h * bpp + bpp); - - tmp = vs->output; -- vs->output = vs->zrle.fb; -+ vs->output = vs->zrle->fb; - - vnc_raw_send_framebuffer_update(vs, x, y, w, h); - -- vs->zrle.fb = vs->output; -+ vs->zrle->fb = vs->output; - vs->output = tmp; -- return vs->zrle.fb.buffer; -+ return vs->zrle->fb.buffer; - } - - static int zrle_compress_data(VncState *vs, int level) - { -- z_streamp zstream = &vs->zrle.stream; -+ z_streamp zstream = &vs->zrle->stream; - -- buffer_reset(&vs->zrle.zlib); -+ buffer_reset(&vs->zrle->zlib); - - if (zstream->opaque != vs) { - int err; -@@ -93,13 +93,13 @@ static int zrle_compress_data(VncState *vs, int level) - } - - /* reserve memory in output buffer */ -- buffer_reserve(&vs->zrle.zlib, vs->zrle.zrle.offset + 64); -+ buffer_reserve(&vs->zrle->zlib, vs->zrle->zrle.offset + 64); - - /* set pointers */ -- zstream->next_in = vs->zrle.zrle.buffer; -- zstream->avail_in = vs->zrle.zrle.offset; -- zstream->next_out = vs->zrle.zlib.buffer + vs->zrle.zlib.offset; -- zstream->avail_out = vs->zrle.zlib.capacity - vs->zrle.zlib.offset; -+ zstream->next_in = vs->zrle->zrle.buffer; -+ zstream->avail_in = vs->zrle->zrle.offset; -+ zstream->next_out = vs->zrle->zlib.buffer + vs->zrle->zlib.offset; -+ zstream->avail_out = vs->zrle->zlib.capacity - vs->zrle->zlib.offset; - 
zstream->data_type = Z_BINARY; - - /* start encoding */ -@@ -108,8 +108,8 @@ static int zrle_compress_data(VncState *vs, int level) - return -1; - } - -- vs->zrle.zlib.offset = vs->zrle.zlib.capacity - zstream->avail_out; -- return vs->zrle.zlib.offset; -+ vs->zrle->zlib.offset = vs->zrle->zlib.capacity - zstream->avail_out; -+ return vs->zrle->zlib.offset; - } - - /* Try to work out whether to use RLE and/or a palette. We do this by -@@ -259,14 +259,14 @@ static int zrle_send_framebuffer_update(VncState *vs, int x, int y, - size_t bytes; - int zywrle_level; - -- if (vs->zrle.type == VNC_ENCODING_ZYWRLE) { -- if (!vs->vd->lossy || vs->tight.quality == (uint8_t)-1 -- || vs->tight.quality == 9) { -+ if (vs->zrle->type == VNC_ENCODING_ZYWRLE) { -+ if (!vs->vd->lossy || vs->tight->quality == (uint8_t)-1 -+ || vs->tight->quality == 9) { - zywrle_level = 0; -- vs->zrle.type = VNC_ENCODING_ZRLE; -- } else if (vs->tight.quality < 3) { -+ vs->zrle->type = VNC_ENCODING_ZRLE; -+ } else if (vs->tight->quality < 3) { - zywrle_level = 3; -- } else if (vs->tight.quality < 6) { -+ } else if (vs->tight->quality < 6) { - zywrle_level = 2; - } else { - zywrle_level = 1; -@@ -337,30 +337,30 @@ static int zrle_send_framebuffer_update(VncState *vs, int x, int y, - - vnc_zrle_stop(vs); - bytes = zrle_compress_data(vs, Z_DEFAULT_COMPRESSION); -- vnc_framebuffer_update(vs, x, y, w, h, vs->zrle.type); -+ vnc_framebuffer_update(vs, x, y, w, h, vs->zrle->type); - vnc_write_u32(vs, bytes); -- vnc_write(vs, vs->zrle.zlib.buffer, vs->zrle.zlib.offset); -+ vnc_write(vs, vs->zrle->zlib.buffer, vs->zrle->zlib.offset); - return 1; - } - - int vnc_zrle_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) - { -- vs->zrle.type = VNC_ENCODING_ZRLE; -+ vs->zrle->type = VNC_ENCODING_ZRLE; - return zrle_send_framebuffer_update(vs, x, y, w, h); - } - - int vnc_zywrle_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) - { -- vs->zrle.type = VNC_ENCODING_ZYWRLE; -+ vs->zrle->type = 
VNC_ENCODING_ZYWRLE; - return zrle_send_framebuffer_update(vs, x, y, w, h); - } - - void vnc_zrle_clear(VncState *vs) - { -- if (vs->zrle.stream.opaque) { -- deflateEnd(&vs->zrle.stream); -+ if (vs->zrle->stream.opaque) { -+ deflateEnd(&vs->zrle->stream); - } -- buffer_free(&vs->zrle.zrle); -- buffer_free(&vs->zrle.fb); -- buffer_free(&vs->zrle.zlib); -+ buffer_free(&vs->zrle->zrle); -+ buffer_free(&vs->zrle->fb); -+ buffer_free(&vs->zrle->zlib); - } -diff --git a/ui/vnc-enc-zrle.inc.c b/ui/vnc-enc-zrle.inc.c -index abf6b86..c107d8a 100644 ---- a/ui/vnc-enc-zrle.inc.c -+++ b/ui/vnc-enc-zrle.inc.c -@@ -96,7 +96,7 @@ static void ZRLE_ENCODE(VncState *vs, int x, int y, int w, int h, - static void ZRLE_ENCODE_TILE(VncState *vs, ZRLE_PIXEL *data, int w, int h, - int zywrle_level) - { -- VncPalette *palette = &vs->zrle.palette; -+ VncPalette *palette = &vs->zrle->palette; - - int runs = 0; - int single_pixels = 0; -diff --git a/ui/vnc.c b/ui/vnc.c -index bc43c4c..87b8045 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -1307,6 +1307,8 @@ void vnc_disconnect_finish(VncState *vs) - object_unref(OBJECT(vs->sioc)); - vs->sioc = NULL; - vs->magic = 0; -+ g_free(vs->zrle); -+ g_free(vs->tight); - g_free(vs); - } - -@@ -2058,8 +2060,8 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings) - - vs->features = 0; - vs->vnc_encoding = 0; -- vs->tight.compression = 9; -- vs->tight.quality = -1; /* Lossless by default */ -+ vs->tight->compression = 9; -+ vs->tight->quality = -1; /* Lossless by default */ - vs->absolute = -1; - - /* -@@ -2127,11 +2129,11 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings) - vs->features |= VNC_FEATURE_LED_STATE_MASK; - break; - case VNC_ENCODING_COMPRESSLEVEL0 ... VNC_ENCODING_COMPRESSLEVEL0 + 9: -- vs->tight.compression = (enc & 0x0F); -+ vs->tight->compression = (enc & 0x0F); - break; - case VNC_ENCODING_QUALITYLEVEL0 ... 
VNC_ENCODING_QUALITYLEVEL0 + 9: - if (vs->vd->lossy) { -- vs->tight.quality = (enc & 0x0F); -+ vs->tight->quality = (enc & 0x0F); - } - break; - default: -@@ -3034,6 +3036,8 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, - int i; - - trace_vnc_client_connect(vs, sioc); -+ vs->zrle = g_new0(VncZrle, 1); -+ vs->tight = g_new0(VncTight, 1); - vs->magic = VNC_MAGIC; - vs->sioc = sioc; - object_ref(OBJECT(vs->sioc)); -@@ -3045,19 +3049,19 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, - buffer_init(&vs->output, "vnc-output/%p", sioc); - buffer_init(&vs->jobs_buffer, "vnc-jobs_buffer/%p", sioc); - -- buffer_init(&vs->tight.tight, "vnc-tight/%p", sioc); -- buffer_init(&vs->tight.zlib, "vnc-tight-zlib/%p", sioc); -- buffer_init(&vs->tight.gradient, "vnc-tight-gradient/%p", sioc); -+ buffer_init(&vs->tight->tight, "vnc-tight/%p", sioc); -+ buffer_init(&vs->tight->zlib, "vnc-tight-zlib/%p", sioc); -+ buffer_init(&vs->tight->gradient, "vnc-tight-gradient/%p", sioc); - #ifdef CONFIG_VNC_JPEG -- buffer_init(&vs->tight.jpeg, "vnc-tight-jpeg/%p", sioc); -+ buffer_init(&vs->tight->jpeg, "vnc-tight-jpeg/%p", sioc); - #endif - #ifdef CONFIG_VNC_PNG -- buffer_init(&vs->tight.png, "vnc-tight-png/%p", sioc); -+ buffer_init(&vs->tight->png, "vnc-tight-png/%p", sioc); - #endif - buffer_init(&vs->zlib.zlib, "vnc-zlib/%p", sioc); -- buffer_init(&vs->zrle.zrle, "vnc-zrle/%p", sioc); -- buffer_init(&vs->zrle.fb, "vnc-zrle-fb/%p", sioc); -- buffer_init(&vs->zrle.zlib, "vnc-zrle-zlib/%p", sioc); -+ buffer_init(&vs->zrle->zrle, "vnc-zrle/%p", sioc); -+ buffer_init(&vs->zrle->fb, "vnc-zrle-fb/%p", sioc); -+ buffer_init(&vs->zrle->zlib, "vnc-zrle-zlib/%p", sioc); - - if (skipauth) { - vs->auth = VNC_AUTH_NONE; -diff --git a/ui/vnc.h b/ui/vnc.h -index 8643860..fea79c2 100644 ---- a/ui/vnc.h -+++ b/ui/vnc.h -@@ -338,10 +338,10 @@ struct VncState - /* Encoding specific, if you add something here, don't forget to - * update vnc_async_encoding_start() - */ -- 
VncTight tight; -+ VncTight *tight; - VncZlib zlib; - VncHextile hextile; -- VncZrle zrle; -+ VncZrle *zrle; - VncZywrle zywrle; - - Notifier mouse_mode_notifier; --- -1.8.3.1 - diff --git a/vpc-Return-0-from-vpc_co_create-on-success.patch b/vpc-Return-0-from-vpc_co_create-on-success.patch deleted file mode 100644 index 46fbd90d1bd39f5549ce5d0185d58bbd437a82aa..0000000000000000000000000000000000000000 --- a/vpc-Return-0-from-vpc_co_create-on-success.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 97c478c355fee96eb2b740313f50561e69b6f305 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 2 Sep 2019 21:33:16 +0200 -Subject: [PATCH] vpc: Return 0 from vpc_co_create() on success - -blockdev_create_run() directly uses .bdrv_co_create()'s return value as -the job's return value. Jobs must return 0 on success, not just any -nonnegative value. Therefore, using blockdev-create for VPC images may -currently fail as the vpc driver may return a positive integer. - -Because there is no point in returning a positive integer anywhere in -the block layer (all non-negative integers are generally treated as -complete success), we probably do not want to add more such cases. -Therefore, fix this problem by making the vpc driver always return 0 in -case of success. 
- -Suggested-by: Kevin Wolf -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 1a37e3124407b5a145d44478d3ecbdb89c63789f) -Signed-off-by: Michael Roth ---- - block/vpc.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/block/vpc.c b/block/vpc.c -index d4776ee8a5..3a88e28e2b 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -885,6 +885,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, - goto fail; - } - -+ ret = 0; - fail: - return ret; - } -@@ -908,7 +909,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, - return ret; - } - -- return ret; -+ return 0; - } - - static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts, --- -2.23.0 diff --git a/vtimer-Drop-vtimer-virtual-timer-adjust.patch b/vtimer-Drop-vtimer-virtual-timer-adjust.patch deleted file mode 100644 index 726498fb6c778f8d5739e9614ae451d54a11bb56..0000000000000000000000000000000000000000 --- a/vtimer-Drop-vtimer-virtual-timer-adjust.patch +++ /dev/null @@ -1,144 +0,0 @@ -From b1782119bcfac96d8a541d8d60ee00f954d721db Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Wed, 27 May 2020 17:48:54 +0800 -Subject: [PATCH] vtimer: Drop vtimer virtual timer adjust - -This patch drops the vtimer virtual timer adjust, cross version migration -from openEuler qemu-4.0.1 to qemu-4.1.0 is not supported as a consequence. - -By default openEuler qemu-4.1.0 use kvm_adjvtime as the virtual timer. 
- -Signed-off-by: Ying Fang - -diff --git a/cpus.c b/cpus.c -index 6a28bdef..927a00aa 100644 ---- a/cpus.c -+++ b/cpus.c -@@ -1066,34 +1066,6 @@ void cpu_synchronize_all_pre_loadvm(void) - } - } - --#ifdef __aarch64__ --static bool kvm_adjvtime_enabled(CPUState *cs) --{ -- ARMCPU *cpu = ARM_CPU(cs); -- return cpu->kvm_adjvtime == true; --} -- --static void get_vcpu_timer_tick(CPUState *cs) --{ -- CPUARMState *env = &ARM_CPU(cs)->env; -- int err; -- struct kvm_one_reg reg; -- uint64_t timer_tick; -- -- reg.id = KVM_REG_ARM_TIMER_CNT; -- reg.addr = (uintptr_t) &timer_tick; -- -- err = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); -- if (err < 0) { -- error_report("get vcpu tick failed, ret = %d", err); -- env->vtimer = 0; -- return; -- } -- env->vtimer = timer_tick; -- return; --} --#endif -- - static int do_vm_stop(RunState state, bool send_stop) - { - int ret = 0; -@@ -1101,17 +1073,6 @@ static int do_vm_stop(RunState state, bool send_stop) - if (runstate_is_running()) { - cpu_disable_ticks(); - pause_all_vcpus(); --#ifdef __aarch64__ -- /* vtimer adjust is used in openEuler qemu-4.0.1, however kvm_adjvtime -- * is introduced in openEuler qemu-4.1.0. To maintain the compatibility -- * and enable cross version migration, let's enable vtimer adjust only -- * if kvm_adjvtime is not enabled, otherwise there may be conflicts -- * between vtimer adjust and kvm_adjvtime. 
-- */ -- if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { -- get_vcpu_timer_tick(first_cpu); -- } --#endif - runstate_set(state); - vm_state_notify(0, state); - if (send_stop) { -@@ -1957,46 +1918,11 @@ void cpu_resume(CPUState *cpu) - qemu_cpu_kick(cpu); - } - --#ifdef __aarch64__ -- --static void set_vcpu_timer_tick(CPUState *cs) --{ -- CPUARMState *env = &ARM_CPU(cs)->env; -- -- if (env->vtimer == 0) { -- return; -- } -- -- int err; -- struct kvm_one_reg reg; -- uint64_t timer_tick = env->vtimer; -- env->vtimer = 0; -- -- reg.id = KVM_REG_ARM_TIMER_CNT; -- reg.addr = (uintptr_t) &timer_tick; -- -- err = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); -- if (err < 0) { -- error_report("Set vcpu tick failed, ret = %d", err); -- return; -- } -- return; --} --#endif -- - void resume_all_vcpus(void) - { - CPUState *cpu; - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); --#ifdef __aarch64__ -- /* Enable vtimer adjust only if kvm_adjvtime is not enabled, otherwise -- * there may be conflicts between vtimer adjust and kvm_adjvtime. 
-- */ -- if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { -- set_vcpu_timer_tick(first_cpu); -- } --#endif - CPU_FOREACH(cpu) { - cpu_resume(cpu); - } -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index aec6a214..86eb79cd 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -262,8 +262,6 @@ typedef struct CPUARMState { - uint64_t sp_el[4]; /* AArch64 banked stack pointers */ - - -- uint64_t vtimer; /* Timer tick when vcpu stop */ -- - /* System control coprocessor (cp15) */ - struct { - uint32_t c0_cpuid; -diff --git a/target/arm/machine.c b/target/arm/machine.c -index ec28b839..ee3c59a6 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -814,7 +814,6 @@ const VMStateDescription vmstate_arm_cpu = { - VMSTATE_UINT32(env.exception.syndrome, ARMCPU), - VMSTATE_UINT32(env.exception.fsr, ARMCPU), - VMSTATE_UINT64(env.exception.vaddress, ARMCPU), -- VMSTATE_UINT64(env.vtimer, ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), - { --- -2.23.0 - diff --git a/vtimer-compat-cross-version-migration-from-v4.0.1.patch b/vtimer-compat-cross-version-migration-from-v4.0.1.patch deleted file mode 100644 index f452948fd29818c9551899e5044de1e3b33bc235..0000000000000000000000000000000000000000 --- a/vtimer-compat-cross-version-migration-from-v4.0.1.patch +++ /dev/null @@ -1,41 +0,0 @@ -From aec34c33730c36b34e4442548885463f57100e13 Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Fri, 8 May 2020 11:25:28 +0800 -Subject: [PATCH] vtimer: compat cross version migration from v4.0.1 - -vtimer feature was added to qemu v4.0.1 to record timer tick when vcpu -is stopped. However this feature is discared and the new virtual time -adjustment is introduced. - -This patch add the missing vtimer parameter to ARMCPUState in order -to compat cross version migration fromm v4.0.1 openEuler 2003 lts release. 
- -Singed-off-by: Ying Fang - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 219c222b..2609113d 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -261,6 +261,8 @@ typedef struct CPUARMState { - uint64_t elr_el[4]; /* AArch64 exception link regs */ - uint64_t sp_el[4]; /* AArch64 banked stack pointers */ - -+ uint64_t vtimer; /* Timer tick when vcpu is stopped */ -+ - /* System control coprocessor (cp15) */ - struct { - uint32_t c0_cpuid; -diff --git a/target/arm/machine.c b/target/arm/machine.c -index ee3c59a6..ec28b839 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -814,6 +814,7 @@ const VMStateDescription vmstate_arm_cpu = { - VMSTATE_UINT32(env.exception.syndrome, ARMCPU), - VMSTATE_UINT32(env.exception.fsr, ARMCPU), - VMSTATE_UINT64(env.exception.vaddress, ARMCPU), -+ VMSTATE_UINT64(env.vtimer, ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), - VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), - { --- -2.23.0 diff --git a/vvfat-Fix-bug-in-writing-to-middle-of-file.patch b/vvfat-Fix-bug-in-writing-to-middle-of-file.patch new file mode 100644 index 0000000000000000000000000000000000000000..c37caba7b2d8ed735339a4e893d4440886bb2865 --- /dev/null +++ b/vvfat-Fix-bug-in-writing-to-middle-of-file.patch @@ -0,0 +1,41 @@ +From db722158867b3b7541ed788b0a0f42a29a839ee4 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Fri, 16 Aug 2024 17:51:45 +0800 +Subject: [PATCH] vvfat: Fix bug in writing to middle of file + +cheery-pick from b881cf00c99e03bc8a3648581f97736ff275b18b + +Before this commit, the behavior when calling `commit_one_file` for +example with `offset=0x2000` (second cluster), what will happen is that +we won't fetch the next cluster from the fat, and instead use the first +cluster for the read operation. + +This is due to off-by-one error here, where `i=0x2000 !< offset=0x2000`, +thus not fetching the next cluster. 
+ +Signed-off-by: Amjad Alsharafi +Reviewed-by: Kevin Wolf +Tested-by: Kevin Wolf +Message-ID: +Signed-off-by: Kevin Wolf +Signed-off-by: qihao_yewu +--- + block/vvfat.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/vvfat.c b/block/vvfat.c +index 9d050ba3ae..9010f3f33f 100644 +--- a/block/vvfat.c ++++ b/block/vvfat.c +@@ -2525,7 +2525,7 @@ commit_one_file(BDRVVVFATState* s, int dir_index, uint32_t offset) + return -1; + } + +- for (i = s->cluster_size; i < offset; i += s->cluster_size) ++ for (i = 0; i < offset; i += s->cluster_size) + c = modified_fat_get(s, c); + + fd = qemu_open_old(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666); +-- +2.41.0.windows.1 + diff --git a/x86-Intel-AVX512_BF16-feature-enabling.patch b/x86-Intel-AVX512_BF16-feature-enabling.patch deleted file mode 100644 index 175190f10c71a4670f32ab3d16a49fee127e1c29..0000000000000000000000000000000000000000 --- a/x86-Intel-AVX512_BF16-feature-enabling.patch +++ /dev/null @@ -1,179 +0,0 @@ -From e2fdc78f93d61be487c03a782aef6fdd8b26fa7e Mon Sep 17 00:00:00 2001 -From: Jing Liu -Date: Thu, 25 Jul 2019 14:14:16 +0800 -Subject: [PATCH] x86: Intel AVX512_BF16 feature enabling - -Intel CooperLake cpu adds AVX512_BF16 instruction, defining as -CPUID.(EAX=7,ECX=1):EAX[bit 05]. - -The patch adds a property for setting the subleaf of CPUID leaf 7 in -case that people would like to specify it. 
- -The release spec link as follows, -https://software.intel.com/sites/default/files/managed/c5/15/\ -architecture-instruction-set-extensions-programming-reference.pdf - -Signed-off-by: Jing Liu -Signed-off-by: Paolo Bonzini - -Signed-off-by: Jingyi Wang ---- - target/i386/cpu.c | 39 ++++++++++++++++++++++++++++++++++++++- - target/i386/cpu.h | 7 +++++++ - target/i386/kvm.c | 3 ++- - 3 files changed, 47 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 19751e37a7..1ade90c28b 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -770,6 +770,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, - /* CPUID_7_0_ECX_OSPKE is dynamic */ \ - CPUID_7_0_ECX_LA57) - #define TCG_7_0_EDX_FEATURES 0 -+#define TCG_7_1_EAX_FEATURES 0 - #define TCG_APM_FEATURES 0 - #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT - #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) -@@ -1095,6 +1096,25 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - }, - .tcg_features = TCG_7_0_EDX_FEATURES, - }, -+ [FEAT_7_1_EAX] = { -+ .type = CPUID_FEATURE_WORD, -+ .feat_names = { -+ NULL, NULL, NULL, NULL, -+ NULL, "avx512-bf16", NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .cpuid = { -+ .eax = 7, -+ .needs_ecx = true, .ecx = 1, -+ .reg = R_EAX, -+ }, -+ .tcg_features = TCG_7_1_EAX_FEATURES, -+ }, - [FEAT_8000_0007_EDX] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { -@@ -4292,13 +4312,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - case 7: - /* Structured Extended Feature Flags Enumeration Leaf */ - if (count == 0) { -- *eax = 0; /* Maximum ECX value for sub-leaves */ -+ /* Maximum ECX value for sub-leaves */ -+ *eax = env->cpuid_level_func7; - *ebx = env->features[FEAT_7_0_EBX]; /* Feature flags */ - *ecx = env->features[FEAT_7_0_ECX]; /* 
Feature flags */ - if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) { - *ecx |= CPUID_7_0_ECX_OSPKE; - } - *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ -+ } else if (count == 1) { -+ *eax = env->features[FEAT_7_1_EAX]; -+ *ebx = 0; -+ *ecx = 0; -+ *edx = 0; - } else { - *eax = 0; - *ebx = 0; -@@ -4948,6 +4974,11 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w) - x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel2, eax); - break; - } -+ -+ if (eax == 7) { -+ x86_cpu_adjust_level(cpu, &env->cpuid_min_level_func7, -+ fi->cpuid.ecx); -+ } - } - - /* Calculate XSAVE components based on the configured CPU feature flags */ -@@ -5066,6 +5097,7 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - x86_cpu_adjust_feat_level(cpu, FEAT_1_ECX); - x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); - x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); -+ x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); - x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); - x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_ECX); - x86_cpu_adjust_feat_level(cpu, FEAT_8000_0007_EDX); -@@ -5097,6 +5129,9 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - } - - /* Set cpuid_*level* based on cpuid_min_*level, if not explicitly set */ -+ if (env->cpuid_level_func7 == UINT32_MAX) { -+ env->cpuid_level_func7 = env->cpuid_min_level_func7; -+ } - if (env->cpuid_level == UINT32_MAX) { - env->cpuid_level = env->cpuid_min_level; - } -@@ -5868,6 +5903,8 @@ static Property x86_cpu_properties[] = { - DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), - DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), - DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), -+ DEFINE_PROP_UINT32("level-func7", X86CPU, env.cpuid_level_func7, -+ UINT32_MAX), - DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), - DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), - DEFINE_PROP_UINT32("xlevel2", 
X86CPU, env.cpuid_xlevel2, UINT32_MAX), -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 8b3dc5533e..488b4dc778 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -479,6 +479,7 @@ typedef enum FeatureWord { - FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ - FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ - FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */ -+ FEAT_7_1_EAX, /* CPUID[EAX=7,ECX=1].EAX */ - FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ - FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ - FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ -@@ -692,6 +693,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_7_0_EDX_CORE_CAPABILITY (1U << 30) /*Core Capability*/ - #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ - -+#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) /* AVX512 BFloat16 Instruction */ -+ - #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) /* Write back and - do not invalidate cache */ - #define CPUID_8000_0008_EBX_IBPB (1U << 12) /* Indirect Branch Prediction Barrier */ -@@ -1322,6 +1325,10 @@ typedef struct CPUX86State { - /* Fields after this point are preserved across CPU reset. */ - - /* processor features (e.g. 
for CPUID insn) */ -+ /* Minimum cpuid leaf 7 value */ -+ uint32_t cpuid_level_func7; -+ /* Actual cpuid leaf 7 value */ -+ uint32_t cpuid_min_level_func7; - /* Minimum level/xlevel/xlevel2, based on CPU model + features */ - uint32_t cpuid_min_level, cpuid_min_xlevel, cpuid_min_xlevel2; - /* Maximum level/xlevel/xlevel2 value for auto-assignment: */ -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index dbbb13772a..f55d4b4b97 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -1497,6 +1497,7 @@ int kvm_arch_init_vcpu(CPUState *cs) - c = &cpuid_data.entries[cpuid_i++]; - } - break; -+ case 0x7: - case 0x14: { - uint32_t times; - -@@ -1509,7 +1510,7 @@ int kvm_arch_init_vcpu(CPUState *cs) - for (j = 1; j <= times; ++j) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " -- "cpuid(eax:0x14,ecx:0x%x)\n", j); -+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; --- -2.27.0 - diff --git a/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch b/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch deleted file mode 100644 index fc17f48b7a395bafffaf7ef9763d04bff110af0a..0000000000000000000000000000000000000000 --- a/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 725dfa851f8e1de8653f41a4bd38c7f98757eb40 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 2 Sep 2019 08:02:22 -0400 -Subject: [PATCH] x86: do not advertise die-id in query-hotpluggbale-cpus if - '-smp dies' is not set - -Commit 176d2cda0 (i386/cpu: Consolidate die-id validity in smp context) added -new 'die-id' topology property to CPUs and exposed it via QMP command -query-hotpluggable-cpus, which broke -device/device_add cpu-foo for existing -users that do not support die-id/dies yet. 
That's would be fine if it happened -to new machine type only but it also happened to old machine types, -which breaks migration from old QEMU to the new one, for example following CLI: - - OLD-QEMU -M pc-i440fx-4.0 -smp 1,max_cpus=2 \ - -device qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id -is not able to start with new QEMU, complaining about invalid die-id. - -After discovering regression, the patch - "pc: Don't make die-id mandatory unless necessary" -makes die-id optional so old CLI would work. - -However it's not enough as new QEMU still exposes die-id via query-hotpluggbale-cpus -QMP command, so the users that started old machine type on new QEMU, using all -properties (including die-id) received from QMP command (as required), won't be -able to start old QEMU using the same properties since it doesn't support die-id. - -Fix it by hiding die-id in query-hotpluggbale-cpus for all machine types in case -'-smp dies' is not provided on CLI or -smp dies = 1', in which case smp_dies == 1 -and APIC ID is calculated in default way (as it was before DIE support) so we won't -need compat code as in both cases the topology provided to guest via CPUID is the same. 
- -Signed-off-by: Igor Mammedov -Message-Id: <20190902120222.6179-1-imammedo@redhat.com> -Reviewed-by: Eduardo Habkost -Signed-off-by: Eduardo Habkost -(cherry picked from commit c6c1bb89fb46f3b88f832e654cf5a6f7941aac51) -Signed-off-by: Michael Roth ---- - hw/i386/pc.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 947f81070f..d011733ff7 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2887,8 +2887,10 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) - ms->smp.threads, &topo); - ms->possible_cpus->cpus[i].props.has_socket_id = true; - ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; -- ms->possible_cpus->cpus[i].props.has_die_id = true; -- ms->possible_cpus->cpus[i].props.die_id = topo.die_id; -+ if (pcms->smp_dies > 1) { -+ ms->possible_cpus->cpus[i].props.has_die_id = true; -+ ms->possible_cpus->cpus[i].props.die_id = topo.die_id; -+ } - ms->possible_cpus->cpus[i].props.has_core_id = true; - ms->possible_cpus->cpus[i].props.core_id = topo.core_id; - ms->possible_cpus->cpus[i].props.has_thread_id = true; --- -2.23.0 diff --git a/xhci-Fix-memory-leak-in-xhci_address_slot.patch b/xhci-Fix-memory-leak-in-xhci_address_slot.patch deleted file mode 100644 index 1d0f858f3e6ea2975aff0a7e89a155083a9695b7..0000000000000000000000000000000000000000 --- a/xhci-Fix-memory-leak-in-xhci_address_slot.patch +++ /dev/null @@ -1,47 +0,0 @@ -From c0de0a04d03183f524c2f60cda8ae1e886197a7d Mon Sep 17 00:00:00 2001 -From: Ying Fang -Date: Tue, 27 Aug 2019 10:54:48 +0800 -Subject: [PATCH] xhci: Fix memory leak in xhci_address_slot - -Address Sanitizer shows memory leak in xhci_address_slot -hw/usb/hcd-xhci.c:2156 and the stack is as bellow: - -Direct leak of 64 byte(s) in 4 object(s) allocated from: - #0 0xffff91c6f5ab in realloc (/lib64/libasan.so.4+0xd35ab) - #1 0xffff91987243 in g_realloc (/lib64/libglib-2.0.so.0+0x57243) - #2 0xaaaab0b26a1f in qemu_iovec_add util/iov.c:296 - #3 
0xaaaab07e5ce3 in xhci_address_slot hw/usb/hcd-xhci.c:2156 - #4 0xaaaab07e5ce3 in xhci_process_commands hw/usb/hcd-xhci.c:2493 - #5 0xaaaab00058d7 in memory_region_write_accessor qemu/memory.c:507 - #6 0xaaaab0000d87 in access_with_adjusted_size memory.c:573 - #7 0xaaaab000abcf in memory_region_dispatch_write memory.c:1516 - #8 0xaaaaaff59947 in flatview_write_continue exec.c:3367 - #9 0xaaaaaff59c33 in flatview_write exec.c:3406 - #10 0xaaaaaff63b3b in address_space_write exec.c:3496 - #11 0xaaaab002f263 in kvm_cpu_exec accel/kvm/kvm-all.c:2288 - #12 0xaaaaaffee427 in qemu_kvm_cpu_thread_fn cpus.c:1290 - #13 0xaaaab0b1a943 in qemu_thread_start util/qemu-thread-posix.c:502 - #14 0xffff908ce8bb in start_thread (/lib64/libpthread.so.0+0x78bb) - #15 0xffff908165cb in thread_start (/lib64/libc.so.6+0xd55cb) - -Cc: zhanghailiang -Signed-off-by: Ying Fang ---- - hw/usb/hcd-xhci.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index f578264948..471759cd4c 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -2161,6 +2161,7 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid, - DeviceOutRequest | USB_REQ_SET_ADDRESS, - slotid, 0, 0, NULL); - assert(p.status != USB_RET_ASYNC); -+ usb_packet_cleanup(&p); - } - - res = xhci_enable_ep(xhci, slotid, 1, octx+32, ep0_ctx); --- -2.19.1 - diff --git a/xhci-Fix-memory-leak-in-xhci_kick_epctx-when-poweroff.patch b/xhci-Fix-memory-leak-in-xhci_kick_epctx-when-poweroff.patch deleted file mode 100644 index 7d226e65cdc2e6e6e9c19e4791567246e1851787..0000000000000000000000000000000000000000 --- a/xhci-Fix-memory-leak-in-xhci_kick_epctx-when-poweroff.patch +++ /dev/null @@ -1,56 +0,0 @@ -From cf859d444770243fa184019fd4eab135b2653390 Mon Sep 17 00:00:00 2001 -From: Chen Qun -Date: Fri, 10 Jan 2020 18:36:24 +0800 -Subject: [PATCH] xhci: Fix memory leak in xhci_kick_epctx when poweroff - GuestOS -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -start vm with libvirt, when GuestOS running, enter poweroff command using -the xhci keyboard, then ASAN shows memory leak stack: - -Direct leak of 80 byte(s) in 5 object(s) allocated from: - #0 0xfffd1e6431cb in __interceptor_malloc (/lib64/libasan.so.4+0xd31cb) - #1 0xfffd1e107163 in g_malloc (/lib64/libglib-2.0.so.0+0x57163) - #2 0xaaad39051367 in qemu_sglist_init /qemu/dma-helpers.c:43 - #3 0xaaad3947c407 in pci_dma_sglist_init /qemu/include/hw/pci/pci.h:842 - #4 0xaaad3947c407 in xhci_xfer_create_sgl /qemu/hw/usb/hcd-xhci.c:1446 - #5 0xaaad3947c407 in xhci_setup_packet /qemu/hw/usb/hcd-xhci.c:1618 - #6 0xaaad3948625f in xhci_submit /qemu/hw/usb/hcd-xhci.c:1827 - #7 0xaaad3948625f in xhci_fire_transfer /qemu/hw/usb/hcd-xhci.c:1839 - #8 0xaaad3948625f in xhci_kick_epctx /qemu/hw/usb/hcd-xhci.c:1991 - #9 0xaaad3948f537 in xhci_doorbell_write /qemu/hw/usb/hcd-xhci.c:3158 - #10 0xaaad38bcbfc7 in memory_region_write_accessor /qemu/memory.c:483 - #11 0xaaad38bc654f in access_with_adjusted_size /qemu/memory.c:544 - #12 0xaaad38bd1877 in memory_region_dispatch_write /qemu/memory.c:1482 - #13 0xaaad38b1c77f in flatview_write_continue /qemu/exec.c:3167 - #14 0xaaad38b1ca83 in flatview_write /qemu/exec.c:3207 - #15 0xaaad38b268db in address_space_write /qemu/exec.c:3297 - #16 0xaaad38bf909b in kvm_cpu_exec /qemu/accel/kvm/kvm-all.c:2383 - #17 0xaaad38bb063f in qemu_kvm_cpu_thread_fn /qemu/cpus.c:1246 - #18 0xaaad39821c93 in qemu_thread_start /qemu/util/qemu-thread-posix.c:519 - #19 0xfffd1c8378bb (/lib64/libpthread.so.0+0x78bb) - #20 0xfffd1c77616b (/lib64/libc.so.6+0xd616b) - -Reported-by: Euler Robot -Signed-off-by: Chen Qun ---- - hw/usb/hcd-xhci.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 80988bb305..0d3d96d05a 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -2000,6 +2000,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - if (xfer != 
NULL && xfer->running_retry) { - DPRINTF("xhci: xfer nacked, stopping schedule\n"); - epctx->retry = xfer; -+ xhci_xfer_unmap(xfer); - break; - } - if (count++ > TRANSFER_LIMIT) { --- -2.23.0 - diff --git a/xhci-Fix-memory-leak-in-xhci_kick_epctx.patch b/xhci-Fix-memory-leak-in-xhci_kick_epctx.patch deleted file mode 100644 index 398b1bcdd6e784cf1ecd87f41d0f1066d2780c6c..0000000000000000000000000000000000000000 --- a/xhci-Fix-memory-leak-in-xhci_kick_epctx.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 2212f37e0e477d8da0cff02cfc8b7a921ca11bef Mon Sep 17 00:00:00 2001 -From: fangying -Date: Wed, 28 Aug 2019 14:02:22 +0800 -Subject: [PATCH] xhci: Fix memory leak in xhci_kick_epctx - -Address Sanitizer shows memory leak in xhci_kick_epctx hw/usb/hcd-xhci.c:1912. -A sglist is leaked when a packet is retired and returns USB_RET_NAK status. -The leak stack is as bellow: - -Direct leak of 2688 byte(s) in 168 object(s) allocated from: - #0 0xffffae8b11db in __interceptor_malloc (/lib64/libasan.so.4+0xd31db) - #1 0xffffae5c9163 in g_malloc (/lib64/libglib-2.0.so.0+0x57163) - #2 0xaaaabb6fb3f7 in qemu_sglist_init dma-helpers.c:43 - #3 0xaaaabba705a7 in pci_dma_sglist_init include/hw/pci/pci.h:837 - #4 0xaaaabba705a7 in xhci_xfer_create_sgl hw/usb/hcd-xhci.c:1443 - #5 0xaaaabba705a7 in xhci_setup_packet hw/usb/hcd-xhci.c:1615 - #6 0xaaaabba77a6f in xhci_kick_epctx hw/usb/hcd-xhci.c:1912 - #7 0xaaaabbdaad27 in timerlist_run_timers util/qemu-timer.c:592 - #8 0xaaaabbdab19f in qemu_clock_run_timers util/qemu-timer.c:606 - #9 0xaaaabbdab19f in qemu_clock_run_all_timers util/qemu-timer.c:692 - #10 0xaaaabbdab9a3 in main_loop_wait util/main-loop.c:524 - #11 0xaaaabb6ff5e7 in main_loop vl.c:1806 - #12 0xaaaabb1e1453 in main vl.c:4488 - -Signed-off-by: Ying Fang ---- - hw/usb/hcd-xhci.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 6e1ec786..e10fbd3d 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -1911,6 +1911,7 @@ 
static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - } - usb_handle_packet(xfer->packet.ep->dev, &xfer->packet); - if (xfer->packet.status == USB_RET_NAK) { -+ xhci_xfer_unmap(xfer); - return; - } - xhci_try_complete_packet(xfer); --- -2.19.1 - diff --git a/xhci-fix-valid.max_access_size-to-access-address-reg.patch b/xhci-fix-valid.max_access_size-to-access-address-reg.patch deleted file mode 100644 index 466cbf2667efaf26cc65c160c8223659abb0c288..0000000000000000000000000000000000000000 --- a/xhci-fix-valid.max_access_size-to-access-address-reg.patch +++ /dev/null @@ -1,62 +0,0 @@ -From a71d1847aa780b3c4062e582ab400a7fea0413b3 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 21 Jul 2020 10:33:22 +0200 -Subject: [PATCH 01/11] xhci: fix valid.max_access_size to access address - registers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -QEMU XHCI advertises AC64 (64-bit addressing) but doesn't allow -64-bit mode access in "runtime" and "operational" MemoryRegionOps. - -Set the max_access_size based on sizeof(dma_addr_t) as AC64 is set. - -XHCI specs: -"If the xHC supports 64-bit addressing (AC64 = ‘1’), then software -should write 64-bit registers using only Qword accesses. If a -system is incapable of issuing Qword accesses, then writes to the -64-bit address fields shall be performed using 2 Dword accesses; -low Dword-first, high-Dword second. 
If the xHC supports 32-bit -addressing (AC64 = ‘0’), then the high Dword of registers containing -64-bit address fields are unused and software should write addresses -using only Dword accesses" - -The problem has been detected with SLOF, as linux kernel always accesses -registers using 32-bit access even if AC64 is set and revealed by -5d971f9e6725 ("memory: Revert "memory: accept mismatching sizes in memory_region_access_valid"") - -Suggested-by: Alexey Kardashevskiy -Signed-off-by: Laurent Vivier -Message-id: 20200721083322.90651-1-lvivier@redhat.com -Signed-off-by: Gerd Hoffmann -Signed-off-by: BiaoXiang Ye ---- - hw/usb/hcd-xhci.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index a21485fe..24565de1 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -3171,7 +3171,7 @@ static const MemoryRegionOps xhci_oper_ops = { - .read = xhci_oper_read, - .write = xhci_oper_write, - .valid.min_access_size = 4, -- .valid.max_access_size = 4, -+ .valid.max_access_size = sizeof(dma_addr_t), - .endianness = DEVICE_LITTLE_ENDIAN, - }; - -@@ -3187,7 +3187,7 @@ static const MemoryRegionOps xhci_runtime_ops = { - .read = xhci_runtime_read, - .write = xhci_runtime_write, - .valid.min_access_size = 4, -- .valid.max_access_size = 4, -+ .valid.max_access_size = sizeof(dma_addr_t), - .endianness = DEVICE_LITTLE_ENDIAN, - }; - --- -2.27.0.dirty - diff --git a/xhci-recheck-slot-status.patch b/xhci-recheck-slot-status.patch deleted file mode 100644 index d05c3c8344802c788827334b2f48693ec4b72edb..0000000000000000000000000000000000000000 --- a/xhci-recheck-slot-status.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 33d6a2bc0e432a85962b71bcb2c3b5eec39bf436 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Tue, 7 Jan 2020 09:36:06 +0100 -Subject: [PATCH] xhci: recheck slot status -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Factor out slot status check into a helper 
function. Add an additional -check after completing transfers. This is needed in case a guest -queues multiple transfers in a row and a device unplug happens while -qemu processes them. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 -Signed-off-by: Gerd Hoffmann -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20200107083606.12393-1-kraxel@redhat.com ---- - hw/usb/hcd-xhci.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 24565de1d1..4b42f53b9c 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -1860,6 +1860,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, - xhci_kick_epctx(epctx, streamid); - } - -+static bool xhci_slot_ok(XHCIState *xhci, int slotid) -+{ -+ return (xhci->slots[slotid - 1].uport && -+ xhci->slots[slotid - 1].uport->dev && -+ xhci->slots[slotid - 1].uport->dev->attached); -+} -+ - static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - { - XHCIState *xhci = epctx->xhci; -@@ -1877,9 +1884,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - - /* If the device has been detached, but the guest has not noticed this - yet the 2 above checks will succeed, but we must NOT continue */ -- if (!xhci->slots[epctx->slotid - 1].uport || -- !xhci->slots[epctx->slotid - 1].uport->dev || -- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { - return; - } - -@@ -1986,6 +1991,10 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - } else { - xhci_fire_transfer(xhci, xfer, epctx); - } -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { -+ /* surprise removal -> stop processing */ -+ break; -+ } - if (xfer->complete) { - /* update ring dequeue ptr */ - xhci_set_ep_state(xhci, epctx, stctx, epctx->state); --- -2.27.0 - diff --git a/xics-Don-t-deassert-outputs.patch b/xics-Don-t-deassert-outputs.patch deleted file mode 100644 
index 083a9a2e885cd2b4c2d8fe701c2ad037b5bece00..0000000000000000000000000000000000000000 --- a/xics-Don-t-deassert-outputs.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 5b137b37ef7c4941200798cca99200e80ef17a01 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Wed, 4 Dec 2019 20:43:43 +0100 -Subject: [PATCH] xics: Don't deassert outputs - -The correct way to do this is to deassert the input pins on the CPU side. -This is the case since a previous change. - -Signed-off-by: Greg Kurz -Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> -Signed-off-by: David Gibson ---- - hw/intc/xics.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/intc/xics.c b/hw/intc/xics.c -index faa976e2f8..d2d377fc85 100644 ---- a/hw/intc/xics.c -+++ b/hw/intc/xics.c -@@ -303,9 +303,6 @@ static void icp_reset_handler(void *dev) - icp->pending_priority = 0xff; - icp->mfrr = 0xff; - -- /* Make all outputs are deasserted */ -- qemu_set_irq(icp->output, 0); -- - if (kvm_irqchip_in_kernel()) { - Error *local_err = NULL; - --- -2.27.0 -